From 31ba60f8364a4009ddc3d45fee90c84b43d88d2c Mon Sep 17 00:00:00 2001 From: Paul Sokolovsky Date: Fri, 3 Jan 2014 02:51:16 +0200 Subject: str: Initial implementation of string slicing. Only step=1 and non-negative indexes are supported so far. --- py/objstr.c | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) (limited to 'py/objstr.c') diff --git a/py/objstr.c b/py/objstr.c index 48abf4951..46adabcec 100644 --- a/py/objstr.c +++ b/py/objstr.c @@ -29,7 +29,21 @@ mp_obj_t str_binary_op(int op, mp_obj_t lhs_in, mp_obj_t rhs_in) { case RT_BINARY_OP_SUBSCR: // string access // XXX a massive hack! - return mp_obj_new_int(lhs_str[mp_obj_get_int(rhs_in)]); + + // TODO: need predicate to check for int-like type (bools are such for example) + // ["no", "yes"][1 == 2] is common idiom + if (MP_OBJ_IS_SMALL_INT(rhs_in)) { + // TODO: This implements byte string access for single index so far + return mp_obj_new_int(lhs_str[mp_obj_get_int(rhs_in)]); + } else if (MP_OBJ_IS_TYPE(rhs_in, &slice_type)) { + int start, stop, step; + mp_obj_slice_get(rhs_in, &start, &stop, &step); + assert(step == 1); + return mp_obj_new_str(qstr_from_strn_copy(lhs_str + start, stop - start)); + } else { + // Throw TypeError here + assert(0); + } case RT_BINARY_OP_ADD: case RT_BINARY_OP_INPLACE_ADD: -- cgit v1.2.3 From decad08ef57aa3cf3960ce65e29b194cb97c6d22 Mon Sep 17 00:00:00 2001 From: Paul Sokolovsky Date: Fri, 3 Jan 2014 23:36:56 +0200 Subject: str: Handle non-positive slice indexes. 
--- py/objstr.c | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'py/objstr.c') diff --git a/py/objstr.c b/py/objstr.c index 46adabcec..54dd087a4 100644 --- a/py/objstr.c +++ b/py/objstr.c @@ -39,6 +39,13 @@ mp_obj_t str_binary_op(int op, mp_obj_t lhs_in, mp_obj_t rhs_in) { int start, stop, step; mp_obj_slice_get(rhs_in, &start, &stop, &step); assert(step == 1); + int len = strlen(lhs_str); + if (start < 0) { + start = len + start; + } + if (stop <= 0) { + stop = len + stop; + } return mp_obj_new_str(qstr_from_strn_copy(lhs_str + start, stop - start)); } else { // Throw TypeError here -- cgit v1.2.3 From e606cb656165aff2424fb6ca45f09d606246d073 Mon Sep 17 00:00:00 2001 From: Paul Sokolovsky Date: Sat, 4 Jan 2014 01:34:23 +0200 Subject: slice: Allow building with MICROPY_ENABLE_SLICE=0. --- py/objstr.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'py/objstr.c') diff --git a/py/objstr.c b/py/objstr.c index 54dd087a4..8e3e9d902 100644 --- a/py/objstr.c +++ b/py/objstr.c @@ -35,6 +35,7 @@ mp_obj_t str_binary_op(int op, mp_obj_t lhs_in, mp_obj_t rhs_in) { if (MP_OBJ_IS_SMALL_INT(rhs_in)) { // TODO: This implements byte string access for single index so far return mp_obj_new_int(lhs_str[mp_obj_get_int(rhs_in)]); +#if MICROPY_ENABLE_SLICE } else if (MP_OBJ_IS_TYPE(rhs_in, &slice_type)) { int start, stop, step; mp_obj_slice_get(rhs_in, &start, &stop, &step); @@ -47,6 +48,7 @@ mp_obj_t str_binary_op(int op, mp_obj_t lhs_in, mp_obj_t rhs_in) { stop = len + stop; } return mp_obj_new_str(qstr_from_strn_copy(lhs_str + start, stop - start)); +#endif } else { // Throw TypeError here assert(0); -- cgit v1.2.3 From f8b9d3c41addea79851c355f014db9f0f256cdaf Mon Sep 17 00:00:00 2001 From: Paul Sokolovsky Date: Sat, 4 Jan 2014 01:38:26 +0200 Subject: str: Throw TypeError for invalid index type and clean up comments. 
--- py/objstr.c | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) (limited to 'py/objstr.c') diff --git a/py/objstr.c b/py/objstr.c index 8e3e9d902..6a0721d45 100644 --- a/py/objstr.c +++ b/py/objstr.c @@ -27,13 +27,11 @@ mp_obj_t str_binary_op(int op, mp_obj_t lhs_in, mp_obj_t rhs_in) { const char *lhs_str = qstr_str(lhs->qstr); switch (op) { case RT_BINARY_OP_SUBSCR: - // string access - // XXX a massive hack! - // TODO: need predicate to check for int-like type (bools are such for example) // ["no", "yes"][1 == 2] is common idiom if (MP_OBJ_IS_SMALL_INT(rhs_in)) { // TODO: This implements byte string access for single index so far + // TODO: Handle negative indexes. return mp_obj_new_int(lhs_str[mp_obj_get_int(rhs_in)]); #if MICROPY_ENABLE_SLICE } else if (MP_OBJ_IS_TYPE(rhs_in, &slice_type)) { @@ -50,8 +48,9 @@ mp_obj_t str_binary_op(int op, mp_obj_t lhs_in, mp_obj_t rhs_in) { return mp_obj_new_str(qstr_from_strn_copy(lhs_str + start, stop - start)); #endif } else { - // Throw TypeError here - assert(0); + // Message doesn't match CPython, but we don't have so much bytes as they + // to spend them on verbose wording + nlr_jump(mp_obj_new_exception_msg(rt_q_TypeError, "index must be int")); } case RT_BINARY_OP_ADD: -- cgit v1.2.3 From c8d1384fc0c7aafa5dee3445ece20f4e43dfa9c1 Mon Sep 17 00:00:00 2001 From: Damien George Date: Sat, 4 Jan 2014 01:06:10 +0000 Subject: Fix int -> machine_int_t; add print to slice test. 
--- py/objstr.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'py/objstr.c') diff --git a/py/objstr.c b/py/objstr.c index 6a0721d45..59547e3cd 100644 --- a/py/objstr.c +++ b/py/objstr.c @@ -35,7 +35,7 @@ mp_obj_t str_binary_op(int op, mp_obj_t lhs_in, mp_obj_t rhs_in) { return mp_obj_new_int(lhs_str[mp_obj_get_int(rhs_in)]); #if MICROPY_ENABLE_SLICE } else if (MP_OBJ_IS_TYPE(rhs_in, &slice_type)) { - int start, stop, step; + machine_int_t start, stop, step; mp_obj_slice_get(rhs_in, &start, &stop, &step); assert(step == 1); int len = strlen(lhs_str); -- cgit v1.2.3 From 6ee1e383d6b95d0bb5f2902ec91b8d831e4b5803 Mon Sep 17 00:00:00 2001 From: Paul Sokolovsky Date: Sat, 4 Jan 2014 03:47:34 +0200 Subject: str slice: Trim slice indexes to be in range. --- py/objstr.c | 11 +++++++++++ 1 file changed, 11 insertions(+) (limited to 'py/objstr.c') diff --git a/py/objstr.c b/py/objstr.c index 59547e3cd..54e6f3770 100644 --- a/py/objstr.c +++ b/py/objstr.c @@ -41,9 +41,20 @@ mp_obj_t str_binary_op(int op, mp_obj_t lhs_in, mp_obj_t rhs_in) { int len = strlen(lhs_str); if (start < 0) { start = len + start; + if (start < 0) { + start = 0; + } + } else if (start > len) { + start = len; } if (stop <= 0) { stop = len + stop; + // CPython returns empty string in such case + if (stop < 0) { + stop = start; + } + } else if (stop > len) { + stop = len; } return mp_obj_new_str(qstr_from_strn_copy(lhs_str + start, stop - start)); #endif -- cgit v1.2.3 From eb7bfcb28697f6fb2d4d933bc39233aa15423a20 Mon Sep 17 00:00:00 2001 From: Damien George Date: Sat, 4 Jan 2014 15:57:35 +0000 Subject: Split qstr into pools, and put initial pool in ROM. Qstr's are now split into a linked-list of qstr pools. This has 2 benefits: the first pool can be in ROM (huge benefit, since we no longer use RAM for the core qstrs), and subsequent pools use m_new for the next pool instead of m_renew (thus avoiding a huge single table for all the qstrs). 
Still would be better to use a hash table, but this scheme takes us part of the way (eventually convert the pools to hash tables). Also fixed bug with import. Also improved the way the module code is referenced (not magic number 1 anymore). --- py/objstr.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) (limited to 'py/objstr.c') diff --git a/py/objstr.c b/py/objstr.c index 54e6f3770..03a761863 100644 --- a/py/objstr.c +++ b/py/objstr.c @@ -7,6 +7,7 @@ #include "nlr.h" #include "misc.h" #include "mpconfig.h" +#include "mpqstr.h" #include "obj.h" #include "runtime0.h" #include "runtime.h" @@ -61,7 +62,7 @@ mp_obj_t str_binary_op(int op, mp_obj_t lhs_in, mp_obj_t rhs_in) { } else { // Message doesn't match CPython, but we don't have so much bytes as they // to spend them on verbose wording - nlr_jump(mp_obj_new_exception_msg(rt_q_TypeError, "index must be int")); + nlr_jump(mp_obj_new_exception_msg(MP_QSTR_TypeError, "index must be int")); } case RT_BINARY_OP_ADD: @@ -134,7 +135,7 @@ mp_obj_t str_join(mp_obj_t self_in, mp_obj_t arg) { return mp_obj_new_str(qstr_from_str_take(joined_str, required_len + 1)); bad_arg: - nlr_jump(mp_obj_new_exception_msg(rt_q_TypeError, "?str.join expecting a list of str's")); + nlr_jump(mp_obj_new_exception_msg(MP_QSTR_TypeError, "?str.join expecting a list of str's")); } void vstr_printf_wrapper(void *env, const char *fmt, ...) { @@ -158,7 +159,7 @@ mp_obj_t str_format(int n_args, const mp_obj_t *args) { vstr_add_char(vstr, '{'); } else if (*str == '}') { if (arg_i >= n_args) { - nlr_jump(mp_obj_new_exception_msg(rt_q_IndexError, "tuple index out of range")); + nlr_jump(mp_obj_new_exception_msg(MP_QSTR_IndexError, "tuple index out of range")); } mp_obj_print_helper(vstr_printf_wrapper, vstr, args[arg_i]); arg_i++; -- cgit v1.2.3 From 71c5181a8dfa69ba9f5ca322a3aba0660be2e166 Mon Sep 17 00:00:00 2001 From: Damien George Date: Sat, 4 Jan 2014 20:21:15 +0000 Subject: Convert Python types to proper Python type hierarchy. 
Now much more in line with how CPython does types. --- py/objstr.c | 1 + 1 file changed, 1 insertion(+) (limited to 'py/objstr.c') diff --git a/py/objstr.c b/py/objstr.c index 03a761863..27c9440d0 100644 --- a/py/objstr.c +++ b/py/objstr.c @@ -179,6 +179,7 @@ const mp_obj_type_t str_type = { { &mp_const_type }, "str", str_print, // print + NULL, // make_new NULL, // call_n NULL, // unary_op str_binary_op, // binary_op -- cgit v1.2.3