Skip to content

Commit

Permalink
py: Faster qstr search.
Browse files Browse the repository at this point in the history
Today the qstr implementation scans strings sequentially.
In cases where there are many strings this can become very inefficient.
This change improves qstr search performance by using binary search in
sorted qstr pools, when possible.

This change introduces an option to create a sorted string pool, which
is then searched by a binary search instead of sequential search.

A qstr pool can be either "sorted" or "unsorted"; an unsorted pool is
searched sequentially, as today.
Native modules (MP_ROM_QSTR) and frozen modules generate sorted pools.
Currently runtime strings are unsorted.

The constant string pool is split into two, and a new pool is introduced,
"special_const_pool". This is required because the first sequence of
strings already requires a special ordering and is therefore created
unsorted, while the rest of the constants are generated sorted.

qstr_find_strn searches strings in each pool. If the pool is sorted and
larger than a threshold, it will be searched using binary search instead
of sequential search, significantly improving performance.
  • Loading branch information
amirgon committed Jul 2, 2022
1 parent 9b48634 commit 18eefe7
Show file tree
Hide file tree
Showing 5 changed files with 125 additions and 23 deletions.
17 changes: 13 additions & 4 deletions py/makeqstrdata.py
Original file line number Diff line number Diff line change
Expand Up @@ -347,12 +347,21 @@ def print_qstr_data(qcfgs, qstrs):
print("")

# add NULL qstr with no hash or data
print('QDEF(MP_QSTRnull, 0, 0, "")')
print('QDEF0(MP_QSTRnull, 0, 0, "")')

# go through each qstr and print it out
for order, ident, qstr in sorted(qstrs.values(), key=lambda x: x[0]):
# split qstr values into two pools. static consts first.
q0_values = [q for q in qstrs.values() if q[0] < 0]
q1_values = [q for q in qstrs.values() if q[0] >= 0]

# go through each qstr in pool 0 and print it out. pool0 has special sort.
for order, ident, qstr in sorted(q0_values, key=lambda x: x[0]):
qbytes = make_bytes(cfg_bytes_len, cfg_bytes_hash, qstr)
print("QDEF0(MP_QSTR_%s, %s)" % (ident, qbytes))

# go through each qstr in pool 1 and print it out. pool1 is regularly sorted.
for order, ident, qstr in sorted(q1_values, key=lambda x: x[2]):
qbytes = make_bytes(cfg_bytes_len, cfg_bytes_hash, qstr)
print("QDEF(MP_QSTR_%s, %s)" % (ident, qbytes))
print("QDEF1(MP_QSTR_%s, %s)" % (ident, qbytes))


def do_work(infiles):
Expand Down
109 changes: 94 additions & 15 deletions py/qstr.c
Original file line number Diff line number Diff line change
Expand Up @@ -74,34 +74,82 @@ mp_uint_t qstr_compute_hash(const byte *data, size_t len) {
return hash;
}

const qstr_hash_t mp_qstr_const_hashes[] = {
const qstr_hash_t mp_qstr_const_hashes0[] = {
#ifndef NO_QSTR
#define QDEF(id, hash, len, str) hash,
#define QDEF0(id, hash, len, str) hash,
#define QDEF1(id, hash, len, str)
#include "genhdr/qstrdefs.generated.h"
#undef QDEF
#undef QDEF0
#undef QDEF1
#endif
};

const qstr_len_t mp_qstr_const_lengths[] = {
const qstr_hash_t mp_qstr_const_hashes1[] = {
#ifndef NO_QSTR
#define QDEF(id, hash, len, str) len,
#define QDEF0(id, hash, len, str)
#define QDEF1(id, hash, len, str) hash,
#include "genhdr/qstrdefs.generated.h"
#undef QDEF
#undef QDEF0
#undef QDEF1
#endif
};

// Length table for the qstrs emitted as QDEF0 in the generated header.
// QDEF1 is defined to expand to nothing here, so only QDEF0 entries
// contribute an element. This table is referenced by
// mp_qstr_special_const_pool (the unsorted pool).
const qstr_len_t mp_qstr_const_lengths0[] = {
#ifndef NO_QSTR
#define QDEF0(id, hash, len, str) len,
#define QDEF1(id, hash, len, str)
#include "genhdr/qstrdefs.generated.h"
#undef QDEF0
#undef QDEF1
#endif
};

// Length table for the qstrs emitted as QDEF1 in the generated header.
// QDEF0 is defined to expand to nothing here, so only QDEF1 entries
// contribute an element. This table is referenced by mp_qstr_const_pool
// (the sorted pool), so its order must match that pool's string order.
const qstr_len_t mp_qstr_const_lengths1[] = {
#ifndef NO_QSTR
#define QDEF0(id, hash, len, str)
#define QDEF1(id, hash, len, str) len,
#include "genhdr/qstrdefs.generated.h"
#undef QDEF0
#undef QDEF1
#endif
};

// First pool in the chain: holds the QDEF0 strings from the generated
// header, which keep their special ordering and are therefore flagged as
// not sorted (searched sequentially). It has no previous pool.
// NOTE(review): the initializer is positional; field order is assumed to
// match qstr_pool_t (prev, total_prev_len, alloc, len, hashes, lengths,
// sorted, qstrs) -- confirm against py/qstr.h.
const qstr_pool_t mp_qstr_special_const_pool = {
NULL, // no previous pool
0, // no previous pool
MICROPY_ALLOC_QSTR_ENTRIES_INIT,
MP_QSTRspecial_const_number_of + 1, // corresponds to number of strings in array just below
(qstr_hash_t *)mp_qstr_const_hashes0,
(qstr_len_t *)mp_qstr_const_lengths0,
false, // special constant qstrs are not sorted
{
#ifndef NO_QSTR
// Only QDEF0 entries emit a string here; QDEF1 expands to nothing.
#define QDEF0(id, hash, len, str) str,
#define QDEF1(id, hash, len, str)
#include "genhdr/qstrdefs.generated.h"
#undef QDEF0
#undef QDEF1
#endif
// Extra slot accounting for the "+ 1" in the length above, so that the
// enum value MP_QSTRspecial_const_number_of has a string entry too.
(const char *)"", // spacer for MP_QSTRspecial_const_number_of
},
};

const qstr_pool_t mp_qstr_const_pool = {
NULL, // no previous pool
0, // no previous pool
(qstr_pool_t *)&mp_qstr_special_const_pool,
MP_QSTRspecial_const_number_of + 1,
MICROPY_ALLOC_QSTR_ENTRIES_INIT,
MP_QSTRnumber_of, // corresponds to number of strings in array just below
(qstr_hash_t *)mp_qstr_const_hashes,
(qstr_len_t *)mp_qstr_const_lengths,
MP_QSTRnumber_of -
(MP_QSTRspecial_const_number_of + 1), // corresponds to number of strings in array just below
(qstr_hash_t *)mp_qstr_const_hashes1,
(qstr_len_t *)mp_qstr_const_lengths1,
true, // constant qstrs are sorted
{
#ifndef NO_QSTR
#define QDEF(id, hash, len, str) str,
#define QDEF0(id, hash, len, str)
#define QDEF1(id, hash, len, str) str,
#include "genhdr/qstrdefs.generated.h"
#undef QDEF
#undef QDEF0
#undef QDEF1
#endif
},
};
Expand Down Expand Up @@ -164,6 +212,7 @@ STATIC qstr qstr_add(mp_uint_t hash, mp_uint_t len, const char *q_ptr) {
pool->total_prev_len = MP_STATE_VM(last_pool)->total_prev_len + MP_STATE_VM(last_pool)->len;
pool->alloc = new_alloc;
pool->len = 0;
pool->sorted = false;
MP_STATE_VM(last_pool) = pool;
DEBUG_printf("QSTR: allocate new pool of size %d\n", MP_STATE_VM(last_pool)->alloc);
}
Expand All @@ -179,13 +228,43 @@ STATIC qstr qstr_add(mp_uint_t hash, mp_uint_t len, const char *q_ptr) {
return MP_STATE_VM(last_pool)->total_prev_len + at;
}

#define MP_QSTR_SEARCH_THRESHOLD 10

qstr qstr_find_strn(const char *str, size_t str_len) {
// work out hash of str
mp_uint_t str_hash = qstr_compute_hash((const byte *)str, str_len);

// search pools for the data
for (const qstr_pool_t *pool = MP_STATE_VM(last_pool); pool != NULL; pool = pool->prev) {
for (mp_uint_t at = 0, top = pool->len; at < top; at++) {
size_t low = 0;
size_t high = pool->len - 1;

// binary search inside the pool
if (pool->sorted) {
while (high - low > MP_QSTR_SEARCH_THRESHOLD) {
size_t mid = (low + high + 1) / 2;
size_t len = pool->lengths[mid];
if (len > str_len) {
len = str_len;
}
int cmp = memcmp(pool->qstrs[mid], str, str_len);
if (cmp < 0) {
low = mid;
} else if (cmp > 0) {
high = mid;
} else {
if (pool->lengths[mid] < str_len) {
low = mid;
} else if (pool->lengths[mid] > str_len) {
high = mid;
} else {
return pool->total_prev_len + mid;
}
}
}
}

// sequential search for the remaining strings
for (mp_uint_t at = low; at < high + 1; at++) {
if (pool->hashes[at] == str_hash && pool->lengths[at] == str_len
&& memcmp(pool->qstrs[at], str, str_len) == 0) {
return pool->total_prev_len + at;
Expand Down
17 changes: 15 additions & 2 deletions py/qstr.h
Original file line number Diff line number Diff line change
Expand Up @@ -38,9 +38,21 @@
// first entry in enum will be MP_QSTRnull=0, which indicates invalid/no qstr
enum {
#ifndef NO_QSTR
#define QDEF(id, hash, len, str) id,

#define QDEF0(id, hash, len, str) id,
#define QDEF1(id, hash, len, str)
#include "genhdr/qstrdefs.generated.h"
#undef QDEF
#undef QDEF0
#undef QDEF1

MP_QSTRspecial_const_number_of, // no underscore so it can't clash with any of the above

#define QDEF0(id, hash, len, str)
#define QDEF1(id, hash, len, str) id,
#include "genhdr/qstrdefs.generated.h"
#undef QDEF0
#undef QDEF1

#endif
MP_QSTRnumber_of, // no underscore so it can't clash with any of the above
};
Expand Down Expand Up @@ -71,6 +83,7 @@ typedef struct _qstr_pool_t {
size_t len;
qstr_hash_t *hashes;
qstr_len_t *lengths;
bool sorted;
const char *qstrs[];
} qstr_pool_t;

Expand Down
2 changes: 1 addition & 1 deletion tools/makemanifest.py
Original file line number Diff line number Diff line change
Expand Up @@ -415,7 +415,7 @@ def main():
b'#include "py/emitglue.h"\n'
b"extern const qstr_pool_t mp_qstr_const_pool;\n"
b"const qstr_pool_t mp_qstr_frozen_const_pool = {\n"
b" (qstr_pool_t*)&mp_qstr_const_pool, MP_QSTRnumber_of, 0, 0\n"
b" (qstr_pool_t*)&mp_qstr_const_pool, MP_QSTRnumber_of, 0, false, 0\n"
b"};\n"
b'const char mp_frozen_names[] = { MP_FROZEN_STR_NAMES "\\0"};\n'
b"const mp_raw_code_t *const mp_frozen_mpy_content[] = {NULL};\n"
Expand Down
3 changes: 2 additions & 1 deletion tools/mpy-tool.py
Original file line number Diff line number Diff line change
Expand Up @@ -1397,7 +1397,7 @@ def freeze_mpy(base_qstrs, compiled_modules):
if q is None or q.qstr_esc in base_qstrs or q.qstr_esc in new:
continue
new[q.qstr_esc] = (len(new), q.qstr_esc, q.str, bytes_cons(q.str, "utf8"))
new = sorted(new.values(), key=lambda x: x[0])
new = sorted(new.values(), key=lambda x: x[2])

print('#include "py/mpconfig.h"')
print('#include "py/objint.h"')
Expand Down Expand Up @@ -1482,6 +1482,7 @@ def freeze_mpy(base_qstrs, compiled_modules):
print(" %u, // used entries" % len(new))
print(" (qstr_hash_t *)mp_qstr_frozen_const_hashes,")
print(" (qstr_len_t *)mp_qstr_frozen_const_lengths,")
print(" true, // entries are sorted")
print(" {")
for _, _, qstr, qbytes in new:
print(' "%s",' % qstrutil.escape_bytes(qstr, qbytes))
Expand Down

0 comments on commit 18eefe7

Please sign in to comment.