Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Complete rewrite #13

Merged
merged 23 commits into from
Feb 27, 2017
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
Show all changes
23 commits
Select commit Hold shift + click to select a range
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
Next Next commit
Rewrite hash.
  • Loading branch information
ashtuchkin committed Feb 11, 2017
commit 893a039a33d15358e18b8d2c69ef15a59301f1f5
74 changes: 59 additions & 15 deletions src/primitives/hash.h
Original file line number Diff line number Diff line change
@@ -1,34 +1,78 @@
#pragma once
#include <stdint.h>
#include <cstring>

// Simple, non-crypto string hashing algorithm djb2 by Dan Bernstein: compile-time and runtime versions,
// plus user-literal version is provided to allow "abc"_hash to return hash of "abc".
// MurmurHash3 32 bit, derived from https://github.com/aappleby/smhasher
// Both compile-time and runtime versions provided.
// User literal version is also defined to allow "abc"_hash to return hash of "abc".
// One useful property of this hash is that hash("") == 0.

constexpr uint32_t static_strlen(const char *str) {
return (*str == 0) ? 0 : (1 + static_strlen(str+1));
// Rotate the 32-bit value `x` left by `r` bit positions and return the result.
// The count is reduced modulo 32, so r == 0 (or 32) returns `x` unchanged
// instead of shifting a 32-bit value by its full width, which is undefined behaviour.
constexpr uint32_t rotl32(uint32_t x, int8_t r) {
    // Converting to unsigned before masking keeps negative counts well defined as well.
    const uint32_t shift = static_cast<uint32_t>(r) & 31u;
    // With a constant count the masks fold away, so this is still the plain rotate idiom
    // that the original author relied on GCC turning into a single 'ROR' instruction on ARM.
    return (x << shift) | (x >> ((32u - shift) & 31u));
}

constexpr uint32_t static_hash_prefix(const char *str, uint32_t prefix_len) {
return (prefix_len == 0) ? 5381 : ((static_hash_prefix(str, prefix_len-1) * 33) ^ str[prefix_len-1]);
// MurmurHash3 (32-bit variant) of the `len` bytes starting at `data`.
//   static_version - true:  every 4-byte block is assembled byte by byte (least significant
//                           byte first), which is usable inside constant expressions.
//                    false: every 4-byte block is loaded with memcpy; runtime use only.
//   seed           - initial hash state. With the default seed of 0 an empty input hashes to 0.
// Both variants return the same value on little-endian targets.
template<bool static_version>
constexpr uint32_t MurmurHash3_32(const char *data, uint32_t len, uint32_t seed = 0) {
    uint32_t h1 = seed, k1 = 0, i = 0;

    // Body: mix in all complete 4-byte blocks.
    for (; i + 3 < len; i += 4) {
        if (static_version) {
            // Widen each byte to uint32_t before shifting, so a byte >= 0x80 is never
            // shifted into the sign bit of a promoted int.
            k1 = (static_cast<uint32_t>(static_cast<uint8_t>(data[i + 0])) << 0) |
                 (static_cast<uint32_t>(static_cast<uint8_t>(data[i + 1])) << 8) |
                 (static_cast<uint32_t>(static_cast<uint8_t>(data[i + 2])) << 16) |
                 (static_cast<uint32_t>(static_cast<uint8_t>(data[i + 3])) << 24);
        } else {
            // memcpy instead of dereferencing a casted uint32_t pointer: `data` carries no
            // alignment guarantee and the cast would also break the aliasing rules.
            std::memcpy(&k1, data + i, sizeof(k1));
        }
        k1 *= 0xcc9e2d51; k1 = (k1 << 15) | (k1 >> 17); k1 *= 0x1b873593;   // rotate left by 15
        h1 ^= k1; h1 = (h1 << 13) | (h1 >> 19); h1 = h1 * 5 + 0xe6546b64;   // rotate left by 13
    }

    // Tail: the remaining 1-3 bytes. The cases intentionally fall through so that every
    // leftover byte is folded into k1 before the final mix.
    k1 = 0;
    switch (len & 3) {
        case 3:
            k1 ^= static_cast<uint32_t>(static_cast<uint8_t>(data[i + 2])) << 16;
            // fallthrough
        case 2:
            k1 ^= static_cast<uint32_t>(static_cast<uint8_t>(data[i + 1])) << 8;
            // fallthrough
        case 1:
            k1 ^= static_cast<uint8_t>(data[i]);
            k1 *= 0xcc9e2d51; k1 = (k1 << 15) | (k1 >> 17); k1 *= 0x1b873593;
            h1 ^= k1;
            break;
        default:
            break;
    }

    // Finalization: fold in the length, then mix the bits of the accumulated state.
    h1 ^= len;
    h1 ^= h1 >> 16; h1 *= 0x85ebca6b;
    h1 ^= h1 >> 13; h1 *= 0xc2b2ae35;
    h1 ^= h1 >> 16;
    return h1;
}

// Length of the zero-terminated string `str`, not counting the terminator.
// Usable in constant expressions.
constexpr uint32_t static_strlen(const char *str) {
    const char *cursor = str;
    // Walk forward until the terminating '\0' is reached.
    while (*cursor != '\0') {
        ++cursor;
    }
    return static_cast<uint32_t>(cursor - str);
}

constexpr uint32_t static_hash(const char *str) {
return static_hash_prefix(str, static_strlen(str));
return MurmurHash3_32<true>(str, static_strlen(str));
}

constexpr uint32_t operator "" _hash(const char* str, unsigned int len) { // User literal.
return static_hash_prefix(str, len);
// String user literal.
constexpr uint32_t operator ""_hash(const char* str, size_t len) {
return MurmurHash3_32<true>(str, len);
}

// Runtime version of our hash. 'hash' argument can be used for 'streaming mode'.
inline uint32_t runtime_hash(const char *str, uint32_t hash = 5381) {
while (*str)
hash = (hash * 33) ^ *str;
return hash;
// Runtime version of our hash - to be used on dynamic data.
inline uint32_t runtime_hash(const char *str, size_t len) {
return MurmurHash3_32<false>(str, len);
}


// Tests that the algorithm is implemented correctly.
static_assert("abc123123"_hash == 2841232309, "MurmurHash3 validity test");
static_assert(""_hash == 0, "MurmurHash3 validity test");
static_assert("a"_hash == 1009084850, "MurmurHash3 validity test");
static_assert("a§"_hash == 221796761, "MurmurHash3 validity test");
static_assert("ab§da"_hash == 3670539345, "MurmurHash3 validity test");


// Simple, append-only hash table with constant capacity.
/*
template<typename T, uint32_t C>
class HashTable {
static_assert(C > 0, "Capacity should be positive.");
Expand Down Expand Up @@ -80,5 +124,5 @@ class HashTable {
T *elems_[kModulo];
uint32_t hashes_[kModulo];
};

*/

19 changes: 11 additions & 8 deletions src/primitives/string_utils.cpp
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
#include "string_utils.h"
#include <utility>
#include <Arduino.h>

constexpr int max_input_str_len = 256;
Expand Down Expand Up @@ -87,19 +88,21 @@ bool suffixed_by_int(char *word, char **first_digit, uint32_t *value) {
}

// Return a static, zero-terminated array of hashes of provided words.
HashedWord *hash_words(char **words) {
HashedWord *hash_words(char *str) {
static HashedWord hashes[max_words+1];
uint32_t i = 0;
for (; *words && i < max_words; words++, i++) {
char *cur_word = hashes[i].word = *words;
char *first_digit;
char *cur_word, *first_digit;
for (; (cur_word = next_word(&str)) && i < max_words; i++) {
hashes[i].word = cur_word;
if (suffixed_by_int(cur_word, &first_digit, &hashes[i].idx)) {
char digit = *first_digit; *first_digit = 0; // Temporarily replace first digit with '\0' to end string.
hashes[i].hash = runtime_hash("#", runtime_hash(cur_word)); // Make hash of curword + "#"
*first_digit = digit;
char c = '#';
// Create hash of curword + '#'
std::swap(*first_digit, c);
hashes[i].hash = runtime_hash(cur_word, first_digit-cur_word+1);
std::swap(*first_digit, c);
// hashes[i].idx is set in suffixed_by_int
} else { // Regular case.
hashes[i].hash = runtime_hash(cur_word);
hashes[i].hash = runtime_hash(cur_word, strlen(cur_word));
hashes[i].idx = -1; // To avoid clashing with existing indexes.
}
}
Expand Down
5 changes: 3 additions & 2 deletions src/primitives/string_utils.h
Original file line number Diff line number Diff line change
Expand Up @@ -32,5 +32,6 @@ struct HashedWord {
inline bool as_float(float *res) { return parse_float(word, res); }
};

// Return a static, zero-terminated array of hashes of provided words.
HashedWord *hash_words(char **words);
// Return a static, zero-terminated array of hashes for words in given string.
// NOTE: Provided string is changed - null characters are added after words.
HashedWord *hash_words(char *str);