Skip to content

Commit

Permalink
Closed chatopera#45 enable cache for nearby words
Browse files Browse the repository at this point in the history
  • Loading branch information
hailiang-wang committed Mar 4, 2018
1 parent d4f20e9 commit 97b676e
Show file tree
Hide file tree
Showing 5 changed files with 19 additions and 5 deletions.
4 changes: 4 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,7 @@
# 3.1
* 对空间临近词的邻居进行缓存,提高返回速度
* nearby中处理OOV,返回 ([], [])

# 3.0 - 更简单的定制和配置,增加了额外的开销
* 去掉nearby words, 使用 kdtree检索空间词汇的最近邻
* 增加了对scikit-learn的依赖,但是减少了对词向量的预处理
Expand Down
2 changes: 1 addition & 1 deletion Requirements.txt
Original file line number Diff line number Diff line change
@@ -1 +1 @@
synonyms>=2.7
synonyms>=3.1
1 change: 1 addition & 0 deletions demo.py
Original file line number Diff line number Diff line change
Expand Up @@ -98,6 +98,7 @@ def test_similarity(self):

def test_nearby(self):
synonyms.display("奥运") # synonyms.display calls synonyms.nearby
synonyms.display("北新桥") # synonyms.display calls synonyms.nearby


def test():
Expand Down
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@

setup(
name='synonyms',
version='3.0',
version='3.1',
description='Chinese Synonyms for Natural Language Processing and Understanding',
long_description=LONGDOC,
author='Hai Liang Wang, Hu Ying Xi',
Expand Down
15 changes: 12 additions & 3 deletions synonyms/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,7 @@
_size = 0
_vectors = None
_stopwords = set()
_cache_nearby = dict()

'''
lambda fns
Expand Down Expand Up @@ -241,10 +242,18 @@ def nearby(word):
'''
Nearby word
'''
w = any2unicode(word)
# read from cache
if w in _cache_nearby: return _cache_nearby[w]

words, scores = [], []
for x in _vectors.neighbours(any2unicode(word)):
words.append(x[0])
scores.append(x[1])
try:
for x in _vectors.neighbours(w):
words.append(x[0])
scores.append(x[1])
except: pass # ignore key error, OOV
# put into cache
_cache_nearby[w] = (words, scores)
return words, scores

def compare(s1, s2, seg=True):
Expand Down

0 comments on commit 97b676e

Please sign in to comment.