Octave can be used in demo in lieu of matlab.
Galileo authored and Galileo committed Oct 15, 2015
1 parent 853ab67 commit 9be7cf8
Showing 4 changed files with 127 additions and 0 deletions.
2 changes: 2 additions & 0 deletions demo.sh
@@ -37,6 +37,8 @@ if [[ $? -eq 0 ]]
then
    if [ "$1" = 'matlab' ]; then
        matlab -nodisplay -nodesktop -nojvm -nosplash < ./eval/matlab/read_and_evaluate.m 1>&2
    elif [ "$1" = 'octave' ]; then
        octave < ./eval/octave/read_and_evaluate_octave.m 1>&2
    else
        python eval/python/evaluate.py
    fi
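With this change, the evaluation backend is chosen by the demo script's first argument: ./demo.sh octave runs the new Octave scripts under eval/octave, ./demo.sh matlab keeps the original MATLAB path, and any other invocation falls back to the Python evaluator.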
10 changes: 10 additions & 0 deletions eval/octave/WordLookup_octave.m
@@ -0,0 +1,10 @@
function index = WordLookup_octave(InputString)
% Return the index of InputString in the global wordMap struct;
% fall back to the '<unk>' entry if present, otherwise return 0.
global wordMap

if isfield(wordMap, InputString)
    index = wordMap.(InputString);
elseif isfield(wordMap, '<unk>')
    index = wordMap.('<unk>');
else
    index = 0;
end
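A minimal sketch of the lookup contract, assuming wordMap has been populated as read_and_evaluate_octave.m does below and eval/octave is on the path (the toy entries 'the' and 'cat' are invented for illustration):

% Toy illustration only; the real wordMap is built from vocab.txt by read_and_evaluate_octave.m.
global wordMap
wordMap = struct();
wordMap.('the') = 1;
wordMap.('cat') = 2;          % note: no '<unk>' entry in this toy map
WordLookup_octave('cat')      % -> 2, the stored index
WordLookup_octave('dog')      % -> 0, since neither 'dog' nor '<unk>' is a field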
80 changes: 80 additions & 0 deletions eval/octave/evaluate_vectors_octave.m
@@ -0,0 +1,80 @@
function [BB] = evaluate_vectors_octave(W)

global wordMap

filenames = {'capital-common-countries' 'capital-world' 'currency' 'city-in-state' 'family' 'gram1-adjective-to-adverb' ...
'gram2-opposite' 'gram3-comparative' 'gram4-superlative' 'gram5-present-participle' 'gram6-nationality-adjective' ...
'gram7-past-tense' 'gram8-plural' 'gram9-plural-verbs'};
path = './eval/question-data/';

split_size = 100; %to avoid memory overflow, could be increased/decreased depending on system and vocab size

correct_sem = 0; %count correct semantic questions
correct_syn = 0; %count correct syntactic questions
correct_tot = 0; %count correct questions
count_sem = 0; %count all semantic questions
count_syn = 0; %count all syntactic questions
count_tot = 0; %count all questions
full_count = 0; %count all questions, including those with unknown words


if isfield(wordMap, '<unk>')
    unkkey = wordMap.('<unk>');
else
    unkkey = 0;
end

for j=1:length(filenames)

    clear dist;

    fid=fopen([path filenames{j} '.txt']);
    temp=textscan(fid,'%s%s%s%s');
    fclose(fid);
    ind1 = cellfun(@WordLookup_octave,temp{1}); %indices of first word in analogy
    ind2 = cellfun(@WordLookup_octave,temp{2}); %indices of second word in analogy
    ind3 = cellfun(@WordLookup_octave,temp{3}); %indices of third word in analogy
    ind4 = cellfun(@WordLookup_octave,temp{4}); %indices of answer word in analogy
    full_count = full_count + length(ind1);
    ind = (ind1 ~= unkkey) & (ind2 ~= unkkey) & (ind3 ~= unkkey) & (ind4 ~= unkkey); %only look at those questions which have no unknown words
    ind1 = ind1(ind);
    ind2 = ind2(ind);
    ind3 = ind3(ind);
    ind4 = ind4(ind);
    disp([filenames{j} ':']);
    mx = zeros(1,length(ind1));
    num_iter = ceil(length(ind1)/split_size);
    for jj=1:num_iter
        range = (jj-1)*split_size+1:min(jj*split_size,length(ind1));
        dist = full(W * (W(ind2(range),:)' - W(ind1(range),:)' + W(ind3(range),:)')); %cosine similarity if input W has been normalized
        for i=1:length(range)
            dist(ind1(range(i)),i) = -Inf;
            dist(ind2(range(i)),i) = -Inf;
            dist(ind3(range(i)),i) = -Inf;
        end
        [~, mx(range)] = max(dist); %predicted word index
    end

    val = (ind4 == mx'); %correct predictions
    count_tot = count_tot + length(ind1);
    correct_tot = correct_tot + sum(val);
    disp(['ACCURACY TOP1: ' num2str(mean(val)*100,'%-2.2f') '% (' num2str(sum(val)) '/' num2str(length(val)) ')']);
    if j < 6
        count_sem = count_sem + length(ind1);
        correct_sem = correct_sem + sum(val);
    else
        count_syn = count_syn + length(ind1);
        correct_syn = correct_syn + sum(val);
    end

    disp(['Total accuracy: ' num2str(100*correct_tot/count_tot,'%-2.2f') '% Semantic accuracy: ' num2str(100*correct_sem/count_sem,'%-2.2f') '% Syntactic accuracy: ' num2str(100*correct_syn/count_syn,'%-2.2f') '%']);

end
disp('________________________________________________________________________________');
disp(['Questions seen/total: ' num2str(100*count_tot/full_count,'%-2.2f') '% (' num2str(count_tot) '/' num2str(full_count) ')']);
disp(['Semantic Accuracy: ' num2str(100*correct_sem/count_sem,'%-2.2f') '% (' num2str(correct_sem) '/' num2str(count_sem) ')']);
disp(['Syntactic Accuracy: ' num2str(100*correct_syn/count_syn,'%-2.2f') '% (' num2str(correct_syn) '/' num2str(count_syn) ')']);
disp(['Total Accuracy: ' num2str(100*correct_tot/count_tot,'%-2.2f') '% (' num2str(correct_tot) '/' num2str(count_tot) ')']);
BB = [100*correct_sem/count_sem 100*correct_syn/count_syn 100*correct_tot/count_tot];

end
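The heart of the loop above is the analogy scoring: for a question a : b :: c : ?, the predicted answer is the vocabulary word whose row of W scores highest against b - a + c, with the three question words themselves excluded. A self-contained sketch of that single step, using random toy vectors and invented indices:

% Toy illustration of the scoring in evaluate_vectors_octave (not part of the commit).
W = rand(5, 3);                               % 5 toy "words", 3-dimensional vectors
W = bsxfun(@rdivide, W, sqrt(sum(W.^2, 2)));  % normalize rows, as the driver script does
a = 1; b = 2; c = 3;                          % indices for the analogy a : b :: c : ?
dist = W * (W(b,:)' - W(a,:)' + W(c,:)');     % similarity of every word to b - a + c
dist([a b c]) = -Inf;                         % never predict one of the question words
[~, predicted] = max(dist)                    % index of the predicted answer word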
35 changes: 35 additions & 0 deletions eval/octave/read_and_evaluate_octave.m
@@ -0,0 +1,35 @@
addpath('./eval/octave');
if(~exist('vocab_file'))
    vocab_file = 'vocab.txt';
end
if(~exist('vectors_file'))
    vectors_file = 'vectors.bin';
end

fid = fopen(vocab_file, 'r');
words = textscan(fid, '%s %f');
fclose(fid);
words = words{1};
vocab_size = length(words);
global wordMap

wordMap = struct();
for i=1:numel(words)
    wordMap.(words{i}) = i;
end

fid = fopen(vectors_file,'r');
fseek(fid,0,'eof');
vector_size = ftell(fid)/16/vocab_size - 1;
frewind(fid);
WW = fread(fid, [vector_size+1 2*vocab_size], 'double')';
fclose(fid);

W1 = WW(1:vocab_size, 1:vector_size); % word vectors
W2 = WW(vocab_size+1:end, 1:vector_size); % context (tilde) word vectors

W = W1 + W2; %Evaluate on sum of word vectors
W = bsxfun(@rdivide,W,sqrt(sum(W.*W,2))); %normalize vectors before evaluation
evaluate_vectors_octave(W);
exit
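A note on the size arithmetic above: the script assumes vectors.bin holds 2*vocab_size rows (word vectors followed by context vectors), each stored as vector_size + 1 doubles; the extra column, presumably the bias term, is dropped when W1 and W2 are sliced out. That is why vector_size can be recovered from the file size alone. A quick sanity check with assumed numbers:

% Sanity check of the ftell-based arithmetic; vocab_size and vector_size are hypothetical here.
vocab_size  = 400000;                              % assumed vocabulary size
vector_size = 50;                                  % assumed vector dimension
nbytes = 8 * 2 * vocab_size * (vector_size + 1);   % 8 bytes per double, 2*vocab_size rows, vector_size+1 columns
nbytes / 16 / vocab_size - 1                       % prints 50, matching the expression used above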
