Skip to content

Commit

Permalink
Map/Reduce test script
Browse files Browse the repository at this point in the history
  • Loading branch information
sitnin committed Oct 20, 2010
1 parent 7be1e4b commit 5335575
Show file tree
Hide file tree
Showing 3 changed files with 42 additions and 0 deletions.
42 changes: 42 additions & 0 deletions mrtest/mrtest.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-

import locale, sys
# Switch every locale category to Russian/UTF-8. setlocale raises
# locale.Error when the 'ru_RU.UTF-8' locale is not installed on the host.
locale.setlocale(locale.LC_ALL, 'ru_RU.UTF-8')
# Python 2 only: reload(sys) makes sys.setdefaultencoding reachable again
# (it is removed from the module namespace at interpreter start-up), so that
# implicit str <-> unicode conversions use UTF-8 instead of ASCII.
# Neither reload() as a builtin nor sys.setdefaultencoding exists in Python 3.
reload(sys)
sys.setdefaultencoding('utf-8')
from operator import itemgetter
import string
import datetime

def map(stream=None):
    """Tokenise text into ``[word, 1]`` pairs (the "map" phase).

    Every line of *stream* is split on whitespace; each token is stripped of
    leading/trailing ASCII punctuation, whitespace and the characters
    ``«``, ``»`` and ``…``, then lower-cased.  A token consisting solely of
    such characters yields an empty-string word; no filtering happens here.

    Note: the function name shadows the ``map`` builtin inside this module;
    it is kept because callers use it.

    Args:
        stream: Iterable of text lines.  Defaults to ``sys.stdin``.

    Returns:
        A list of two-element lists ``[word, 1]`` in input order.
    """
    if stream is None:
        stream = sys.stdin

    # Built once instead of per word.  The escapes spell "«»…" so the value
    # does not depend on the source-file encoding declaration.
    strip_chars = string.punctuation + string.whitespace + u"\u00ab\u00bb\u2026"

    pairs = []
    for line in stream:
        # split() without arguments already ignores surrounding whitespace.
        for token in line.split():
            # Under Python 2 a byte-string token is implicitly decoded with
            # the process default encoding because strip_chars is unicode,
            # so the result is always a unicode string.
            pairs.append([token.strip(strip_chars).lower(), 1])
    return pairs


def reduce(mapped_words):
    """Sum the counts per word and print the words seen exactly 100 times.

    Words of two characters or fewer are ignored.  The remaining words are
    totalled, sorted by word, and every word whose total equals 100 is
    printed on its own line as ``<div>WORD<span>COUNT</span></div>``.

    Note: the function name shadows the Python 2 ``reduce`` builtin inside
    this module; it is kept because callers use it.

    Args:
        mapped_words: Iterable of ``(word, count)`` pairs.

    Returns:
        None.  Output is written to standard output.
    """
    word2count = {}
    for word, count in mapped_words:
        if len(word) > 2:
            word2count[word] = word2count.get(word, 0) + count

    for word, count in sorted(word2count.items(), key=itemgetter(0)):
        if count == 100:
            # NOTE(review): the word is inserted without HTML escaping.
            # Parenthesised single-argument print behaves the same as the
            # Python 2 print statement.
            print('<div>%s<span>%s</span></div>' % (word, count))

# Run the whole map -> reduce pipeline over standard input and report how
# long it took (wall-clock, as a datetime.timedelta).
start = datetime.datetime.now()
mapped = map()
reduce(mapped)
end = datetime.datetime.now()
print(end - start)
Binary file added mrtest/mrtest.txt.zip
Binary file not shown.
Binary file added mrtest/vm.txt.zip
Binary file not shown.

0 comments on commit 5335575

Please sign in to comment.