Skip to content

Commit

Permalink
Data generator
Browse files Browse the repository at this point in the history
  • Loading branch information
sitnin committed Oct 23, 2010
1 parent c08f5f3 commit 51cafc8
Show file tree
Hide file tree
Showing 5 changed files with 86 additions and 2 deletions.
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -1,2 +1,3 @@
*.pyc
config.py
.eprj
41 changes: 41 additions & 0 deletions datagen/create_texts.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-

import locale, sys
# Switch every locale category of the process to Russian/UTF-8.
# NOTE(review): this requires the ru_RU.UTF-8 locale to be installed on the
# host; confirm for the deployment target.
locale.setlocale(locale.LC_ALL, 'ru_RU.UTF-8')
# Python 2 only: sys.setdefaultencoding is removed from the sys module at
# interpreter start-up, so the module is reloaded to get it back and the
# implicit str <-> unicode codec is then switched to UTF-8 process-wide.
reload(sys)
sys.setdefaultencoding('utf-8')

import os
import random
import urllib
from BeautifulSoup import BeautifulSoup


# How many pages the script fetches (one output file per page).
count = 100

# Endpoint that the per-theme query string is appended to.
base_url = "https://vesna.yandex.ru/all.xml"

# Theme identifiers; one is picked at random for every request.
themes = (
    "astronomy",
    "geology",
    "gyroscope",
    "literature",
    "marketing",
    "mathematics",
    "music",
    "polit",
    "agrobiologia",
    "law",
    "psychology",
    "geography",
    "physics",
    "philosophy",
    "chemistry",
    "estetica",
)


def _tag_text(tag):
    """Return the text of a BeautifulSoup tag, nested markup included.

    ``tag.string`` is None whenever the tag holds anything other than a
    single text node, so fall back to joining every text node below it
    instead of failing on ``None + "\\n"``.
    """
    if tag.string is not None:
        return tag.string
    return u"".join(tag.findAll(text=True))


def _fetch_page(url):
    """Download *url* and return its body decoded from cp1251 to unicode."""
    tmp_path = urllib.urlretrieve(url)[0]
    try:
        # Binary mode: the bytes are decoded explicitly right below.
        with open(tmp_path, "rb") as f:
            return f.read().decode("cp1251")
    finally:
        # urlretrieve leaves a temporary file behind on every call.
        urllib.urlcleanup()


def _save_page(soup, path):
    """Write the page heading and paragraphs to *path*, one per line.

    The text is encoded as UTF-8 explicitly. Returns False, writing
    nothing, when the page has no <h1>; returns True otherwise.
    """
    heading = soup.find("h1")
    if heading is None:
        return False
    # Drop the first 7 characters and the last one of the heading.
    # NOTE(review): presumably a fixed label and a closing quote -- verify
    # against a live page.
    lines = [_tag_text(heading)[7:-1]]
    lines.extend(_tag_text(p) for p in soup.findAll("p"))
    with open(path, "w") as f:
        f.write(u"".join(line + u"\n" for line in lines).encode("utf-8"))
    return True


if __name__ == "__main__":
    for x in xrange(count):
        filename = "%04d.txt" % x
        theme = random.choice(themes)
        url = base_url + "?mix=%s&%s=on" % (theme, theme)
        print("Fetching %s" % url)
        # Hand BeautifulSoup unicode so it does not have to guess the
        # encoding of the markup.
        soup = BeautifulSoup(_fetch_page(url))
        if _save_page(soup, "data/%s" % filename):
            print("Saved to %s" % filename)
        else:
            print("Skipped %s: no <h1> found" % url)
1 change: 1 addition & 0 deletions datagen/data/.gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
*.txt
25 changes: 25 additions & 0 deletions datagen/datagen.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-

import locale, sys
# Switch every locale category of the process to Russian/UTF-8.
# NOTE(review): this requires the ru_RU.UTF-8 locale to be installed on the
# host; confirm for the deployment target.
locale.setlocale(locale.LC_ALL, 'ru_RU.UTF-8')
# Python 2 only: sys.setdefaultencoding is removed from the sys module at
# interpreter start-up, so the module is reloaded to get it back and the
# implicit str <-> unicode codec is then switched to UTF-8 process-wide.
reload(sys)
sys.setdefaultencoding('utf-8')

import os


def load_data():
    """Placeholder for the data-loading step; does nothing yet."""
    return None

def run():
    """Placeholder for the main processing step; does nothing yet."""
    return None

def print_result():
    """Placeholder for the reporting step; does nothing yet."""
    return None


if __name__ == "__main__":
    # Run the stages in their fixed order: load, process, report.
    for stage in (load_data, run, print_result):
        stage()
20 changes: 18 additions & 2 deletions webapp.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,14 +25,30 @@ def get(self, page=None):
if page < 1:
raise tornado.web.HTTPError(404)
offset = (page - 1) * 10
self.render("posts.html", page=page, url=self.request.uri)

while True:
try:
R.send_command("WATCH last_post_id")
R.send_command("MULTI")
post_id = R.incr("last_post_id")
R.send_command("EXEC")
break
except Exception, e:
print(str(e))
self.write(str(post_id))

# self.redirect("")
# self.render("posts.html", page=page, url=self.request.uri)

def post(self):
    """Save a new post.

    Reads the ``title``, ``body`` and ``tags`` request arguments, takes
    the next post id by incrementing the ``last_post_id`` counter through
    ``R`` (presumably the module's Redis client -- confirm), and writes
    that id as the response body.
    """
    # The values are not stored yet; the lookups are kept so the handler
    # still goes through get_argument for all three fields.
    # NOTE(review): with no default, tornado's get_argument is documented
    # to reject the request when the argument is missing -- confirm that
    # this is the intended validation.
    title = self.get_argument("title")
    body = self.get_argument("body")
    tags = self.get_argument("tags")
    # No leftover ``raise NotImplementedYet()`` in front of the
    # implementation: it made the two lines below unreachable.
    post_id = R.incr("last_post_id")
    self.write(str(post_id))
    # TODO: persist title/body/tags under post_id, then redirect.


class OnePost(tornado.web.RequestHandler):
Expand Down

0 comments on commit 51cafc8

Please sign in to comment.