forked from yuhui-zh15/SogouWord
-
Notifications
You must be signed in to change notification settings - Fork 0
/
downloadWordList.py
executable file
·54 lines (49 loc) · 1.34 KB
/
downloadWordList.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
# -*- coding: utf-8 -*-
import sys
import os
import urllib2
import urllib
import time
# Base download address; the numeric dictionary id is appended to it.
# (The literal previously read "http:https://...", which is not a valid URL.)
url = "https://pinyin.sogou.com/dict/download_txt.php?id="

# Directory, relative to the current working directory, that receives files.
_WORD_DIR = "word"

# NOTE: every print below is written as print(<single expression>) so that it
# produces exactly the same output under the Python 2 print statement that the
# rest of this file targets.


def _parse_id(text):
    """Return *text* as an integer id; exit with status 1 unless it is >= 1.

    A non-numeric value is reported with the same message as an
    out-of-range one instead of surfacing a ValueError traceback.
    """
    try:
        value = int(text)
    except ValueError:
        value = 0
    if value < 1:
        print("please enter a number over 1")
        sys.exit(1)
    return value


def _ensure_word_dir():
    """Create the output directory when it does not exist yet."""
    if not os.path.isdir(_WORD_DIR):
        os.makedirs(_WORD_DIR)


def _starts_with_script_tag(path):
    """Return True when the first line of the file at *path* contains ``<script>``."""
    # The context manager guarantees the handle is closed before the caller
    # tries to delete the file.
    with open(path) as handle:
        return "<script>" in handle.readline()


def _download(index):
    """Download entry *index* to ``word/<index>.txt`` and validate it.

    A file whose first line contains ``<script>`` is deleted again
    (presumably the server answers with an HTML/JS page when the id has no
    dictionary -- TODO confirm against the live endpoint).

    Returns True when the file was kept and False when it was removed.
    """
    name = str(index) + ".txt"
    path = _WORD_DIR + "/" + name
    urllib.urlretrieve(url + str(index), path)
    print("downloading: " + name)
    if _starts_with_script_tag(path):
        os.remove(path)
        print("removing: " + name)
        return False
    print(index)
    return True


def main(argv=None):
    """Command-line entry point.

    Usage:
        downloadWordList.py ID          download the single entry ID
        downloadWordList.py BEGIN END   download entries BEGIN .. END-1

    *argv* defaults to ``sys.argv``. Missing or invalid arguments print a
    message and terminate with exit status 1. With more than two numeric
    arguments nothing is downloaded and only the timing line is printed.
    """
    if argv is None:
        argv = sys.argv
    begin_time = time.time()

    if len(argv) < 2:
        print("Too few arguments")
        print(len(argv))
        sys.exit(1)

    # Validate before touching the network or the file system; previously an
    # id below 1 only printed a warning and the download went ahead anyway.
    first = _parse_id(argv[1])

    if len(argv) == 2:
        _ensure_word_dir()
        # Use the validated integer so the URL and the file name always carry
        # the canonical number.
        _download(first)
        # Single-id mode finishes here, without the timing summary.
        return

    # An empty range by default: with unexpected extra arguments the loop
    # below does nothing.
    begin = 1
    end = 1
    if len(argv) == 3:
        begin = first
        end = _parse_id(argv[2])
        print(len(argv))

    _ensure_word_dir()
    # The end bound is exclusive: entry END itself is not fetched.
    for index in xrange(begin, end):
        _download(index)

    print("Spent " + str(time.time() - begin_time) + "s")


if __name__ == "__main__":
    main()