-
Notifications
You must be signed in to change notification settings - Fork 4
/
geograph.py
35 lines (30 loc) · 794 Bytes
/
geograph.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
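# Extracts the subgraph of graph.txt.gz whose edges connect two geotagged items
# and writes it to geograph.txt.gz.
# Requires geo.txt.gz and graph.txt.gz in the current working directory.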
import gzip
# Restrict a graph to the edges whose both endpoints are geotagged items.
#
# The script reads the geotagged item ids from a gzip file, then copies every
# graph line whose subject and object are both geotagged (plus all '#' comment
# lines) into a new gzip file.  All data is handled as bytes so the same code
# runs unchanged on Python 2.7 and Python 3.

# Number of input lines between two progress messages.
PROGRESS_INTERVAL = 1000000


def report_progress(linecount):
    """Print how many millions of lines were read, once per million lines."""
    if linecount % PROGRESS_INTERVAL == 0:
        # Floor division keeps the integer output on both Python 2 and 3.
        print(linecount // PROGRESS_INTERVAL)


def load_geoitems(geo_path='geo.txt.gz'):
    """Collect the item ids listed in a gzip-compressed geo file.

    Lines starting with '#' are skipped.  Every other line must consist of
    exactly four fields separated by single spaces; the fourth field is
    taken as the item id.

    Args:
        geo_path: path of the gzip-compressed geo file.

    Returns:
        A tuple ``(geoitems, linecount)``: the set of item ids (as bytes)
        and the total number of lines read, comment lines included.

    Raises:
        ValueError: if a non-comment line does not have exactly four fields.
    """
    geoitems = set()
    linecount = 0  # stays 0 when the file is empty
    with gzip.open(geo_path) as geo_file:
        for linecount, line in enumerate(geo_file, 1):
            report_progress(linecount)
            if line.startswith(b'#'):
                continue
            # Only the last field is needed; unpacking all four still
            # rejects malformed lines.
            _lat, _lon, _pred, item = line.strip().split(b' ')
            geoitems.add(item)
    return geoitems, linecount


def filter_graph(geoitems, graph_path='graph.txt.gz',
                 out_path='geograph.txt.gz'):
    """Copy the graph lines that connect two geotagged items.

    Lines starting with '#' are copied verbatim.  Every other line must
    consist of exactly three fields separated by single spaces (subject,
    predicate, object) and is copied only when both subject and object are
    in ``geoitems``.

    Args:
        geoitems: set of item ids (bytes), e.g. from ``load_geoitems``.
        graph_path: path of the gzip-compressed input graph.
        out_path: path of the gzip-compressed output graph (overwritten).

    Returns:
        A tuple ``(linecount, count)``: the number of lines read and the
        number of non-comment lines written.

    Raises:
        ValueError: if a non-comment line does not have exactly three fields.
    """
    count = 0
    linecount = 0  # stays 0 when the file is empty
    # The input is opened first so that a missing graph file does not
    # truncate an existing output file.
    with gzip.open(graph_path) as graph_file, \
            gzip.open(out_path, 'wb') as output:
        for linecount, line in enumerate(graph_file, 1):
            report_progress(linecount)
            if line.startswith(b'#'):
                output.write(line)
                continue
            subject, _pred, obj = line.strip().split(b' ')
            if subject in geoitems and obj in geoitems:
                output.write(line)
                count += 1
    return linecount, count


def main():
    """Run the extraction on the default files and print the statistics."""
    geoitems, linecount = load_geoitems()
    print('%d lines' % linecount)
    print('%d geoitems' % len(geoitems))
    linecount, count = filter_graph(geoitems)
    print('%d lines' % linecount)
    print('%d results' % count)


if __name__ == '__main__':
    main()