-
Notifications
You must be signed in to change notification settings - Fork 29
/
download.py
executable file
·248 lines (223 loc) · 10.7 KB
/
download.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
#!/usr/local/bin/python2.5
###update
#Copyright 2005-2008 J. David Gladstone Institutes, San Francisco California
#Author Nathan Salomonis - [email protected]
#Permission is hereby granted, free of charge, to any person obtaining a copy
#of this software and associated documentation files (the "Software"), to deal
#in the Software without restriction, including without limitation the rights
#to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
#copies of the Software, and to permit persons to whom the Software is furnished
#to do so, subject to the following conditions:
#THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED,
#INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A
#PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
#HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
#OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
#SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
"""This module contains generic methods for downloading and decompressing files designated
online files and coordinating specific database build operations for all AltAnalyze supported
gene ID systems with other program modules. """
import os
import sys
import unique
import string
import export
import traceback
def filepath(filename):
fn = unique.filepath(filename)
return fn
def read_directory(sub_dir):
dir_list = unique.read_directory(sub_dir); dir_list2 = []
###Code to prevent folder names from being included
for entry in dir_list:
if entry[-4:] == ".txt" or entry[-4:] == ".csv": dir_list2.append(entry)
return dir_list2
def cleanUpLine(line):
line = string.replace(line,'\n','')
line = string.replace(line,'\c','')
data = string.replace(line,'\r','')
data = string.replace(data,'"','')
return data
def zipDirectory(dir):
#http:https://www.testingreflections.com/node/view/8173
import zipfile
dir = filepath(dir); zip_file = dir+'.zip'
p = string.split(dir,'/'); top=p[-1]
zip = zipfile.ZipFile(zip_file, 'w', compression=zipfile.ZIP_DEFLATED)
root_len = len(os.path.abspath(dir))
for root, dirs, files in os.walk(dir):
archive_root = os.path.abspath(root)[root_len:]
for f in files:
fullpath = os.path.join(root, f)
archive_name = os.path.join(top+archive_root, f)
zip.write(fullpath, archive_name, zipfile.ZIP_DEFLATED)
zip.close()
return zip_file
def unzipFiles(filename,dir):
import zipfile
output_filepath = filepath(dir+filename)
try:
zfile = zipfile.ZipFile(output_filepath)
for name in zfile.namelist():
if name.endswith('/'):null=[] ### Don't need to export
else:
try: outfile = export.ExportFile(dir+name)
except Exception: outfile = export.ExportFile(dir+name[1:])
outfile.write(zfile.read(name)); outfile.close()
#print 'Zip extracted to:',output_filepath
status = 'completed'
except Exception, e:
print e
print 'WARNING!!!! The zip file',output_filepath,'does not appear to be a valid zip archive file or is currupt.'
status = 'failed'
return status
def download(url,dir,file_type):
try: dp = download_protocol(url,dir,file_type); gz_filepath, status = dp.getStatus()
except Exception:
gz_filepath='failed'; status = "Internet connection was not established. Re-establish and try again."
if status == 'remove':
#print "\nRemoving zip file:",gz_filepath
try: os.remove(gz_filepath); status = 'removed'
except Exception: null=[] ### Not sure why this error occurs since the file is not open
return gz_filepath, status
class download_protocol:
def __init__(self,url,dir,file_type):
"""Copy the contents of a file from a given URL to a local file."""
filename = url.split('/')[-1]
if len(file_type) == 2: filename, file_type = file_type ### Added this feature for when a file has an invalid filename
output_filepath_object = export.createExportFile(dir+filename,dir[:-1])
output_filepath = filepath(dir+filename)
print "Downloading the following file:",filename,' ',
self.original_increment = 10
self.increment = 0
import urllib,urllib2
from urllib import urlretrieve
try:
try: webfile, msg = urlretrieve(url,output_filepath,reporthook=self.reporthookFunction)
except:
if 'Binary' in traceback.format_exc(): #IOError: [Errno ftp error] 200 Switching to Binary mode.
### https://bugs.python.org/issue1067702 - some machines the socket doesn't close and causes an error - reload to close the socket
reload(urllib)
webfile, msg = urlretrieve(url,output_filepath,reporthook=self.reporthookFunction)
reload(urllib)
if 'SSL' in traceback.format_exc():
### SSL error encountered for the target website
#https://github.com/NagiosEnterprises/ncpa/issues/195
import urllib2, ssl
try:
_create_unverified_https_context = ssl._create_unverified_context
except AttributeError:
# Legacy Python that doesn't verify HTTPS certificates by default
pass
ssl._create_default_https_context = _create_unverified_https_context
print '...downloading (be patient)'
webfile, msg = urlretrieve(url,output_filepath)
except:
print 'Unknown URL error encountered...'; forceURLError
print ''
print "\nFile downloaded to:",output_filepath
if '.zip' in filename:
try: decompressZipStackOverflow(filename,dir); status = 'completed'
except Exception:
status = unzipFiles(filename,dir)
if status == 'failed': print 'Zip Extraction failed'
self.gz_filepath = filepath(output_filepath); self.status = 'remove'
print "zip file extracted..."
elif '.gz' in filename:
self.gz_filepath = output_filepath
if len(file_type)==0: extension = '.gz'
else: extension = 'gz'
decompressed_filepath = string.replace(self.gz_filepath,extension,file_type)
### Below code can be too memory intensive
#file_size = os.path.getsize(output_filepath)
#megabtyes = file_size/1000000.00
#if megabtyes>5000: force_error ### force_error is an undefined variable which causes an exception
import gzip; content = gzip.GzipFile(self.gz_filepath, 'rb')
data = open(decompressed_filepath,'wb')
#print "\nExtracting downloaded file:",self.gz_filepath
import shutil; shutil.copyfileobj(content,data)
self.status = 'remove'
else: self.gz_filepath = ''; self.status = 'NA'
def getStatus(self): return self.gz_filepath, self.status
def reporthookFunction(self, blocks_read, block_size, total_size):
if not blocks_read:
print 'Connection opened. Downloading (be patient)'
if total_size < 0:
# Unknown size
print 'Read %d blocks' % blocks_read
else:
amount_read = blocks_read * block_size
percent_read = ((amount_read)*1.00/total_size)*100
if percent_read>self.increment:
#print '%d%% downloaded' % self.increment
print '*',
self.increment += self.original_increment
#print 'Read %d blocks, or %d/%d' % (blocks_read, amount_read, (amount_read/total_size)*100.000)
def createExportFile(new_file,dir):
try:
fn=filepath(new_file); file_var = open(fn,'w')
except IOError:
#print "IOError", fn
fn = filepath(dir)
try:
os.mkdir(fn) ###Re-Create directory if deleted
#print fn, 'written'
except OSError: createExportDir(new_file,dir) ###Occurs if the parent directory is also missing
fn=filepath(new_file); file_var = open(fn,'w')
return file_var
def downloadCurrentVersionUI(filename,secondary_dir,file_type,root):
continue_analysis = downloadCurrentVersion(filename,secondary_dir,file_type)
if continue_analysis == 'no':
import UI
root.destroy(); UI.getUserParameters('no'); sys.exit()
root.destroy()
def downloadCurrentVersion(filename,secondary_dir,file_type):
import UI
file_location_defaults = UI.importDefaultFileLocations()
uds = file_location_defaults['url'] ### Get the location of the download site from Config/default-files.csv
for ud in uds: url_dir = ud.Location() ### Only one entry
dir = export.findParentDir(filename)
filename = export.findFilename(filename)
url = url_dir+secondary_dir+'/'+filename
file,status = download(url,dir,file_type); continue_analysis = 'yes'
if 'Internet' in status:
print_out = "File:\n"+url+"\ncould not be found on server or internet connection is unavailable."
try:
UI.WarningWindow(print_out,'WARNING!!!')
continue_analysis = 'no'
except Exception:
print url
print 'cannot be downloaded';die
elif status == 'remove':
try: os.remove(file) ### Not sure why this works now and not before
except Exception: status = status
return continue_analysis
def decompressZipStackOverflow(zip_file,dir):
zip_file = filepath(dir+zip_file)
###http:https://stackoverflow.com/questions/339053/how-do-you-unzip-very-large-files-in-python
import zipfile
import zlib
src = open(zip_file,"rb")
zf = zipfile.ZipFile(src)
for m in zf.infolist():
# Examine the header
#print m.filename, m.header_offset, m.compress_size, repr(m.extra), repr(m.comment)
src.seek(m.header_offset)
src.read(30) # Good to use struct to unpack this.
nm= src.read(len(m.filename))
if len(m.extra) > 0: ex= src.read(len(m.extra))
if len(m.comment) > 0: cm=src.read(len(m.comment))
# Build a decompression object
decomp= zlib.decompressobj(-15)
# This can be done with a loop reading blocks
out=open(filepath(dir+m.filename), "wb")
result=decomp.decompress(src.read(m.compress_size))
out.write(result); result=decomp.flush()
out.write(result); out.close()
zf.close()
src.close()
if __name__ == '__main__':
path = 'http:https://thebiogrid.org/downloads/archives/Latest%20Release/BIOGRID-ALL-LATEST.tab2.zip'
#import urllib
#path = urllib.quote(path)
dp = download_protocol(path,'downloaded/','')