Skip to content

Commit

Permalink
Add data downloading
Browse files Browse the repository at this point in the history
  • Loading branch information
RuiShu committed Mar 14, 2018
1 parent 5fbb09b commit 7f25f87
Show file tree
Hide file tree
Showing 2 changed files with 91 additions and 0 deletions.
60 changes: 60 additions & 0 deletions data/download_mnist.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,60 @@
import os
import numpy as np
import subprocess
from scipy.io import loadmat, savemat
from skimage.transform import resize

def mnist_resize(x):
    """Upscale 28x28 digit images to 32x32 and replicate them to 3 channels.

    ``x`` is reshaped to ``(-1, 28, 28)``, each image is resized to 32x32
    with skimage's ``resize`` (reflect padding), and the single gray plane
    is copied into three identical channels.

    Returns a float32 array of shape ``(N, 32, 32, 3)``.
    """
    target_h, target_w, channels = 32, 32, 3
    digits = x.reshape(-1, 28, 28)
    gray = np.empty((len(digits), target_h, target_w), dtype='float32')

    for idx in range(len(digits)):
        # resize returns values in [0, 1]; they are cast to float32 on store.
        gray[idx] = resize(digits[idx], (target_h, target_w), mode='reflect')

    # Append a channel axis and repeat the gray plane along it to fake RGB.
    return np.repeat(gray[..., np.newaxis], channels, axis=-1)

def main():
    """Download MNIST and convert it to 32x32x3 ``.mat`` files.

    All files are read from and written to the current working directory:

    1. ``mnist.npz`` is fetched with ``wget`` unless it already exists.
    2. Unless both ``mnist32_train.mat`` and ``mnist32_test.mat`` exist,
       the images of each split are passed through ``mnist_resize`` and
       saved with ``savemat`` under the keys ``'X'`` (images) and ``'y'``
       (labels).
    3. Both ``.mat`` files are reloaded and the shape, min and max of
       ``X`` and ``y`` are printed as a sanity check.

    Raises:
        subprocess.CalledProcessError: if ``wget`` exits with a non-zero
            status.
        OSError: if the ``wget`` executable cannot be started.
    """
    # print(...) with a single pre-formatted string produces the same output
    # under Python 2 (print statement) and Python 3 (print function).
    if os.path.exists('mnist.npz'):
        print("Using existing mnist.npz")
    else:
        print("Opening subprocess to download data from URL")
        # Argument list instead of a shell string: nothing is parsed by a
        # shell, and a failed download raises instead of passing silently.
        subprocess.check_call(
            ['wget', 'https://s3.amazonaws.com/img-datasets/mnist.npz'])

    if (os.path.exists('mnist32_train.mat')
            and os.path.exists('mnist32_test.mat')):
        print("Using existing mnist32_train.mat and mnist32_test.mat")
    else:
        print("Resizing mnist.npz to (32, 32, 3)")
        # The context manager closes the underlying .npz archive once both
        # splits are written; the original left the file handle open.
        with np.load('mnist.npz') as data:
            for split in ('train', 'test'):
                images = data['x_' + split]
                labels = data['y_' + split]
                images = mnist_resize(images)
                savemat('mnist32_%s.mat' % split, {'X': images, 'y': labels})

    for fname in ('mnist32_train.mat', 'mnist32_test.mat'):
        print("Loading %s for sanity check" % fname)
        data = loadmat(fname)
        for key in ('X', 'y'):
            arr = data[key]
            print("%s %s %s" % (arr.shape, arr.min(), arr.max()))


# Run the download/conversion only when executed as a script.
if __name__ == '__main__':
    main()
31 changes: 31 additions & 0 deletions data/download_svhn.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
import subprocess
import os
from scipy.io import loadmat

def main():
    """Download the SVHN 32x32 ``.mat`` files and print a sanity check.

    Works in the current working directory. If both ``train_32x32.mat`` and
    ``test_32x32.mat`` already exist nothing is downloaded. Otherwise each
    of the train, test and extra files that is not yet present is fetched
    with ``wget``. Afterwards the train and test files are loaded and the
    shape, min and max of ``X`` and ``y`` are printed.

    Raises:
        subprocess.CalledProcessError: if any ``wget`` invocation exits
            with a non-zero status.
        OSError: if the ``wget`` executable cannot be started.
    """
    base_url = 'https://ufldl.stanford.edu/housenumbers/'
    required = ('train_32x32.mat', 'test_32x32.mat')
    downloads = required + ('extra_32x32.mat',)

    # print(...) with a single pre-formatted string produces the same output
    # under Python 2 (print statement) and Python 3 (print function).
    if all(os.path.exists(fname) for fname in required):
        print("Using existing data")
    else:
        print("Opening subprocess to download data from URL")
        for fname in downloads:
            # Skip files already on disk: wget would otherwise save a
            # duplicate as "<name>.1" that nothing below ever reads.
            if os.path.exists(fname):
                continue
            # One checked call per file. The original multi-line shell
            # script only reported the exit status of its last command, so
            # a failed train/test download went unnoticed.
            subprocess.check_call(['wget', base_url + fname])

    for fname in required:
        print("Loading %s for sanity check" % fname)
        data = loadmat(fname)
        for key in ('X', 'y'):
            arr = data[key]
            print("%s %s %s" % (arr.shape, arr.min(), arr.max()))


# Run the download only when executed as a script.
if __name__ == '__main__':
    main()

0 comments on commit 7f25f87

Please sign in to comment.