-
Notifications
You must be signed in to change notification settings - Fork 0
/
data_preprocessing.py
72 lines (52 loc) · 1.83 KB
/
data_preprocessing.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
import os
import numpy as np
import cv2
#%% ------------------------------------------ Data directories -------------------------------------------------------------
# Input folders, one per class, given relative to the directory the script is run from.
# Every entry found in a folder is read as an image of that class.
COVID_DIR = "data/covid/"     # chest X-rays labelled "COVID"
NOR_DIR = "data/normal/"      # chest X-rays labelled "NORMAL"
PNEU_DIR = "data/pneumonia/"  # chest X-rays labelled "PNEUMONIA"
#%% ------------------------------------------------ Load data ---------------------------------------------------------
x, y = [], []     ## store images and their labels
RESIZE_TO = 128   ## resize images to 128x128 px


def _load_image(file_path, size, as_float32=False):
    """Read one image and resize it to ``size`` x ``size`` pixels.

    Args:
        file_path: Path of the file to read.
        size: Width and height, in pixels, of the returned image.
        as_float32: Convert the pixels to float32 before resizing.

    Returns:
        The resized image array, or None when OpenCV cannot read or resize
        the file (e.g. it is not an image).
    """
    try:
        image = cv2.imread(file_path)
        if image is None:
            # imread signals an unreadable file by returning None, not by raising.
            return None
        if as_float32:
            image = image.astype(np.float32)
        return cv2.resize(image, (size, size))
    except cv2.error:
        # Only OpenCV failures mean "skip this file"; anything else
        # (KeyboardInterrupt, MemoryError, ...) must not be swallowed.
        return None


# One entry per class: (source folder, label, convert to float32 before resizing).
# NOTE(review): only the pneumonia images are converted to float32 before
# resizing. The asymmetry is kept so the saved arrays do not change; confirm
# whether it is intended.
_SOURCES = (
    (COVID_DIR, "COVID", False),          # covid CXR
    (NOR_DIR, "NORMAL", False),           # normal CXR
    (PNEU_DIR, "PNEUMONIA", True),        # pneumonia CXR
)

for directory, label, as_float32 in _SOURCES:
    for path in os.listdir(directory):
        file_path = os.path.join(directory, path)
        image = _load_image(file_path, RESIZE_TO, as_float32)
        if image is None:
            # Still best-effort, but no longer silent: report what was skipped.
            print("Skipping unreadable file:", file_path)
            continue
        x.append(image)
        y.append(label)
#%% ------------------------------------------------- Remove duplicates ----------------------------------------------------------
# Keep the first occurrence of every distinct image (and its label), in load order.
#
# Images are grouped into buckets by a hash of their pixel values, and
# np.array_equal is only run against images in the same bucket. This replaces
# a scan over every image kept so far, which was quadratic in the number of images.
# Pixels are widened to float32 before hashing so that images holding the same
# values in different dtypes fall into the same bucket; np.array_equal still
# makes the final decision, so a hash collision cannot drop a distinct image.
unique_x, unique_y = [], []
_buckets = {}
for image, label in zip(x, y):
    key = hash(np.asarray(image, dtype=np.float32).tobytes())
    kept_in_bucket = _buckets.setdefault(key, [])
    if any(np.array_equal(image, kept) for kept in kept_in_bucket):
        continue
    kept_in_bucket.append(image)
    unique_x.append(image)
    unique_y.append(label)
#%% ------------------------------------------ Encode labels and save -------------------------------------------------
from sklearn.preprocessing import LabelEncoder

# Create the output folder if needed. exist_ok=True replaces the separate
# exists() check and removes the module-level name `dir`, which shadowed the
# builtin of the same name.
os.makedirs("COVID_npy", exist_ok=True)

# Stack the per-image arrays and the labels into single numpy arrays.
unique_x, unique_y = np.array(unique_x), np.array(unique_y)

# Fit on the fixed list of classes, not on the loaded labels, so the
# integer ids do not depend on which classes happened to be present.
# LabelEncoder orders classes alphabetically: COVID=0, NORMAL=1, PNEUMONIA=2.
le = LabelEncoder()
le.fit(["COVID", "NORMAL", "PNEUMONIA"])
unique_y = le.transform(unique_y)

np.save("COVID_npy/input.npy", unique_x)
np.save("COVID_npy/target.npy", unique_y)
print("Completed.")