-
Notifications
You must be signed in to change notification settings - Fork 0
/
Egoshare_2_GenerateDB.py
executable file
·191 lines (150 loc) · 7.91 KB
/
Egoshare_2_GenerateDB.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
#!coding: utf-8
import psyco
from Isolated_Char_Generator import *
from Break_Egoshare_Captcha import *
import shutil
# Selects which kind of character database this script builds.
CAPTCHA_BASED = False  # using captchas from the website
SIMULATION_BASED = True  # using simulated captchas with various fonts

if SIMULATION_BASED:
    # Passed unchanged to Generate_Set for both the training and the test set.
    DEFAULT_SIZE = (30, 30)

    GENERATE_TRAINING_SET = True
    GENERATE_VALIDATION_SET = False

    if GENERATE_TRAINING_SET:
        print("""
##############################################################################
############## SIMULATION BASED TRAINING SET ######################
##############################################################################
""")
        # Digits only for the training set.
        GENERATE_CAPITAL_LETTERS = False
        GENERATE_DIGITS = True
        elem_to_gen = Generate_Element_List(GENERATE_CAPITAL_LETTERS, GENERATE_DIGITS)

        DESTINATION_FOLDER = 'Egoshare/DBTraining-Simulation_based'
        CLEAN_DESTINATION_FOLDER = True

        # Generation parameters; their exact semantics are defined by
        # Generate_Set (imported elsewhere) -- TODO confirm against it.
        DISTORTION_W_MIN = 0
        DISTORTION_W_MAX = 1
        DISTORTION_H_MIN = 0
        DISTORTION_H_MAX = 1
        SCALE_MIN = 17
        SCALE_MAX = 23
        STEP = 1
        ALIGN_RANGEY = [0.5]
        ALIGN_RANGEX = [0.5]
        # Every even value from 2 to 30 inclusive.
        ROTATIONS = list(range(2, 31, 2))
        # Each entry pairs a font file with a two-number tuple consumed by
        # Generate_Set (meaning not visible here -- TODO confirm).
        FONTS = [
            ("Fonts/comic.ttf", (145, 163)),
            ("Fonts/vera.ttf", (160, 180)),
            ("Fonts/califb.ttf", (171, 191)),
        ]

        Generate_Set(
            DESTINATION_FOLDER,
            CLEAN_DESTINATION_FOLDER,
            DISTORTION_W_MIN,
            DISTORTION_W_MAX,
            DISTORTION_H_MIN,
            DISTORTION_H_MAX,
            SCALE_MIN,
            SCALE_MAX,
            STEP,
            elem_to_gen,
            FONTS,
            ALIGN_RANGEX,
            ALIGN_RANGEY,
            DEFAULT_SIZE,
            ROTATIONS,
        )

    if GENERATE_VALIDATION_SET:
        print("""
##############################################################################
################# SIMULATION BASED TEST SET #######################
##############################################################################
""")
        # Digits only for the test set as well.
        GENERATE_CAPITAL_LETTERS = False
        GENERATE_DIGITS = True
        elem_to_gen = Generate_Element_List(GENERATE_CAPITAL_LETTERS, GENERATE_DIGITS)

        DESTINATION_FOLDER = 'Egoshare/DBTest-Simulation_based'
        CLEAN_DESTINATION_FOLDER = True

        # The test set uses different parameter values from the training set.
        DISTORTION_W_MIN = 0
        DISTORTION_W_MAX = 2
        DISTORTION_H_MIN = 0
        DISTORTION_H_MAX = 2
        SCALE_MIN = 15
        SCALE_MAX = 20
        STEP = 2
        ALIGN_RANGEY = [0.7, 1]
        ALIGN_RANGEX = [0.5]
        ROTATIONS = [2, 9, 13, 22]
        FONTS = [
            ("Fonts/comic.ttf", (140, 160)),
            ("Fonts/vera.ttf", (160, 180)),
            ("Fonts/califb.ttf", (160, 180)),
        ]

        Generate_Set(
            DESTINATION_FOLDER,
            CLEAN_DESTINATION_FOLDER,
            DISTORTION_W_MIN,
            DISTORTION_W_MAX,
            DISTORTION_H_MIN,
            DISTORTION_H_MAX,
            SCALE_MIN,
            SCALE_MAX,
            STEP,
            elem_to_gen,
            FONTS,
            ALIGN_RANGEX,
            ALIGN_RANGEY,
            DEFAULT_SIZE,
            ROTATIONS,
        )
def Prepare_Dest_Folder(DEST_FOLDER):
    """Reset DEST_FOLDER so it holds the ten class folders '0' to '9'.

    Steps, in order:
      1. Create DEST_FOLDER (including missing parent folders) if needed.
      2. For every non-hidden sub-folder of DEST_FOLDER, delete the files it
         contains and then try to remove the sub-folder itself.
      3. Create the sub-folders '0' .. '9' where they do not exist.

    Entries whose name starts with '.' are never touched. Plain files lying
    directly in DEST_FOLDER and directories nested inside a sub-folder are
    left in place; a sub-folder that cannot be removed (for instance because
    it still contains a nested directory) is reported on stdout and kept.

    DEST_FOLDER -- path of the folder to prepare.
    """
    # Create the destination folder; makedirs also creates missing parents
    # such as the leading 'Egoshare/' component.
    if not os.path.isdir(DEST_FOLDER):
        os.makedirs(DEST_FOLDER)

    # Remove the files left over from a previous run.
    print("Removing older files...")
    for entry in os.listdir(DEST_FOLDER):
        if entry.startswith("."):
            # to prevent removal of .svn folders !
            continue
        entry_path = os.path.join(DEST_FOLDER, entry)
        if not os.path.isdir(entry_path):
            # A stray file is not a class folder: listing it would raise.
            continue
        for name in os.listdir(entry_path):
            item_path = os.path.join(entry_path, name)
            if os.path.isdir(item_path) and not os.path.islink(item_path):
                # os.remove cannot delete a real directory; leave it alone
                # and let the rmdir below report the folder as not removable.
                continue
            os.remove(item_path)
        try:
            os.rmdir(entry_path)
        except OSError:
            print("Impossible de supprimer le dossier %s ..." % entry_path)
    print("Done...")

    # Create one sub-folder per digit.
    for digit in range(10):
        digit_folder = os.path.join(DEST_FOLDER, str(digit))
        if not os.path.isdir(digit_folder):
            os.mkdir(digit_folder)
def Generate_Captcha_Based_set(CAPTCHA_SOURCE_FOLDER,DEST_FOLDER):
    """Sort the characters of hand-labelled captchas into class folders.

    DEST_FOLDER is first reset with Prepare_Dest_Folder. Every '.jpg' file
    found under CAPTCHA_SOURCE_FOLDER (walked recursively) is then passed to
    preprocess_captcha_part, and the files 'letter1.bmp', 'letter2.bmp' and
    'letter3.bmp' are moved from the current working directory into the
    sub-folder of DEST_FOLDER named after the 1st, 2nd and 3rd character of
    the captcha file name.

    NOTE(review): presumably preprocess_captcha_part writes those three
    'letterN.bmp' files and the file name starts with the three labelled
    digits -- confirm against Break_Egoshare_Captcha.

    CAPTCHA_SOURCE_FOLDER -- folder holding the labelled '.jpg' captchas.
    DEST_FOLDER           -- folder receiving the per-class '.bmp' images.
    """
    Prepare_Dest_Folder(DEST_FOLDER)

    # (file to move, suffix of its new name) for each of the three
    # characters, in reading order.
    pieces = (
        ("letter1.bmp", "number_1.bmp"),
        ("letter2.bmp", "number_2.bmp"),
        ("letter3.bmp", "number_3.bmp"),
    )

    # Fill the class sub-folders.
    for current_dir, _subdirs, entries in os.walk(CAPTCHA_SOURCE_FOLDER):
        for captcha in entries:
            if not captcha.endswith(".jpg"):
                continue
            print(captcha)
            preprocess_captcha_part(os.path.join(current_dir, captcha), remove=False)
            stem = captcha[:-4]  # file name without the '.jpg' extension
            for position, (extracted, suffix) in enumerate(pieces):
                # The character at `position` in the name picks the class folder.
                target = os.path.join(DEST_FOLDER, captcha[position], stem + suffix)
                shutil.move(extracted, target)
if CAPTCHA_BASED:
    GENERATE_TRAINING_SET = True  # using hand-labelled captchas
    GENERATE_VALIDATION_SET = True  # using hand-labelled captchas
    GENERATE_COMPUTER_LABELLED_SET = False

    if GENERATE_TRAINING_SET:
        print("""
##############################################################################
############## CAPTCHA BASED TRAINING SET #########################
##############################################################################
""")
        CAPTCHA_SOURCE_FOLDER = "Egoshare/Labelled Catpchas Training"
        DEST_FOLDER = "Egoshare/DBTraining-Captcha_based"
        Generate_Captcha_Based_set(CAPTCHA_SOURCE_FOLDER, DEST_FOLDER)

    if GENERATE_VALIDATION_SET:
        print("""
##############################################################################
################# CAPTCHA BASED TEST SET ##########################
##############################################################################
""")
        CAPTCHA_SOURCE_FOLDER = "Egoshare/Labelled Catpchas Test"
        DEST_FOLDER = "Egoshare/DBTest-Captcha_based"
        Generate_Captcha_Based_set(CAPTCHA_SOURCE_FOLDER, DEST_FOLDER)

    if GENERATE_COMPUTER_LABELLED_SET:
        print("""
##############################################################################
############ COMPUTER LABELLED CAPTCHA BASED SET ####################
##############################################################################
""")
        DEST_FOLDER = "Egoshare/Computer Labelled Captcha based set"
        CAPTCHA_SOURCE_FOLDER = "Egoshare/Rough Captchas"
        MODEL_FILE = 'Egoshare/Models/captcha_based_TR=687_TEST=143_C=1000_KERNEL=1.svm'
        model = load_model(MODEL_FILE)
        Prepare_Dest_Folder(DEST_FOLDER)

        # (file to move, suffix of its new name) for each of the three
        # characters, in reading order.
        pieces = (
            ("letter1.bmp", "number_1.bmp"),
            ("letter2.bmp", "number_2.bmp"),
            ("letter3.bmp", "number_3.bmp"),
        )

        # Fill the class sub-folders, using the model's prediction as label.
        for current_dir, _subdirs, entries in os.walk(CAPTCHA_SOURCE_FOLDER):
            for captcha in entries:
                if not captcha.endswith(".jpg"):
                    continue
                print(captcha)
                first, second, third = preprocess_captcha_part(
                    os.path.join(current_dir, captcha), remove=False)
                prediction = break_captcha(model, first, second, third)
                stem = captcha[:-4]  # file name without the '.jpg' extension
                for position, (extracted, suffix) in enumerate(pieces):
                    # prediction[position] names the class folder; presumably
                    # a one-character string -- confirm against break_captcha.
                    target = os.path.join(DEST_FOLDER, prediction[position], stem + suffix)
                    shutil.move(extracted, target)

        # NOTE(review): the original indentation was lost; this reminder is
        # assumed to belong to the computer-labelled branch -- confirm.
        print("""Done.
!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
!!!!!!!!! NOW CORRECT AND COPY MANUALLY COMPUTER LABELLED FILES !!!!!!!!!!
!!!!!!!!! INTO CAPTCHA BASED TRAINING AND TEST FOLDERS !!!!!!!!!!
!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
""")

# Wait for Enter before the script ends.
# NOTE(review): assumed to sit at module level (runs in every mode) -- confirm.
raw_input()