# resnet50.schedule_agp.filters_2.yaml (from IntelLabs/distiller)
#
# This schedule performs filter-pruning using L1-norm ranking, with AGP setting the pruning-rate decay.
#
# Best Top1: 74.782 (epoch 94)
# No. of Parameters: 12,671,168 (of 25,502,912) = 49.69% dense (50.31% sparse)
# Total MACs: 2,037,186,560 (of 4,089,184,256) = 49.82% compute = 2.01x
#
# time python3 compress_classifier.py -a=resnet50 --pretrained -p=50 ../../../data.imagenet/ -j=22 --epochs=100 --lr=0.0005 --compress=resnet50.schedule_agp.filters_2.yaml --validation-split=0 --num-best-scores=10
#
# Parameters:
# +----+-------------------------------------+--------------------+---------------+----------------+------------+------------+----------+----------+----------+------------+---------+----------+------------+
# | | Name | Shape | NNZ (dense) | NNZ (sparse) | Cols (%) | Rows (%) | Ch (%) | 2D (%) | 3D (%) | Fine (%) | Std | Mean | Abs-Mean |
# |----+-------------------------------------+--------------------+---------------+----------------+------------+------------+----------+----------+----------+------------+---------+----------+------------|
# | 0 | module.conv1.weight | (64, 3, 7, 7) | 9408 | 9408 | 0.00000 | 0.00000 | 0.00000 | 0.00000 | 0.00000 | 0.00000 | 0.11098 | -0.00043 | 0.06774 |
# | 1 | module.layer1.0.conv1.weight | (32, 64, 1, 1) | 2048 | 2048 | 0.00000 | 0.00000 | 0.00000 | 0.00000 | 0.00000 | 0.00000 | 0.07631 | -0.00587 | 0.04636 |
# | 2 | module.layer1.0.conv2.weight | (32, 32, 3, 3) | 9216 | 9216 | 0.00000 | 0.00000 | 0.00000 | 0.00000 | 0.00000 | 0.00000 | 0.04019 | 0.00147 | 0.02596 |
# | 3 | module.layer1.0.conv3.weight | (256, 32, 1, 1) | 8192 | 8192 | 0.00000 | 0.00000 | 0.00000 | 0.00000 | 0.00000 | 0.00000 | 0.03788 | -0.00045 | 0.02391 |
# | 4 | module.layer1.0.downsample.0.weight | (256, 64, 1, 1) | 16384 | 16384 | 0.00000 | 0.00000 | 0.00000 | 0.00000 | 0.00000 | 0.00000 | 0.05137 | -0.00304 | 0.02857 |
# | 5 | module.layer1.1.conv1.weight | (32, 256, 1, 1) | 8192 | 8192 | 0.00000 | 0.00000 | 0.00000 | 0.00000 | 0.00000 | 0.00000 | 0.03148 | 0.00120 | 0.02169 |
# | 6 | module.layer1.1.conv2.weight | (32, 32, 3, 3) | 9216 | 9216 | 0.00000 | 0.00000 | 0.00000 | 0.00000 | 0.00000 | 0.00000 | 0.03669 | 0.00017 | 0.02582 |
# | 7 | module.layer1.1.conv3.weight | (256, 32, 1, 1) | 8192 | 8192 | 0.00000 | 0.00000 | 0.00000 | 0.00000 | 0.00000 | 0.00000 | 0.03162 | -0.00060 | 0.02006 |
# | 8 | module.layer1.2.conv1.weight | (32, 256, 1, 1) | 8192 | 8192 | 0.00000 | 0.00000 | 0.00000 | 0.00000 | 0.00000 | 0.00000 | 0.02993 | 0.00020 | 0.02192 |
# | 9 | module.layer1.2.conv2.weight | (32, 32, 3, 3) | 9216 | 9216 | 0.00000 | 0.00000 | 0.00000 | 0.00000 | 0.00000 | 0.00000 | 0.03611 | 0.00009 | 0.02719 |
# | 10 | module.layer1.2.conv3.weight | (256, 32, 1, 1) | 8192 | 8192 | 0.00000 | 0.00000 | 0.00000 | 0.00000 | 0.00000 | 0.00000 | 0.02778 | -0.00228 | 0.01659 |
# | 11 | module.layer2.0.conv1.weight | (128, 256, 1, 1) | 32768 | 32768 | 0.00000 | 0.00000 | 0.00000 | 0.00000 | 0.00000 | 0.00000 | 0.03164 | -0.00144 | 0.02232 |
# | 12 | module.layer2.0.conv2.weight | (64, 128, 3, 3) | 73728 | 73728 | 0.00000 | 0.00000 | 0.00000 | 0.00000 | 0.00000 | 0.00000 | 0.02147 | 0.00000 | 0.01595 |
# | 13 | module.layer2.0.conv3.weight | (512, 64, 1, 1) | 32768 | 32768 | 0.00000 | 0.00000 | 0.00000 | 0.00000 | 0.00000 | 0.00000 | 0.02703 | 0.00005 | 0.01656 |
# | 14 | module.layer2.0.downsample.0.weight | (512, 256, 1, 1) | 131072 | 131072 | 0.00000 | 0.00000 | 0.00000 | 0.00000 | 0.00000 | 0.00000 | 0.02051 | -0.00038 | 0.01206 |
# | 15 | module.layer2.1.conv1.weight | (64, 512, 1, 1) | 32768 | 32768 | 0.00000 | 0.00000 | 0.00000 | 0.00000 | 0.00000 | 0.00000 | 0.01744 | -0.00008 | 0.01081 |
# | 16 | module.layer2.1.conv2.weight | (128, 64, 3, 3) | 73728 | 73728 | 0.00000 | 0.00000 | 0.00000 | 0.00000 | 0.00000 | 0.00000 | 0.02022 | 0.00011 | 0.01301 |
# | 17 | module.layer2.1.conv3.weight | (512, 128, 1, 1) | 65536 | 65536 | 0.00000 | 0.00000 | 0.00000 | 0.00000 | 0.00000 | 0.00000 | 0.01982 | -0.00107 | 0.01153 |
# | 18 | module.layer2.2.conv1.weight | (64, 512, 1, 1) | 32768 | 32768 | 0.00000 | 0.00000 | 0.00000 | 0.00000 | 0.00000 | 0.00000 | 0.02328 | -0.00053 | 0.01618 |
# | 19 | module.layer2.2.conv2.weight | (64, 64, 3, 3) | 36864 | 36864 | 0.00000 | 0.00000 | 0.00000 | 0.00000 | 0.00000 | 0.00000 | 0.02380 | 0.00012 | 0.01667 |
# | 20 | module.layer2.2.conv3.weight | (512, 64, 1, 1) | 32768 | 32768 | 0.00000 | 0.00000 | 0.00000 | 0.00000 | 0.00000 | 0.00000 | 0.02561 | 0.00015 | 0.01784 |
# | 21 | module.layer2.3.conv1.weight | (64, 512, 1, 1) | 32768 | 32768 | 0.00000 | 0.00000 | 0.00000 | 0.00000 | 0.00000 | 0.00000 | 0.02327 | -0.00090 | 0.01733 |
# | 22 | module.layer2.3.conv2.weight | (64, 64, 3, 3) | 36864 | 36864 | 0.00000 | 0.00000 | 0.00000 | 0.00000 | 0.00000 | 0.00000 | 0.02368 | -0.00043 | 0.01789 |
# | 23 | module.layer2.3.conv3.weight | (512, 64, 1, 1) | 32768 | 32768 | 0.00000 | 0.00000 | 0.00000 | 0.00000 | 0.00000 | 0.00000 | 0.02287 | -0.00116 | 0.01577 |
# | 24 | module.layer3.0.conv1.weight | (256, 512, 1, 1) | 131072 | 131072 | 0.00000 | 0.00000 | 0.00000 | 0.00000 | 0.00000 | 0.00000 | 0.02737 | -0.00126 | 0.01964 |
# | 25 | module.layer3.0.conv2.weight | (128, 256, 3, 3) | 294912 | 294912 | 0.00000 | 0.00000 | 0.00000 | 0.00000 | 0.00000 | 0.00000 | 0.01679 | -0.00019 | 0.01241 |
# | 26 | module.layer3.0.conv3.weight | (1024, 128, 1, 1) | 131072 | 131072 | 0.00000 | 0.00000 | 0.00000 | 0.00000 | 0.00000 | 0.00000 | 0.02290 | -0.00043 | 0.01647 |
# | 27 | module.layer3.0.downsample.0.weight | (1024, 512, 1, 1) | 524288 | 524288 | 0.00000 | 0.00000 | 0.00000 | 0.00000 | 0.00000 | 0.00000 | 0.01431 | -0.00000 | 0.00982 |
# | 28 | module.layer3.1.conv1.weight | (128, 1024, 1, 1) | 131072 | 131072 | 0.00000 | 0.00000 | 0.00000 | 0.00000 | 0.00000 | 0.00000 | 0.01517 | -0.00037 | 0.01072 |
# | 29 | module.layer3.1.conv2.weight | (128, 128, 3, 3) | 147456 | 147456 | 0.00000 | 0.00000 | 0.00000 | 0.00000 | 0.00000 | 0.00000 | 0.01683 | -0.00006 | 0.01212 |
# | 30 | module.layer3.1.conv3.weight | (1024, 128, 1, 1) | 131072 | 131072 | 0.00000 | 0.00000 | 0.00000 | 0.00000 | 0.00000 | 0.00000 | 0.01959 | -0.00063 | 0.01394 |
# | 31 | module.layer3.2.conv1.weight | (128, 1024, 1, 1) | 131072 | 131072 | 0.00000 | 0.00000 | 0.00000 | 0.00000 | 0.00000 | 0.00000 | 0.01547 | -0.00032 | 0.01103 |
# | 32 | module.layer3.2.conv2.weight | (128, 128, 3, 3) | 147456 | 147456 | 0.00000 | 0.00000 | 0.00000 | 0.00000 | 0.00000 | 0.00000 | 0.01644 | -0.00056 | 0.01214 |
# | 33 | module.layer3.2.conv3.weight | (1024, 128, 1, 1) | 131072 | 131072 | 0.00000 | 0.00000 | 0.00000 | 0.00000 | 0.00000 | 0.00000 | 0.01832 | -0.00054 | 0.01331 |
# | 34 | module.layer3.3.conv1.weight | (128, 1024, 1, 1) | 131072 | 131072 | 0.00000 | 0.00000 | 0.00000 | 0.00000 | 0.00000 | 0.00000 | 0.01675 | -0.00058 | 0.01250 |
# | 35 | module.layer3.3.conv2.weight | (128, 128, 3, 3) | 147456 | 147456 | 0.00000 | 0.00000 | 0.00000 | 0.00000 | 0.00000 | 0.00000 | 0.01552 | -0.00053 | 0.01179 |
# | 36 | module.layer3.3.conv3.weight | (1024, 128, 1, 1) | 131072 | 131072 | 0.00000 | 0.00000 | 0.00000 | 0.00000 | 0.00000 | 0.00000 | 0.01741 | -0.00095 | 0.01280 |
# | 37 | module.layer3.4.conv1.weight | (128, 1024, 1, 1) | 131072 | 131072 | 0.00000 | 0.00000 | 0.00000 | 0.00000 | 0.00000 | 0.00000 | 0.01738 | -0.00080 | 0.01312 |
# | 38 | module.layer3.4.conv2.weight | (128, 128, 3, 3) | 147456 | 147456 | 0.00000 | 0.00000 | 0.00000 | 0.00000 | 0.00000 | 0.00000 | 0.01539 | -0.00064 | 0.01169 |
# | 39 | module.layer3.4.conv3.weight | (1024, 128, 1, 1) | 131072 | 131072 | 0.00000 | 0.00000 | 0.00000 | 0.00000 | 0.00000 | 0.00000 | 0.01709 | -0.00126 | 0.01253 |
# | 40 | module.layer3.5.conv1.weight | (128, 1024, 1, 1) | 131072 | 131072 | 0.00000 | 0.00000 | 0.00000 | 0.00000 | 0.00000 | 0.00000 | 0.01868 | -0.00072 | 0.01434 |
# | 41 | module.layer3.5.conv2.weight | (128, 128, 3, 3) | 147456 | 147456 | 0.00000 | 0.00000 | 0.00000 | 0.00000 | 0.00000 | 0.00000 | 0.01528 | -0.00073 | 0.01170 |
# | 42 | module.layer3.5.conv3.weight | (1024, 128, 1, 1) | 131072 | 131072 | 0.00000 | 0.00000 | 0.00000 | 0.00000 | 0.00000 | 0.00000 | 0.01853 | -0.00212 | 0.01393 |
# | 43 | module.layer4.0.conv1.weight | (256, 1024, 1, 1) | 262144 | 262144 | 0.00000 | 0.00000 | 0.00000 | 0.00000 | 0.00000 | 0.00000 | 0.02219 | -0.00087 | 0.01715 |
# | 44 | module.layer4.0.conv2.weight | (256, 256, 3, 3) | 589824 | 589824 | 0.00000 | 0.00000 | 0.00000 | 0.00000 | 0.00000 | 0.00000 | 0.01234 | -0.00011 | 0.00962 |
# | 45 | module.layer4.0.conv3.weight | (2048, 256, 1, 1) | 524288 | 524288 | 0.00000 | 0.00000 | 0.00000 | 0.00000 | 0.00000 | 0.00000 | 0.01454 | -0.00058 | 0.01133 |
# | 46 | module.layer4.0.downsample.0.weight | (2048, 1024, 1, 1) | 2097152 | 2097152 | 0.00000 | 0.00000 | 0.00000 | 0.00000 | 0.00000 | 0.00000 | 0.00905 | -0.00018 | 0.00689 |
# | 47 | module.layer4.1.conv1.weight | (256, 2048, 1, 1) | 524288 | 524288 | 0.00000 | 0.00000 | 0.00000 | 0.00000 | 0.00000 | 0.00000 | 0.01431 | -0.00032 | 0.01119 |
# | 48 | module.layer4.1.conv2.weight | (256, 256, 3, 3) | 589824 | 589824 | 0.00000 | 0.00000 | 0.00000 | 0.00000 | 0.00000 | 0.00000 | 0.01231 | -0.00060 | 0.00965 |
# | 49 | module.layer4.1.conv3.weight | (2048, 256, 1, 1) | 524288 | 524288 | 0.00000 | 0.00000 | 0.00000 | 0.00000 | 0.00000 | 0.00000 | 0.01433 | 0.00003 | 0.01110 |
# | 50 | module.layer4.2.conv1.weight | (256, 2048, 1, 1) | 524288 | 524288 | 0.00000 | 0.00000 | 0.00000 | 0.00000 | 0.00000 | 0.00000 | 0.01778 | -0.00008 | 0.01397 |
# | 51 | module.layer4.2.conv2.weight | (256, 256, 3, 3) | 589824 | 589824 | 0.00000 | 0.00000 | 0.00000 | 0.00000 | 0.00000 | 0.00000 | 0.01080 | -0.00034 | 0.00850 |
# | 52 | module.layer4.2.conv3.weight | (2048, 256, 1, 1) | 524288 | 524288 | 0.00000 | 0.00000 | 0.00000 | 0.00000 | 0.00000 | 0.00000 | 0.01315 | 0.00019 | 0.00992 |
# | 53 | module.fc.weight | (1000, 2048) | 2048000 | 2048000 | 0.00000 | 0.00000 | 0.00000 | 0.00000 | 0.00000 | 0.00000 | 0.03325 | 0.00000 | 0.02289 |
# | 54 | Total sparsity: | - | 12671168 | 12671168 | 0.00000 | 0.00000 | 0.00000 | 0.00000 | 0.00000 | 0.00000 | 0.00000 | 0.00000 | 0.00000 |
# +----+-------------------------------------+--------------------+---------------+----------------+------------+------------+----------+----------+----------+------------+---------+----------+------------+
# 2018-12-09 13:27:25,875 - Total sparsity: 0.00
#
# 2018-12-09 13:27:25,875 - --- validate (epoch=99)-----------
# 2018-12-09 13:27:25,875 - 50000 samples (256 per mini-batch)
# 2018-12-09 13:27:46,138 - Epoch: [99][ 50/ 195] Loss 0.728680 Top1 80.640625 Top5 95.507812
# 2018-12-09 13:27:53,943 - Epoch: [99][ 100/ 195] Loss 0.850403 Top1 78.128906 Top5 94.128906
# 2018-12-09 13:28:03,180 - Epoch: [99][ 150/ 195] Loss 0.973435 Top1 75.731771 Top5 92.619792
# 2018-12-09 13:28:10,151 - ==> Top1: 74.738 Top5: 92.080 Loss: 1.018
#
# 2018-12-09 13:28:10,230 - ==> Best Top1: 75.896 on Epoch: 0
# 2018-12-09 13:28:10,230 - ==> Best Top1: 75.402 on Epoch: 1
# 2018-12-09 13:28:10,230 - ==> Best Top1: 74.916 on Epoch: 2
# 2018-12-09 13:28:10,230 - ==> Best Top1: 74.782 on Epoch: 94 <==========
# 2018-12-09 13:28:10,230 - ==> Best Top1: 74.776 on Epoch: 93
# 2018-12-09 13:28:10,230 - ==> Best Top1: 74.774 on Epoch: 84
# 2018-12-09 13:28:10,230 - ==> Best Top1: 74.772 on Epoch: 97
# 2018-12-09 13:28:10,231 - ==> Best Top1: 74.770 on Epoch: 98
# 2018-12-09 13:28:10,231 - ==> Best Top1: 74.738 on Epoch: 99
# 2018-12-09 13:28:10,231 - ==> Best Top1: 74.726 on Epoch: 91
# 2018-12-09 13:28:10,231 - Saving checkpoint to: logs/resnet50_filters_v3.1___2018.12.07-154945/resnet50_filters_v3.1_checkpoint.pth.tar
# 2018-12-09 13:28:10,458 - --- test ---------------------
# 2018-12-09 13:28:10,458 - 50000 samples (256 per mini-batch)
# 2018-12-09 13:28:30,687 - Test: [ 50/ 195] Loss 0.728680 Top1 80.640625 Top5 95.507812
# 2018-12-09 13:28:38,854 - Test: [ 100/ 195] Loss 0.850403 Top1 78.128906 Top5 94.128906
# 2018-12-09 13:28:47,691 - Test: [ 150/ 195] Loss 0.973435 Top1 75.731771 Top5 92.619792
# 2018-12-09 13:28:54,669 - ==> Top1: 74.738 Top5: 92.080 Loss: 1.018
version: 1

# Three pruners, applied by the policies below:
#   fc_pruner     - element-wise AGP on the final FC layer (87% sparsity target)
#   filter_pruner - L1-ranked filter (3D structure) pruning with an AGP decay
#                   schedule, 50% of filters removed from the listed convs
#   fine_pruner   - element-wise AGP on the layer4 bottleneck weights
# NOTE(review): only filter_pruner is scheduled in the policies section of this
# file; fc_pruner and fine_pruner are defined but unused here.
pruners:
  fc_pruner:
    class: AutomatedGradualPruner
    initial_sparsity: 0.05
    final_sparsity: 0.87
    weights: module.fc.weight

  filter_pruner:
    class: L1RankedStructureParameterPruner_AGP
    initial_sparsity: 0.05
    final_sparsity: 0.50
    group_type: Filters
    # Commented-out entries are intentionally excluded from filter pruning.
    weights: [module.layer1.0.conv1.weight,
              module.layer1.1.conv1.weight,
              module.layer1.2.conv1.weight,
              #module.layer2.0.conv1.weight,
              module.layer2.1.conv1.weight,
              module.layer2.2.conv1.weight,
              module.layer2.3.conv1.weight,
              #module.layer3.0.conv1.weight,
              module.layer3.1.conv1.weight,
              module.layer3.2.conv1.weight,
              module.layer3.3.conv1.weight,
              module.layer3.4.conv1.weight,
              module.layer3.5.conv1.weight,
              module.layer4.0.conv1.weight,
              module.layer4.1.conv1.weight,
              module.layer4.2.conv1.weight,
              module.layer1.0.conv2.weight,
              module.layer1.1.conv2.weight,
              module.layer1.2.conv2.weight,
              module.layer2.0.conv2.weight,
              #module.layer2.1.conv2.weight,
              module.layer2.2.conv2.weight,
              module.layer2.3.conv2.weight,
              module.layer3.0.conv2.weight,
              module.layer3.1.conv2.weight,
              module.layer3.2.conv2.weight,
              module.layer3.3.conv2.weight,
              module.layer3.4.conv2.weight,
              module.layer3.5.conv2.weight,
              module.layer4.0.conv2.weight,
              module.layer4.1.conv2.weight,
              module.layer4.2.conv2.weight]

  fine_pruner:
    class: AutomatedGradualPruner
    initial_sparsity: 0.05
    final_sparsity: 0.70
    weights: [
      module.layer4.0.conv2.weight,
      module.layer4.0.conv3.weight,
      module.layer4.0.downsample.0.weight,
      module.layer4.1.conv1.weight,
      module.layer4.1.conv2.weight,
      module.layer4.1.conv3.weight,
      module.layer4.2.conv1.weight,
      module.layer4.2.conv2.weight,
      module.layer4.2.conv3.weight]
# Network-thinning extension: physically removes the pruned filters so the
# model shrinks (fewer parameters / MACs), instead of keeping zeroed weights.
extensions:
  net_thinner:
    class: 'FilterRemover'
    thinning_func_str: remove_filters
    arch: 'resnet50'
    dataset: 'imagenet'
# LR decay used during fine-tuning (scheduled for epochs 40-80 below):
# multiply the learning rate by 0.95 each invocation.
lr_schedulers:
  pruning_lr:
    class: ExponentialLR
    gamma: 0.95
# Schedule: prune filters every epoch for epochs 0-30, thin the network at
# epoch 31, then decay the LR every epoch during fine-tuning (epochs 40-80).
policies:
  - pruner:
      instance_name: filter_pruner
      # args:
      #   mini_batch_pruning_frequency: 1
    starting_epoch: 0
    ending_epoch: 30
    frequency: 1

  # After completing the pruning, we perform network thinning and continue fine-tuning.
  - extension:
      instance_name: net_thinner
    epochs: [31]

  - lr_scheduler:
      instance_name: pruning_lr
    starting_epoch: 40
    ending_epoch: 80
    frequency: 1