model_org_sim_entr_gain.py
import numpy as np
import os
import torch
import torchvision.models as models
# from torch.autograd import Variable
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.utils.data import DataLoader
import sys
import math
import torch.nn.init as init
import logging
from torch.nn.parameter import Parameter
from models import *
from RAFT_test import demo
import matplotlib.pyplot as plt

def save_model(model, iter, name):
    torch.save(model.state_dict(), os.path.join(name, "iter_{}.pth.tar".format(iter)))
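
# Note (added for clarity): checkpoints are written as "<name>/iter_<step>.pth.tar"; load_model()
# below parses this "iter_<step>.pth" pattern to recover the training step. Hypothetical usage:
#     save_model(net, 100000, "/path/to/checkpoints")   # -> /path/to/checkpoints/iter_100000.pth.tar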

def load_model_rev(model, f, name):
    with open(f, 'rb') as f:
        pretrained_dict = torch.load(f)
        model_dict = model.state_dict()
        new_dict = {}
        # for k, _ in pretrained_dict.items():
        #     print("these keys are in the original model", k)
        for k, _ in model_dict.items():
            # print("these keys are in the combined model", k)
            if k == "deconv1.weight":
                # keep only the first 192 output channels of the pretrained deconv1 weight
                new_dict[k] = pretrained_dict[name + k][:192, :, :, :]
            else:
                # print("these are the keys in the Encoder and Decoder:", k)
                new_dict[k] = pretrained_dict[name + k]
                # print(name + k, pretrained_dict[name + k].size())
        model_dict.update(new_dict)
        model.load_state_dict(model_dict)

# path = '/scratch/hunseok_root/hunseok0/mrakeshc/flickr_dataset/checkpoint/with_attention_rev_32/iter_979751.pth.tar'

def load_model_deep(model, f, name):
    with open(f, 'rb') as f:
        pretrained_dict = torch.load(f)
        model_dict = model.state_dict()
        new_dict = {}
        # for k, _ in pretrained_dict.items():
        #     print("these keys are in the original model", k)
        for k, _ in model_dict.items():
            # print("these keys are in the combined model", k)
            if k == "deconv1.weight":
                # keep only the first 192 output channels of the pretrained deconv1 weight
                new_dict[k] = pretrained_dict[name + k][:192, :, :, :]
            else:
                # print("the weights are being loaded")
                # print("these are the keys in the Encoder and Decoder:", k)
                new_dict[k] = pretrained_dict[name + k]
                # print(name + k, pretrained_dict[name + k].size())
        model_dict.update(new_dict)
        model.load_state_dict(model_dict)

path = '/home/mrakeshc/NIC/code/output/msssim4.pth.tar'
# path = '/home/mrakeshc/NIC/code/output_1/mse400.pth.tar'
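
# Illustrative note (not from the original script): load_model_rev / load_model_deep prepend the
# given `name` prefix to each key of this model's state_dict when indexing the pretrained
# checkpoint, so the prefix must match how the sub-module was registered when the checkpoint was
# saved. A hypothetical call might look like:
#     load_model_deep(coder.Encoder, path, name="Encoder.")   # "Encoder." is an assumed prefix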

def load_model(model, f):
    with open(f, 'rb') as f:
        pretrained_dict = torch.load(f)
        model_dict = model.state_dict()
        new_dict = {}
        # print(model_dict.keys())
        # for k, v in pretrained_dict.items():
        #     if (k in model_dict) and (k != "Encoder.conv1.weight") and (k != "Encoder.conv1.bias"):
        #         # print("these are the keys:", k)
        #         new_dict[k] = v
        pretrained_dict = {k: v for k, v in pretrained_dict.items() if k in model_dict}
        model_dict.update(pretrained_dict)
        # model_dict.update(new_dict)
        model.load_state_dict(model_dict)
    f = str(f)
    if f.find('iter_') != -1 and f.find('.pth') != -1:
        st = f.find('iter_') + 5
        ed = f.find('.pth', st)
        return int(f[st:ed])
    else:
        return 0
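
# Hypothetical usage sketch (checkpoint path is assumed, not taken from this repository):
#     net = VideoCoder()
#     global_step = load_model(net, '/path/to/checkpoints/iter_100000.pth.tar')
#     # global_step == 100000; filenames without the "iter_<step>.pth" pattern return 0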


class VideoCoder(nn.Module):
    def __init__(self, feature_channel=192, latent_channel=384):
        super(VideoCoder, self).__init__()
        self.Encoder = Feature_encoder(feature_channel)
        self.Decoder = Feature_decoder(in_channel=feature_channel, mid_channel=feature_channel)
        self.temporalPredictor = Temporal_predictor(in_channel=feature_channel * 5, out_channel=feature_channel)
        self.priorEncoder = Analysis_prior_net(in_channel=feature_channel, out_channel=feature_channel)
        self.priorDecoder = Synthesis_prior_net(in_channel=feature_channel, out_channel=feature_channel)
        self.bitEstimator_z = Bit_estimator(channel=feature_channel)
        self.contextModel = Context_model_autoregressive(in_channel=feature_channel, out_channel=latent_channel)
        self.entropyParameters = Entropy_parameters(in_channel=latent_channel * 2, out_channel=latent_channel)
        self.feature_channel = feature_channel
        self.latent_channel = latent_channel

    # def forward(self, prev1, prev2, cur):
    def forward(self, prev1, prev2, prev3, prev4, cur, residuals_list, dir_image, diff, num_unchanged_total, num_total_blocks, index):
        # uniform noise used as a differentiable proxy for quantization during training
        quant_noise_feature = torch.zeros(cur.size(0), self.feature_channel, cur.size(2) // 4, cur.size(3) // 4).cuda()
        quant_noise_z = torch.zeros(cur.size(0), self.feature_channel, cur.size(2) // 8, cur.size(3) // 8).cuda()
        quant_noise_feature = torch.nn.init.uniform_(torch.zeros_like(quant_noise_feature), -0.5, 0.5)
        quant_noise_z = torch.nn.init.uniform_(torch.zeros_like(quant_noise_z), -0.5, 0.5)
        latent_diff = torch.zeros(cur.size(0), self.feature_channel, cur.size(2) // 8, cur.size(3) // 8).cuda()
        # encode the four previous frames, the current frame, and the RAFT-warped frame into features
        prev1_feature = self.Encoder(prev1)
        prev2_feature = self.Encoder(prev2)
        prev3_feature = self.Encoder(prev3)
        prev4_feature = self.Encoder(prev4)
        cur_feature = self.Encoder(cur)
        warpped_image = demo(prev3, prev4)
        warpped_features = self.Encoder(warpped_image)
        batch_size = prev2_feature.size()[0]
        # predict the current feature from the previous features and code only the residual
        cur_pred = self.temporalPredictor(prev1_feature, prev2_feature, prev3_feature, prev4_feature, warpped_features)
        residual = cur_feature - cur_pred
        # zero out the residual channels selected by `index` before entropy coding
        residual[:, index, :, :] = 0
        # residual *= latent_diff
        # residual = latent_diff
        # residual = torch.mul(residual, latent_diff)
        z = self.priorEncoder(residual)
        if self.training:
            compressed_z = z + quant_noise_z
        else:
            compressed_z = torch.round(z)
            # compressed_z = torch.round(z*2)/2
        hd_out = self.priorDecoder(compressed_z)
        feature_renorm = residual
        if self.training:
            compressed_feature_renorm = feature_renorm + quant_noise_feature
        else:
            compressed_feature_renorm = torch.round(feature_renorm)
        # detach before converting to numpy so this also works when gradients are enabled
        residuals_list.append(torch.squeeze(residual.detach().cpu()).numpy())
        recon_cur = self.Decoder(cur_pred + compressed_feature_renorm)
        recon_sigma = hd_out

        def feature_probs_based_sigma(feature, sigma):
            # likelihood of the quantized residual under a zero-mean Laplace with scale `sigma`
            mu = torch.zeros_like(sigma)
            sigma = sigma.clamp(1e-10, 1e10)
            gaussian = torch.distributions.laplace.Laplace(mu, sigma)
            probs = gaussian.cdf(feature + 0.5) - gaussian.cdf(feature - 0.5)
            total_bits = torch.sum(torch.clamp(-1.0 * torch.log(probs + 1e-10) / math.log(2.0), 0, 50))
            return total_bits, probs

        def iclr18_estimate_bits_z(z):
            # factorized (ICLR'18-style) bit estimate for the hyperprior z
            prob = self.bitEstimator_z(z + 0.5) - self.bitEstimator_z(z - 0.5)
            total_bits = torch.sum(torch.clamp(-1.0 * torch.log(prob + 1e-10) / math.log(2.0), 0, 50))
            return total_bits, prob

        total_bits_feature, _ = feature_probs_based_sigma(compressed_feature_renorm, recon_sigma)
        total_bits_z, _ = iclr18_estimate_bits_z(compressed_z)
        im_shape = cur.size()
        # bits per pixel of the residual latents and of the hyperprior
        bpp_feature = total_bits_feature / (batch_size * im_shape[2] * im_shape[3])
        bpp_z = total_bits_z / (batch_size * im_shape[2] * im_shape[3])
        bpp = bpp_feature + bpp_z
        clipped_recon_cur = recon_cur.clamp(0., 1.)
        mse_loss = torch.mean((clipped_recon_cur - cur).pow(2))
        return clipped_recon_cur, mse_loss, bpp_feature, bpp_z, bpp
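

# ---------------------------------------------------------------------------
# Minimal smoke-test sketch, not part of the original training pipeline. It assumes CUDA is
# available and that `models` and `RAFT_test.demo` are importable; the dummy 256x256 frames,
# the masked channel list `index`, and the rate-distortion weight `train_lambda` are all
# arbitrary choices made for illustration only.
if __name__ == "__main__":
    net = VideoCoder().cuda()
    net.train()
    # five consecutive dummy frames: prev1..prev4 and the current frame
    frames = [torch.rand(1, 3, 256, 256).cuda() for _ in range(5)]
    residuals_list = []
    index = list(range(16))  # residual channels to zero out (arbitrary choice)
    recon, mse_loss, bpp_feature, bpp_z, bpp = net(
        frames[0], frames[1], frames[2], frames[3], frames[4],
        residuals_list, dir_image=None, diff=None,
        num_unchanged_total=0, num_total_blocks=0, index=index)
    train_lambda = 2048  # assumed rate-distortion trade-off weight
    rd_loss = train_lambda * mse_loss + bpp
    rd_loss.backward()
    print("mse: {:.6f}, bpp: {:.4f}".format(mse_loss.item(), bpp.item()))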