-
Notifications
You must be signed in to change notification settings - Fork 8
/
mg_stats_plotter.py
237 lines (219 loc) · 12.1 KB
/
mg_stats_plotter.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
from collections import Counter
from omegaconf import OmegaConf
from pathlib import Path
from utils import mg_utils as utils
import matplotlib; matplotlib.use('Agg')
import json
import matplotlib.pyplot as plt
import numpy as np
def plot_emotic_emo_freq(all_mg, emotic_mapping_path):
"""
Plots the frequency of emotions in the Emotic dataset.
The plot is saved in plots/emotic_emo_freq.pdf
Args:
all_mg (list): List of all the MovieGraph objects.
emotic_mapping_path (str): Path to the emotic_mapping.json file.
"""
FILENAME = "emotic_emo_freq.pdf"
emos = ['loving', 'friendly', 'angry', 'resentful', 'outraged', 'vengeful', 'annoyed', 'annoying', 'frustrated', 'irritated', 'agitated', 'bitter', 'insensitive', 'exasperated',
'displeased', 'optimistic', 'hopeful', 'imaginative', 'eager', 'disgusted', 'horrified', 'hateful', 'confident', 'proud', 'stubborn', 'defiant', 'independent', 'convincing',
'disapproving', 'hostile', 'unfriendly', 'mean', 'disrespectful', 'mocking', 'condescending', 'cunning', 'manipulative', 'nasty', 'deceitful', 'conceited', 'sleazy', 'greedy',
'rebellious', 'petty', 'indifferent', 'bored', 'distracted', 'distant', 'uninterested', 'self-centered', 'lonely', 'cynical', 'restrained', 'unimpressed', 'dismissive', 'worried',
'nervous', 'tense', 'anxious', 'afraid', 'alarmed', 'suspicious', 'uncomfortable', 'hesitant', 'reluctant', 'insecure', 'stressed', 'unsatisfied', 'solemn', 'submissive', 'confused',
'skeptical', 'indecisive', 'embarrassed', 'ashamed', 'humiliated', 'curious', 'serious', 'intrigued', 'persistent', 'interested', 'attentive', 'fascinated', 'respectful', 'grateful',
'excited', 'enthusiastic', 'energetic', 'playful', 'impatient', 'panicky', 'impulsive', 'hasty', 'tired', 'sleepy', 'dizzy', 'scared', 'fearful', 'timid', 'terrified', 'cheerful',
'delighted', 'happy', 'amused', 'laughing', 'thrilled', 'smiling', 'pleased', 'overwhelmed', 'ecstatic', 'exuberant', 'hurt', 'content', 'relieved', 'relaxed', 'calm', 'quiet',
'satisfied', 'reserved', 'carefree', 'funny', 'attracted', 'aroused', 'hedonistic', 'pleasant', 'flattered', 'entertaining', 'mesmerized', 'sad', 'melancholy', 'upset',
'disappointed', 'discouraged', 'grumpy', 'crying', 'regretful', 'grief-stricken', 'depressed', 'heartbroken', 'remorseful', 'hopeless', 'pensive', 'miserable', 'apologetic',
'nostalgic', 'offended', 'insulted', 'ignorant', 'disturbed', 'abusive', 'offensive', 'surprise', 'surprised', 'shocked', 'amazed', 'startled', 'astonished', 'speechless',
'disbelieving', 'incredulous', 'kind', 'compassionate', 'supportive', 'sympathetic', 'encouraging', 'thoughtful', 'understanding', 'generous', 'concerned', 'dependable',
'caring', 'forgiving', 'reassuring', 'gentle', 'jealous', 'determined', 'aggressive', 'desperate', 'focused', 'dedicated', 'diligent']
all_emos, _ = utils.get_all_emotions(all_mg)
freq = Counter(all_emos)
with open(emotic_mapping_path, 'r') as f:
mapping = json.load(f)
rev_map = dict()
for emot, emos in mapping.items():
for emo in emos:
rev_map[emo] = emot
top_k_emo_freq_pairs = dict()
for emot, emos in mapping.items():
top_k_emo_freq_pairs[emot] = 0
for emo in emos:
top_k_emo_freq_pairs[emot] += freq[emo]
top_k_emo_freq_pairs = sorted(top_k_emo_freq_pairs.items(), key=lambda x: -x[1])
plt.figure(figsize=(20, 10), dpi=300)
x = [emo[0] for emo in top_k_emo_freq_pairs]
y = [frq[1] for frq in top_k_emo_freq_pairs]
plt.bar(range(len(x)), y)
for i, h in enumerate(y):
plt.text(i, h, s=h, color="black", horizontalalignment="center", fontsize=20)
plt.xticks(range(len(x)), x, rotation=90, fontsize=30)
plt.yticks(fontsize=20)
plt.ylabel("Frequency of movie scenes", fontsize=30)
plt.savefig("plots/" + FILENAME, bbox_inches="tight")
print("Generated plots/" + FILENAME)
def plot_t10_25_emo_freq(all_mg):
"""
Plot the top-10 and top-25 emotions from MovieGraphs dataset based on frequency.
The plot is saved in plots/top_10_25_emo_freq.pdf.
Args:
all_mg (list): List of MovieGraphs.
"""
FILENAME = "top_10_25_emo_freq.pdf"
all_emos, _ = utils.get_all_emotions(all_mg)
sorted_emo_freq = sorted(Counter(all_emos).items(), key=lambda x: -x[1])
top_k_emo_freq_pairs_25 = [(emo, freq) for emo, freq in sorted_emo_freq][:25]
top_k_emo_freq_pairs_10 = [(emo, freq) for emo, freq in sorted_emo_freq][:10]
plt.figure(figsize=(20, 10), dpi=300)
x = [emo[0] for emo in top_k_emo_freq_pairs_25]
y = [frq[1] for frq in top_k_emo_freq_pairs_25]
plt.bar(range(len(x)), y, label="Top-25")
x1 = [emo[0] for emo in top_k_emo_freq_pairs_10]
y1 = [frq[1] for frq in top_k_emo_freq_pairs_10]
plt.bar(range(len(x1)), y1, label="Top-10")
for i, h in enumerate(y):
plt.text(i, h, s=h, color="black", horizontalalignment="center", fontsize=20)
plt.xticks(range(len(x)), x, rotation=90, fontsize=30)
plt.yticks(fontsize=20)
plt.ylabel("Frequency of movie scenes", fontsize=30)
plt.legend(fontsize=25)
plt.savefig("plots/" + FILENAME, bbox_inches="tight")
print("Generated plots/" + FILENAME)
def plot_char_vs_clips_stats(all_mg, MG_ROOT):
"""
Plot the number of characters vs number of scenes in MovieGraphs dataset.
The plot is saved in plots/char_freq_MG.pdf.
Args:
all_mg (list): List of MovieGraphs.
MG_ROOT (str): Path to MovieGraphs resources.
"""
FILENAME = "char_freq_MG.pdf"
data_split = utils.read_train_val_test_splits(MG_ROOT)
movie_char_freq_map = dict()
movie_char_freq, movie_char_counts = list(), list()
for movie_id in all_mg:
cgs = all_mg[movie_id].clip_graphs
characters = list()
for cg_id in cgs:
characters += [pair[0].lower() for pair in cgs[cg_id].get_characters()]
char_freq = Counter(characters)
char_freq = dict(sorted(char_freq.items(), key=lambda x: -x[1]))
movie_char_freq_map[movie_id] = char_freq
movie_char_freq.append(list(char_freq.values()))
movie_char_counts.append(len(char_freq.keys()))
movie_char_counts = np.array(movie_char_counts)
movie_char_counts[::-1].sort()
## Plotting average occurances of first top 25 (min) character's occurences
avg_occurences = np.zeros(min(movie_char_counts)) # Min: 25 | Max: 122
max_occurences = np.zeros(min(movie_char_counts))
min_occurences = np.ones(min(movie_char_counts))*9999
for ndx in range(min(movie_char_counts)):
freqs = movie_char_freq[ndx][:min(movie_char_counts)]
avg_occurences += freqs
min_occurences = np.minimum(min_occurences, freqs)
max_occurences = np.maximum(max_occurences, freqs)
avg_occurences /= len(movie_char_freq)
plt.figure(figsize=(15, 8), dpi=300)
x = np.arange(min(movie_char_counts))
plt.bar(x, avg_occurences)
for i, h in enumerate(avg_occurences):
text = "{:.1f}".format(avg_occurences[i])
plt.text(i, h, s=text, color="black", horizontalalignment="center", fontsize=15)
plt.xticks(x, x+1, fontsize=20)
plt.yticks(fontsize=22)
plt.xlabel("Character index", fontsize=25)
plt.ylabel("Average number of movie scenes \nin which character appears", fontsize=25) # appearance across movie scenes
plt.savefig("plots/" + FILENAME, bbox_inches="tight")
print("Generated plots/" + FILENAME)
def plot_emo_vs_clip_stats(all_mg, MG_ROOT):
"""
Plot the number of emotions vs number of scenes in MovieGraphs dataset.
Generates two plots, one is saved in plots/emo_freq_MG.pdf and the other
is saved at plots/emo_freq_MG_from_train_val_test.pdf.
Args:
all_mg (list): List of MovieGraphs.
MG_ROOT (str): Path to MovieGraphs resources.
"""
data_split = utils.read_train_val_test_splits(MG_ROOT)
FILENAME_MG = "emo_counts_in_ClipGraphs.pdf"
FILENAME_SPLIT = "emo_counts_in_ClipGraphs_from_train_val_test.pdf"
movie_emoCounts_map, emoCounts = dict(), list()
train_emo_counts, val_emo_counts, test_emo_counts = list(), list(), list()
for movie_id in all_mg:
cgs = all_mg[movie_id].clip_graphs
movie_emoCounts_map[movie_id] = list()
for cg_id in cgs:
emotions = utils.get_emotions_from_cg(cgs[cg_id])
movie_emoCounts_map[movie_id].append(len(emotions))
emoCounts += movie_emoCounts_map[movie_id]
for movie_id in data_split["train"]:
train_emo_counts += movie_emoCounts_map[movie_id]
for movie_id in data_split["val"]:
val_emo_counts += movie_emoCounts_map[movie_id]
for movie_id in data_split["test"]:
test_emo_counts += movie_emoCounts_map[movie_id]
emoCounts_freq = Counter(emoCounts)
emoCounts_freq = dict(sorted(emoCounts_freq.items(), key=lambda x: x[0]))
tr_emoCounts_fr = Counter(train_emo_counts)
tr_emoCounts_fr = dict(sorted(tr_emoCounts_fr.items(), key=lambda x: x[0]))
vl_emoCounts_fr = Counter(val_emo_counts)
vl_emoCounts_fr = dict(sorted(vl_emoCounts_fr.items(), key=lambda x: x[0]))
ts_emoCounts_fr = Counter(test_emo_counts)
ts_emoCounts_fr = dict(sorted(ts_emoCounts_fr.items(), key=lambda x: x[0]))
## Plotting trend for entire MG
x = np.arange(len(emoCounts_freq.keys()))
y = np.array(list(emoCounts_freq.values()))
plt.figure(figsize=(15, 8), dpi=300)
plt.bar(x, y)
for i, h in enumerate(y):
plt.text(i, h, s=h, color="black", horizontalalignment="center", fontsize=20)
plt.xticks(x, list(emoCounts_freq.keys()), fontsize=22)
plt.yticks(fontsize=22)
plt.xlabel("Count of emotion labels", fontsize=30)
plt.ylabel("Number of scenes", fontsize=30)
# plt.title("Number of emotions present in {} ClipGraphs".format(len(emoCounts)))
plt.savefig("plots/" + FILENAME_MG)
print("Generated plots/" + FILENAME_MG)
## Plotting trend for train_val_test splits
x = np.arange(len(emoCounts_freq.keys()))
y = {"train":list(), "val":list(), "test":list()}
for k in emoCounts_freq.keys():
y["train"].append(tr_emoCounts_fr[k] if k in tr_emoCounts_fr else 0)
y["val"].append(vl_emoCounts_fr[k] if k in vl_emoCounts_fr else 0)
y["test"].append(ts_emoCounts_fr[k] if k in ts_emoCounts_fr else 0)
train_y = np.array(y["train"])
val_y = np.array(y["val"])
test_y = np.array(y["test"])
plt.figure(figsize=(15, 8), dpi=300)
plt.bar(x-0.25, train_y, width=0.25, label="Train: {} Scenes".format(train_y.sum()))
plt.bar(x, val_y, width=0.25, label="Val: {} Scenes".format(val_y.sum()))
plt.bar(x+0.25, test_y, width=0.25, label="Test: {} Scenes".format(test_y.sum()))
plt.legend(fontsize=25)
plt.xticks(x, list(emoCounts_freq.keys()), fontsize=22)
plt.yticks(fontsize=22)
plt.xlabel("Count of emotion labels", fontsize=30)
plt.ylabel("Number of scenes", fontsize=30)
# plt.title("Number of emotions in ClipGraphs from Train-Val-Test splits")
plt.savefig("plots/" + FILENAME_SPLIT, bbox_inches="tight")
print("Generated plots/" + FILENAME_SPLIT)
def get_config():
"""
Loads the config file and overrides the hyperparameters from the command line.
"""
base_conf = OmegaConf.load("config.yaml")
overrides = OmegaConf.from_cli()
updated_conf = OmegaConf.merge(base_conf, overrides)
return OmegaConf.to_container(updated_conf)
if __name__ == "__main__":
save_path = Path("./plots/")
save_path.mkdir(parents=True, exist_ok=True)
print("Created a new directory: {} to save generated plots".format(save_path))
config = get_config()
custom_mapped_emotions = utils.read_custom_mapped_emotions(config['resource_path'])
all_mg = utils.read_mg_pkl(config['pkl_path'], config['pkl_name'])
plot_char_vs_clips_stats(all_mg, config["resource_path"])
plot_emo_vs_clip_stats(all_mg, config["resource_path"])
plot_t10_25_emo_freq(all_mg)
plot_emotic_emo_freq(all_mg, config["emotic_mapping_path"])
print("Done!")