forked from cvlab-columbia/viper
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
65 changed files
with
19,376 additions
and
6,216 deletions.
There are no files selected for viewing
Large diffs are not rendered by default.
Oops, something went wrong.
Large diffs are not rendered by default.
Oops, something went wrong.
Large diffs are not rendered by default.
Oops, something went wrong.
Large diffs are not rendered by default.
Oops, something went wrong.
Large diffs are not rendered by default.
Oops, something went wrong.
96 changes: 96 additions & 0 deletions
96
configs/ablations/v+d_120_blip2_df_gpt35_simple_mainset.yaml
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,96 @@ | ||
---
# Experiment config: 120-frame video + dialogue, BLIP-2 VQA, DeepFace faces,
# GPT-3.5 code generation with simple prompts, TiM main set (batch_size 1).
multiprocessing: True  # Run the models and samples in parallel
path_pretrained_models: './pretrained_models'  # Path to the pretrained models
execute_code: True  # Execute the code after generating it. Only applies to main_batch

results_dir: ./results/v+d_120_blip2_df_gpt35_simple_mainset  # Directory to save the results
use_cached_codex: True  # Use previously-computed Codex results
cached_codex_path: './cached_code/v+d_df_gpt35_simple/v1.csv'  # Path to the csv results file from which to load Codex results

vqa_model: blip  # should be one of the following: gemini, blip
face_model: deepface  # should be one of the following: deepface, label

dataset:
  data_path: /project/mira/dataset/MovieNet
  anno_path: ../tropes/tim_mainset_20_trope_50_movie.json
  dataset_name: TiM
  version: multiplechoice
  fps: 10
  max_num_frames: 120
  split: val
  batch_size: 1
  # max_samples: 1
  start_sample: 0

codex:
  api_prompt: ./prompts/benchmarks/tim/face_dialogue/api.py
  function_signature_prompt: ./prompts/benchmarks/tim/function_signature.prompt
  function_format_prompt: ./prompts/benchmarks/tim/function_format.prompt
  example_prompt: [
    ./prompts/benchmarks/tim/face_dialogue/examples/asshole_victim.py,
    ./prompts/benchmarks/tim/face_dialogue/examples/downer_ending.py,
    ./prompts/benchmarks/tim/face_dialogue/examples/kick_the_dog.py,
    ./prompts/benchmarks/tim/face_dialogue/examples/love_at_first_sight.py,
    ./prompts/benchmarks/tim/face_dialogue/examples/stealth_pun.py
  ]
  prototype_prompt: ./prompts/benchmarks/tim/prototype.prompt
  temperature: 0.  # Temperature for Codex. (Almost) deterministic if 0
  best_of: 1  # Number of tries to choose from. Use when temperature > 0
  max_tokens: 4096  # Maximum number of tokens to generate for Codex
  prompt: ./prompts/chatapi.prompt  # Codex prompt file, which defines the API. (doesn't support video for now due to token limits)
  model: gpt-3.5-turbo-0125  # Codex model to use. [gpt-4-turbo, gpt-3.5-turbo]. See openai.Model.list() for available models

load_models:  # Which pretrained models to load
  maskrcnn: True
  clip: False
  glip: True
  owlvit: False
  tcl: False
  gpt3_qa: True
  gpt3_general: True
  gpt3_summarize: True
  depth: False
  blip: True
  saliency: False
  xvlm: False
  codex: True
  codellama: False
  reflection: False
  gemini: False
  deepface: True

# Other unimportant parameters
gpt3:  # GPT-3 configuration
  n_votes: 1  # Number of tries to use for GPT-3. Use with temperature > 0
  qa_prompt: ./prompts/gpt3/gpt3_qa.txt
  guess_prompt: ./prompts/gpt3/gpt3_process_guess.txt
  temperature: 0.  # Temperature for GPT-3. Almost deterministic if 0
  model: gpt-3.5-turbo-0125  # See openai.Model.list() for available models

# some model parameter
detect_thresholds:  # Thresholds for the models that perform detection
  glip: 0.5
  maskrcnn: 0.8
  owlvit: 0.1
ratio_box_area_to_image_area: 0.0  # Any detected patch under this size will not be returned
crop_larger_margin: True  # Increase size of crop by 10% to include more context
verify_property:  # Parameters for verify_property
  model: xvlm  # Model to use for verify_property
  thresh_clip: 0.6
  thresh_tcl: 0.25
  thresh_xvlm: 0.6
best_match_model: xvlm  # Which model to use for best_[image, text]_match
blip_half_precision: True  # Use 8bit (Faster but slightly less accurate) for BLIP if True
blip_v2_model_type: blip2-flan-t5-xl  # Which model to use for BLIP-2

# Saving and loading parameters
save: True  # Save the results to a file
save_new_results: True  # If False, overwrite the results file
use_cache: False  # Use cache for the models that support it (now, GPT-3)
clear_cache: False  # Clear stored cache
log_every: 20  # Log accuracy every n batches
wandb: False

# reasoning prompt
select_answer_prompt: ./prompts/gpt3/timos_bc_question_reason.txt
fixed_code_file: ./prompts/fixed_code/blip2_video.prompt
use_fixed_code: False  # Use a fixed code for all samples (do not generate with Codex)
96 changes: 96 additions & 0 deletions
96
configs/ablations/v+d_120_blip2_df_gpt4_simple_mainset.yaml
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,96 @@ | ||
---
# Experiment config: 120-frame video + dialogue, BLIP-2 VQA, DeepFace faces,
# GPT-4-turbo code generation with simple prompts, TiM main set (batch_size 16,
# codex not loaded — cached codex results are used).
multiprocessing: True  # Run the models and samples in parallel
path_pretrained_models: './pretrained_models'  # Path to the pretrained models
execute_code: True  # Execute the code after generating it. Only applies to main_batch

results_dir: ./results/v+d_120_blip2_df_gpt4_simple_mainset  # Directory to save the results
use_cached_codex: True  # Use previously-computed Codex results
cached_codex_path: './cached_code/v+d_df_gpt4_simple/v1.csv'  # Path to the csv results file from which to load Codex results

vqa_model: blip  # should be one of the following: gemini, blip
face_model: deepface  # should be one of the following: deepface, label

dataset:
  data_path: /work/adnchao1/MovieNet
  anno_path: ../tropes/tim_mainset_20_trope_50_movie.json
  dataset_name: TiM
  version: multiplechoice
  fps: 10
  max_num_frames: 120
  split: val
  batch_size: 16
  # max_samples: 1
  start_sample: 0

codex:
  api_prompt: ./prompts/benchmarks/tim/face_dialogue/api.py
  function_signature_prompt: ./prompts/benchmarks/tim/function_signature.prompt
  function_format_prompt: ./prompts/benchmarks/tim/function_format.prompt
  example_prompt: [
    ./prompts/benchmarks/tim/face_dialogue/examples/asshole_victim.py,
    ./prompts/benchmarks/tim/face_dialogue/examples/downer_ending.py,
    ./prompts/benchmarks/tim/face_dialogue/examples/kick_the_dog.py,
    ./prompts/benchmarks/tim/face_dialogue/examples/love_at_first_sight.py,
    ./prompts/benchmarks/tim/face_dialogue/examples/stealth_pun.py
  ]
  prototype_prompt: ./prompts/benchmarks/tim/prototype.prompt
  temperature: 0.  # Temperature for Codex. (Almost) deterministic if 0
  best_of: 1  # Number of tries to choose from. Use when temperature > 0
  max_tokens: 4096  # Maximum number of tokens to generate for Codex
  prompt: ./prompts/chatapi.prompt  # Codex prompt file, which defines the API. (doesn't support video for now due to token limits)
  model: gpt-4-turbo  # Codex model to use. [gpt-4-turbo, gpt-3.5-turbo]. See openai.Model.list() for available models

load_models:  # Which pretrained models to load
  maskrcnn: True
  clip: False
  glip: True
  owlvit: False
  tcl: False
  gpt3_qa: True
  gpt3_general: True
  gpt3_summarize: True
  depth: False
  blip: True
  saliency: False
  xvlm: False
  codex: False
  codellama: False
  reflection: False
  gemini: False
  deepface: True

# Other unimportant parameters
gpt3:  # GPT-3 configuration
  n_votes: 1  # Number of tries to use for GPT-3. Use with temperature > 0
  qa_prompt: ./prompts/gpt3/gpt3_qa.txt
  guess_prompt: ./prompts/gpt3/gpt3_process_guess.txt
  temperature: 0.  # Temperature for GPT-3. Almost deterministic if 0
  model: gpt-3.5-turbo-0125  # See openai.Model.list() for available models

# some model parameter
detect_thresholds:  # Thresholds for the models that perform detection
  glip: 0.5
  maskrcnn: 0.8
  owlvit: 0.1
ratio_box_area_to_image_area: 0.0  # Any detected patch under this size will not be returned
crop_larger_margin: True  # Increase size of crop by 10% to include more context
verify_property:  # Parameters for verify_property
  model: xvlm  # Model to use for verify_property
  thresh_clip: 0.6
  thresh_tcl: 0.25
  thresh_xvlm: 0.6
best_match_model: xvlm  # Which model to use for best_[image, text]_match
blip_half_precision: True  # Use 8bit (Faster but slightly less accurate) for BLIP if True
blip_v2_model_type: blip2-flan-t5-xl  # Which model to use for BLIP-2

# Saving and loading parameters
save: True  # Save the results to a file
save_new_results: True  # If False, overwrite the results file
use_cache: False  # Use cache for the models that support it (now, GPT-3)
clear_cache: False  # Clear stored cache
log_every: 20  # Log accuracy every n batches
wandb: False

# reasoning prompt
select_answer_prompt: ./prompts/gpt3/timos_bc_question_reason.txt
fixed_code_file: ./prompts/fixed_code/blip2_video.prompt
use_fixed_code: False  # Use a fixed code for all samples (do not generate with Codex)
96 changes: 96 additions & 0 deletions
96
configs/ablations/v+d_120_blip2_none_gpt4_simple_mainset.yaml
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,96 @@ | ||
---
# Experiment config: 120-frame video + dialogue, BLIP-2 VQA, no face model loaded
# (dialogue-only prompts), GPT-4-turbo code generation with simple prompts,
# TiM main set (batch_size 16, codex not loaded — cached codex results are used).
multiprocessing: True  # Run the models and samples in parallel
path_pretrained_models: './pretrained_models'  # Path to the pretrained models
execute_code: True  # Execute the code after generating it. Only applies to main_batch

results_dir: ./results/v+d_120_blip2_none_gpt4_simple_mainset  # Directory to save the results
use_cached_codex: True  # Use previously-computed Codex results
cached_codex_path: './cached_code/v+d_none_gpt4_simple/v1.csv'  # Path to the csv results file from which to load Codex results

vqa_model: blip  # should be one of the following: gemini, blip
face_model: deepface  # should be one of the following: deepface, label

dataset:
  data_path: /work/adnchao1/MovieNet
  anno_path: ../tropes/tim_mainset_20_trope_50_movie.json
  dataset_name: TiM
  version: multiplechoice
  fps: 10
  max_num_frames: 120
  split: val
  batch_size: 16
  # max_samples: 1
  start_sample: 0

codex:
  api_prompt: ./prompts/benchmarks/tim/dialogue/api.py
  function_signature_prompt: ./prompts/benchmarks/tim/function_signature.prompt
  function_format_prompt: ./prompts/benchmarks/tim/function_format.prompt
  example_prompt: [
    ./prompts/benchmarks/tim/dialogue/examples/asshole_victim.py,
    ./prompts/benchmarks/tim/dialogue/examples/downer_ending.py,
    ./prompts/benchmarks/tim/dialogue/examples/kick_the_dog.py,
    ./prompts/benchmarks/tim/dialogue/examples/love_at_first_sight.py,
    ./prompts/benchmarks/tim/dialogue/examples/stealth_pun.py
  ]
  prototype_prompt: ./prompts/benchmarks/tim/prototype.prompt
  temperature: 0.  # Temperature for Codex. (Almost) deterministic if 0
  best_of: 1  # Number of tries to choose from. Use when temperature > 0
  max_tokens: 4096  # Maximum number of tokens to generate for Codex
  prompt: ./prompts/chatapi.prompt  # Codex prompt file, which defines the API. (doesn't support video for now due to token limits)
  model: gpt-4-turbo  # Codex model to use. [gpt-4-turbo, gpt-3.5-turbo]. See openai.Model.list() for available models

load_models:  # Which pretrained models to load
  maskrcnn: True
  clip: False
  glip: True
  owlvit: False
  tcl: False
  gpt3_qa: True
  gpt3_general: True
  gpt3_summarize: True
  depth: False
  blip: True
  saliency: False
  xvlm: False
  codex: False
  codellama: False
  reflection: False
  gemini: False
  deepface: False

# Other unimportant parameters
gpt3:  # GPT-3 configuration
  n_votes: 1  # Number of tries to use for GPT-3. Use with temperature > 0
  qa_prompt: ./prompts/gpt3/gpt3_qa.txt
  guess_prompt: ./prompts/gpt3/gpt3_process_guess.txt
  temperature: 0.  # Temperature for GPT-3. Almost deterministic if 0
  model: gpt-3.5-turbo-0125  # See openai.Model.list() for available models

# some model parameter
detect_thresholds:  # Thresholds for the models that perform detection
  glip: 0.5
  maskrcnn: 0.8
  owlvit: 0.1
ratio_box_area_to_image_area: 0.0  # Any detected patch under this size will not be returned
crop_larger_margin: True  # Increase size of crop by 10% to include more context
verify_property:  # Parameters for verify_property
  model: xvlm  # Model to use for verify_property
  thresh_clip: 0.6
  thresh_tcl: 0.25
  thresh_xvlm: 0.6
best_match_model: xvlm  # Which model to use for best_[image, text]_match
blip_half_precision: True  # Use 8bit (Faster but slightly less accurate) for BLIP if True
blip_v2_model_type: blip2-flan-t5-xl  # Which model to use for BLIP-2

# Saving and loading parameters
save: True  # Save the results to a file
save_new_results: True  # If False, overwrite the results file
use_cache: False  # Use cache for the models that support it (now, GPT-3)
clear_cache: False  # Clear stored cache
log_every: 20  # Log accuracy every n batches
wandb: False

# reasoning prompt
select_answer_prompt: ./prompts/gpt3/timos_bc_question_reason.txt
fixed_code_file: ./prompts/fixed_code/blip2_video.prompt
use_fixed_code: False  # Use a fixed code for all samples (do not generate with Codex)
Oops, something went wrong.