Commit

fix conflict
adnchao1 committed May 19, 2024
2 parents a17429a + 9fa8340 commit 4c68f27
Showing 65 changed files with 19,376 additions and 6,216 deletions.
2,884 changes: 2,884 additions & 0 deletions cached_code/v+d_df_gpt35_simple/v1.csv

Large diffs are not rendered by default.

3,803 changes: 3,803 additions & 0 deletions cached_code/v+d_df_gpt4_simple/v1.csv

Large diffs are not rendered by default.

3,477 changes: 3,477 additions & 0 deletions cached_code/v+d_none_gpt4_simple/v1.csv

Large diffs are not rendered by default.

2,613 changes: 2,613 additions & 0 deletions cached_code/v_df_gpt35_simple/v1.csv

Large diffs are not rendered by default.

3,620 changes: 3,620 additions & 0 deletions cached_code/v_df_gpt4_simple/v1.csv

Large diffs are not rendered by default.
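The five CSVs above cache previously generated programs so runs configured with `use_cached_codex: True` can skip regeneration. A minimal sketch of reading such a cache back into memory, assuming a hypothetical two-column schema with `sample_id` and `code` columns (the real column names live in the repo's loader):

```python
# Minimal sketch: load cached generated code from a CSV, keyed by sample id.
# The "sample_id" and "code" column names are assumptions, not the repo's schema.
import csv

def load_cached_code(path: str) -> dict[str, str]:
    """Map each sample id to its cached generated program."""
    cache: dict[str, str] = {}
    with open(path, newline="") as f:
        for row in csv.DictReader(f):
            cache[row["sample_id"]] = row["code"]
    return cache

cache = load_cached_code("./cached_code/v+d_df_gpt35_simple/v1.csv")
```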

96 changes: 96 additions & 0 deletions configs/ablations/v+d_120_blip2_df_gpt35_simple_mainset.yaml
@@ -0,0 +1,96 @@
multiprocessing: True # Run the models and samples in parallel
path_pretrained_models: './pretrained_models' # Path to the pretrained models
execute_code: True # Execute the code after generating it. Only applies to main_batch

results_dir: ./results/v+d_120_blip2_df_gpt35_simple_mainset # Directory to save the results
use_cached_codex: True # Use previously-computed Codex results
cached_codex_path: './cached_code/v+d_df_gpt35_simple/v1.csv' # Path to the csv results file from which to load Codex results

vqa_model: blip # should be one of the following: gemini, blip
face_model: deepface # should be one of the following: deepface, label

dataset:
data_path: /project/mira/dataset/MovieNet
anno_path: ../tropes/tim_mainset_20_trope_50_movie.json
dataset_name: TiM
version: multiplechoice
fps: 10
max_num_frames: 120
split: val
batch_size: 1
# max_samples: 1
start_sample: 0

codex:
api_prompt: ./prompts/benchmarks/tim/face_dialogue/api.py
function_signature_prompt: ./prompts/benchmarks/tim/function_signature.prompt
function_format_prompt: ./prompts/benchmarks/tim/function_format.prompt
example_prompt: [
./prompts/benchmarks/tim/face_dialogue/examples/asshole_victim.py,
./prompts/benchmarks/tim/face_dialogue/examples/downer_ending.py,
./prompts/benchmarks/tim/face_dialogue/examples/kick_the_dog.py,
./prompts/benchmarks/tim/face_dialogue/examples/love_at_first_sight.py,
./prompts/benchmarks/tim/face_dialogue/examples/stealth_pun.py
]
prototype_prompt: ./prompts/benchmarks/tim/prototype.prompt
temperature: 0. # Temperature for Codex. (Almost) deterministic if 0
best_of: 1 # Number of tries to choose from. Use when temperature > 0
max_tokens: 4096 # Maximum number of tokens to generate for Codex
prompt: ./prompts/chatapi.prompt # Codex prompt file, which defines the API (video is not supported for now due to token limits)
model: gpt-3.5-turbo-0125 # Codex model to use. [gpt-4-turbo, gpt-3.5-turbo]. See openai.Model.list() for available models

load_models: # Which pretrained models to load
maskrcnn: True
clip: False
glip: True
owlvit: False
tcl: False
gpt3_qa: True
gpt3_general: True
gpt3_summarize: True
depth: False
blip: True
saliency: False
xvlm: False
codex: True
codellama: False
reflection: False
gemini: False
deepface: True

# Other unimportant parameters
gpt3: # GPT-3 configuration
n_votes: 1 # Number of tries to use for GPT-3. Use with temperature > 0
qa_prompt: ./prompts/gpt3/gpt3_qa.txt
guess_prompt: ./prompts/gpt3/gpt3_process_guess.txt
temperature: 0. # Temperature for GPT-3. Almost deterministic if 0
model: gpt-3.5-turbo-0125 # See openai.Model.list() for available models

# Some model parameters
detect_thresholds: # Thresholds for the models that perform detection
glip: 0.5
maskrcnn: 0.8
owlvit: 0.1
ratio_box_area_to_image_area: 0.0 # Detected patches whose box-to-image area ratio falls below this value are not returned
crop_larger_margin: True # Increase size of crop by 10% to include more context
verify_property: # Parameters for verify_property
model: xvlm # Model to use for verify_property
thresh_clip: 0.6
thresh_tcl: 0.25
thresh_xvlm: 0.6
best_match_model: xvlm # Which model to use for best_[image, text]_match
blip_half_precision: True # Use 8-bit precision (faster but slightly less accurate) for BLIP if True
blip_v2_model_type: blip2-flan-t5-xl # Which model to use for BLIP-2

# Saving and loading parameters
save: True # Save the results to a file
save_new_results: True # If False, overwrite the results file
use_cache: False # Use cache for the models that support it (currently only GPT-3)
clear_cache: False # Clear stored cache
log_every: 20 # Log accuracy every n batches
wandb: False

# reasoning prompt
select_answer_prompt: ./prompts/gpt3/timos_bc_question_reason.txt
fixed_code_file: ./prompts/fixed_code/blip2_video.prompt
use_fixed_code: False # Use a fixed code for all samples (do not generate with Codex)
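A hedged sketch of how a config like the one above might be loaded and its cached-Codex settings consumed, assuming plain PyYAML (the repo may use OmegaConf or a custom loader instead; only the top-level keys written in the file are assumed):

```python
# Sketch: load an ablation config and read the cached-Codex settings.
# Assumes PyYAML and top-level keys exactly as they appear in the YAML above.
import yaml

with open("configs/ablations/v+d_120_blip2_df_gpt35_simple_mainset.yaml") as f:
    cfg = yaml.safe_load(f)

if cfg["use_cached_codex"]:
    print("Loading cached programs from", cfg["cached_codex_path"])
print("VQA model:", cfg["vqa_model"], "| face model:", cfg["face_model"])
```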
96 changes: 96 additions & 0 deletions configs/ablations/v+d_120_blip2_df_gpt4_simple_mainset.yaml
@@ -0,0 +1,96 @@
multiprocessing: True # Run the models and samples in parallel
path_pretrained_models: './pretrained_models' # Path to the pretrained models
execute_code: True # Execute the code after generating it. Only applies to main_batch

results_dir: ./results/v+d_120_blip2_df_gpt4_simple_mainset # Directory to save the results
use_cached_codex: True # Use previously-computed Codex results
cached_codex_path: './cached_code/v+d_df_gpt4_simple/v1.csv' # Path to the csv results file from which to load Codex results

vqa_model: blip # should be one of the following: gemini, blip
face_model: deepface # should be one of the following: deepface, label

dataset:
data_path: /work/adnchao1/MovieNet
anno_path: ../tropes/tim_mainset_20_trope_50_movie.json
dataset_name: TiM
version: multiplechoice
fps: 10
max_num_frames: 120
split: val
batch_size: 16
# max_samples: 1
start_sample: 0

codex:
api_prompt: ./prompts/benchmarks/tim/face_dialogue/api.py
function_signature_prompt: ./prompts/benchmarks/tim/function_signature.prompt
function_format_prompt: ./prompts/benchmarks/tim/function_format.prompt
example_prompt: [
./prompts/benchmarks/tim/face_dialogue/examples/asshole_victim.py,
./prompts/benchmarks/tim/face_dialogue/examples/downer_ending.py,
./prompts/benchmarks/tim/face_dialogue/examples/kick_the_dog.py,
./prompts/benchmarks/tim/face_dialogue/examples/love_at_first_sight.py,
./prompts/benchmarks/tim/face_dialogue/examples/stealth_pun.py
]
prototype_prompt: ./prompts/benchmarks/tim/prototype.prompt
temperature: 0. # Temperature for Codex. (Almost) deterministic if 0
best_of: 1 # Number of tries to choose from. Use when temperature > 0
max_tokens: 4096 # Maximum number of tokens to generate for Codex
prompt: ./prompts/chatapi.prompt # Codex prompt file, which defines the API (video is not supported for now due to token limits)
model: gpt-4-turbo # Codex model to use. [gpt-4-turbo, gpt-3.5-turbo]. See openai.Model.list() for available models

load_models: # Which pretrained models to load
maskrcnn: True
clip: False
glip: True
owlvit: False
tcl: False
gpt3_qa: True
gpt3_general: True
gpt3_summarize: True
depth: False
blip: True
saliency: False
xvlm: False
codex: False
codellama: False
reflection: False
gemini: False
deepface: True

# Other unimportant parameters
gpt3: # GPT-3 configuration
n_votes: 1 # Number of tries to use for GPT-3. Use with temperature > 0
qa_prompt: ./prompts/gpt3/gpt3_qa.txt
guess_prompt: ./prompts/gpt3/gpt3_process_guess.txt
temperature: 0. # Temperature for GPT-3. Almost deterministic if 0
model: gpt-3.5-turbo-0125 # See openai.Model.list() for available models

# Some model parameters
detect_thresholds: # Thresholds for the models that perform detection
glip: 0.5
maskrcnn: 0.8
owlvit: 0.1
ratio_box_area_to_image_area: 0.0 # Detected patches whose box-to-image area ratio falls below this value are not returned
crop_larger_margin: True # Increase size of crop by 10% to include more context
verify_property: # Parameters for verify_property
model: xvlm # Model to use for verify_property
thresh_clip: 0.6
thresh_tcl: 0.25
thresh_xvlm: 0.6
best_match_model: xvlm # Which model to use for best_[image, text]_match
blip_half_precision: True # Use 8-bit precision (faster but slightly less accurate) for BLIP if True
blip_v2_model_type: blip2-flan-t5-xl # Which model to use for BLIP-2

# Saving and loading parameters
save: True # Save the results to a file
save_new_results: True # If False, overwrite the results file
use_cache: False # Use cache for the models that support it (currently only GPT-3)
clear_cache: False # Clear stored cache
log_every: 20 # Log accuracy every n batches
wandb: False

# reasoning prompt
select_answer_prompt: ./prompts/gpt3/timos_bc_question_reason.txt
fixed_code_file: ./prompts/fixed_code/blip2_video.prompt
use_fixed_code: False # Use a fixed code for all samples (do not generate with Codex)
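The `detect_thresholds` block and `ratio_box_area_to_image_area` together decide which detections survive. A sketch of that filtering under assumed box fields (`score`, `w`, `h` are illustrative names, not the repo's detector API):

```python
# Sketch: keep detections above the per-model confidence threshold and
# above the minimum box-area-to-image-area ratio. Field names are assumptions.
from dataclasses import dataclass

@dataclass
class Box:
    score: float  # detector confidence
    w: float      # box width in pixels
    h: float      # box height in pixels

def filter_detections(boxes: list[Box], model: str, cfg: dict,
                      img_w: int, img_h: int) -> list[Box]:
    thresh = cfg["detect_thresholds"][model]         # e.g. glip: 0.5
    min_ratio = cfg["ratio_box_area_to_image_area"]  # 0.0 keeps all sizes
    img_area = img_w * img_h
    return [b for b in boxes
            if b.score >= thresh and (b.w * b.h) / img_area >= min_ratio]
```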
96 changes: 96 additions & 0 deletions configs/ablations/v+d_120_blip2_none_gpt4_simple_mainset.yaml
@@ -0,0 +1,96 @@
multiprocessing: True # Run the models and samples in parallel
path_pretrained_models: './pretrained_models' # Path to the pretrained models
execute_code: True # Execute the code after generating it. Only applies to main_batch

results_dir: ./results/v+d_120_blip2_none_gpt4_simple_mainset # Directory to save the results
use_cached_codex: True # Use previously-computed Codex results
cached_codex_path: './cached_code/v+d_none_gpt4_simple/v1.csv' # Path to the csv results file from which to load Codex results

vqa_model: blip # should be one of the following: gemini, blip
face_model: deepface # should be one of the following: deepface, label

dataset:
data_path: /work/adnchao1/MovieNet
anno_path: ../tropes/tim_mainset_20_trope_50_movie.json
dataset_name: TiM
version: multiplechoice
fps: 10
max_num_frames: 120
split: val
batch_size: 16
# max_samples: 1
start_sample: 0

codex:
api_prompt: ./prompts/benchmarks/tim/dialogue/api.py
function_signature_prompt: ./prompts/benchmarks/tim/function_signature.prompt
function_format_prompt: ./prompts/benchmarks/tim/function_format.prompt
example_prompt: [
./prompts/benchmarks/tim/dialogue/examples/asshole_victim.py,
./prompts/benchmarks/tim/dialogue/examples/downer_ending.py,
./prompts/benchmarks/tim/dialogue/examples/kick_the_dog.py,
./prompts/benchmarks/tim/dialogue/examples/love_at_first_sight.py,
./prompts/benchmarks/tim/dialogue/examples/stealth_pun.py
]
prototype_prompt: ./prompts/benchmarks/tim/prototype.prompt
temperature: 0. # Temperature for Codex. (Almost) deterministic if 0
best_of: 1 # Number of tries to choose from. Use when temperature > 0
max_tokens: 4096 # Maximum number of tokens to generate for Codex
prompt: ./prompts/chatapi.prompt # Codex prompt file, which defines the API (video is not supported for now due to token limits)
model: gpt-4-turbo # Codex model to use. [gpt-4-turbo, gpt-3.5-turbo]. See openai.Model.list() for available models

load_models: # Which pretrained models to load
maskrcnn: True
clip: False
glip: True
owlvit: False
tcl: False
gpt3_qa: True
gpt3_general: True
gpt3_summarize: True
depth: False
blip: True
saliency: False
xvlm: False
codex: False
codellama: False
reflection: False
gemini: False
deepface: False

# Other unimportant parameters
gpt3: # GPT-3 configuration
n_votes: 1 # Number of tries to use for GPT-3. Use with temperature > 0
qa_prompt: ./prompts/gpt3/gpt3_qa.txt
guess_prompt: ./prompts/gpt3/gpt3_process_guess.txt
temperature: 0. # Temperature for GPT-3. Almost deterministic if 0
model: gpt-3.5-turbo-0125 # See openai.Model.list() for available models

# Some model parameters
detect_thresholds: # Thresholds for the models that perform detection
glip: 0.5
maskrcnn: 0.8
owlvit: 0.1
ratio_box_area_to_image_area: 0.0 # Detected patches whose box-to-image area ratio falls below this value are not returned
crop_larger_margin: True # Increase size of crop by 10% to include more context
verify_property: # Parameters for verify_property
model: xvlm # Model to use for verify_property
thresh_clip: 0.6
thresh_tcl: 0.25
thresh_xvlm: 0.6
best_match_model: xvlm # Which model to use for best_[image, text]_match
blip_half_precision: True # Use 8-bit precision (faster but slightly less accurate) for BLIP if True
blip_v2_model_type: blip2-flan-t5-xl # Which model to use for BLIP-2

# Saving and loading parameters
save: True # Save the results to a file
save_new_results: True # If False, overwrite the results file
use_cache: False # Use cache for the models that support it (currently only GPT-3)
clear_cache: False # Clear stored cache
log_every: 20 # Log accuracy every n batches
wandb: False

# reasoning prompt
select_answer_prompt: ./prompts/gpt3/timos_bc_question_reason.txt
fixed_code_file: ./prompts/fixed_code/blip2_video.prompt
use_fixed_code: False # Use a fixed code for all samples (do not generate with Codex)
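Across the three configs, the ablation axes are the prompt set (`face_dialogue` vs. `dialogue`), the Codex model (`gpt-3.5-turbo-0125` vs. `gpt-4-turbo`), and whether `deepface` is loaded. A sketch of dispatching on the `face_model` key; the loader bodies are hypothetical placeholders, not the repo's actual constructors:

```python
# Sketch: choose the face pipeline from the config's face_model key.
# "deepface" and "label" are the two values the config comments allow;
# the loaders below are placeholder stand-ins.
def load_deepface():
    return "deepface-pipeline"    # stand-in for the real DeepFace wrapper

def load_label_oracle():
    return "ground-truth-labels"  # stand-in for annotated face labels

def build_face_model(cfg: dict):
    name = cfg["face_model"]
    if name == "deepface":
        return load_deepface()
    if name == "label":
        return load_label_oracle()
    raise ValueError(f"unknown face_model: {name!r}")
```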