Commit

fix conflict
adnchao1 committed May 19, 2024
2 parents a17429a + 9fa8340 commit 4c68f27
Showing 65 changed files with 19,376 additions and 6,216 deletions.
2,884 changes: 2,884 additions & 0 deletions cached_code/v+d_df_gpt35_simple/v1.csv

Large diffs are not rendered by default.

3,803 changes: 3,803 additions & 0 deletions cached_code/v+d_df_gpt4_simple/v1.csv

Large diffs are not rendered by default.

3,477 changes: 3,477 additions & 0 deletions cached_code/v+d_none_gpt4_simple/v1.csv

Large diffs are not rendered by default.

2,613 changes: 2,613 additions & 0 deletions cached_code/v_df_gpt35_simple/v1.csv

Large diffs are not rendered by default.

3,620 changes: 3,620 additions & 0 deletions cached_code/v_df_gpt4_simple/v1.csv

Large diffs are not rendered by default.
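The five CSVs above cache previously generated programs so runs configured with `use_cached_codex: True` can skip regeneration. A minimal sketch of reading such a cache back into memory, assuming a hypothetical two-column schema with `sample_id` and `code` columns (the real column names live in the repo's loader):

```python
# Minimal sketch: load cached generated code from a CSV, keyed by sample id.
# The "sample_id" and "code" column names are assumptions, not the repo's schema.
import csv

def load_cached_code(path: str) -> dict[str, str]:
    """Map each sample id to its cached generated program."""
    cache: dict[str, str] = {}
    with open(path, newline="") as f:
        for row in csv.DictReader(f):
            cache[row["sample_id"]] = row["code"]
    return cache

cache = load_cached_code("./cached_code/v+d_df_gpt35_simple/v1.csv")
```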

96 changes: 96 additions & 0 deletions configs/ablations/v+d_120_blip2_df_gpt35_simple_mainset.yaml
@@ -0,0 +1,96 @@
multiprocessing: True # Run the models and samples in parallel
path_pretrained_models: './pretrained_models' # Path to the pretrained models
execute_code: True # Execute the code after generating it. Only applies to main_batch

results_dir: ./results/v+d_120_blip2_df_gpt35_simple_mainset # Directory to save the results
use_cached_codex: True # Use previously-computed Codex results
cached_codex_path: './cached_code/v+d_df_gpt35_simple/v1.csv' # Path to the csv results file from which to load Codex results

vqa_model: blip # should be one of the following: gemini, blip
face_model: deepface # should be one of the following: deepface, label

dataset:
data_path: /project/mira/dataset/MovieNet
anno_path: ../tropes/tim_mainset_20_trope_50_movie.json
dataset_name: TiM
version: multiplechoice
fps: 10
max_num_frames: 120
split: val
batch_size: 1
# max_samples: 1
start_sample: 0

codex:
api_prompt: ./prompts/benchmarks/tim/face_dialogue/api.py
function_signature_prompt: ./prompts/benchmarks/tim/function_signature.prompt
function_format_prompt: ./prompts/benchmarks/tim/function_format.prompt
example_prompt: [
./prompts/benchmarks/tim/face_dialogue/examples/asshole_victim.py,
./prompts/benchmarks/tim/face_dialogue/examples/downer_ending.py,
./prompts/benchmarks/tim/face_dialogue/examples/kick_the_dog.py,
./prompts/benchmarks/tim/face_dialogue/examples/love_at_first_sight.py,
./prompts/benchmarks/tim/face_dialogue/examples/stealth_pun.py
]
prototype_prompt: ./prompts/benchmarks/tim/prototype.prompt
temperature: 0. # Temperature for Codex. (Almost) deterministic if 0
best_of: 1 # Number of tries to choose from. Use when temperature > 0
max_tokens: 4096 # Maximum number of tokens to generate for Codex
prompt: ./prompts/chatapi.prompt # Codex prompt file, which defines the API (video is not supported for now due to token limits)
model: gpt-3.5-turbo-0125 # Codex model to use. [gpt-4-turbo, gpt-3.5-turbo]. See openai.Model.list() for available models

load_models: # Which pretrained models to load
maskrcnn: True
clip: False
glip: True
owlvit: False
tcl: False
gpt3_qa: True
gpt3_general: True
gpt3_summarize: True
depth: False
blip: True
saliency: False
xvlm: False
codex: True
codellama: False
reflection: False
gemini: False
deepface: True

# Other unimportant parameters
gpt3: # GPT-3 configuration
n_votes: 1 # Number of tries to use for GPT-3. Use with temperature > 0
qa_prompt: ./prompts/gpt3/gpt3_qa.txt
guess_prompt: ./prompts/gpt3/gpt3_process_guess.txt
temperature: 0. # Temperature for GPT-3. Almost deterministic if 0
model: gpt-3.5-turbo-0125 # See openai.Model.list() for available models

# Some model parameters
detect_thresholds: # Thresholds for the models that perform detection
glip: 0.5
maskrcnn: 0.8
owlvit: 0.1
ratio_box_area_to_image_area: 0.0 # Detected patches whose box-to-image area ratio falls below this value are not returned
crop_larger_margin: True # Increase size of crop by 10% to include more context
verify_property: # Parameters for verify_property
model: xvlm # Model to use for verify_property
thresh_clip: 0.6
thresh_tcl: 0.25
thresh_xvlm: 0.6
best_match_model: xvlm # Which model to use for best_[image, text]_match
blip_half_precision: True # Use 8-bit precision (faster but slightly less accurate) for BLIP if True
blip_v2_model_type: blip2-flan-t5-xl # Which model to use for BLIP-2

# Saving and loading parameters
save: True # Save the results to a file
save_new_results: True # If False, overwrite the results file
use_cache: False # Use cache for the models that support it (currently only GPT-3)
clear_cache: False # Clear stored cache
log_every: 20 # Log accuracy every n batches
wandb: False

# reasoning prompt
select_answer_prompt: ./prompts/gpt3/timos_bc_question_reason.txt
fixed_code_file: ./prompts/fixed_code/blip2_video.prompt
use_fixed_code: False # Use a fixed code for all samples (do not generate with Codex)
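A hedged sketch of how a config like the one above might be loaded and its cached-Codex settings consumed, assuming plain PyYAML (the repo may use OmegaConf or a custom loader instead; only the top-level keys written in the file are assumed):

```python
# Sketch: load an ablation config and read the cached-Codex settings.
# Assumes PyYAML and top-level keys exactly as they appear in the YAML above.
import yaml

with open("configs/ablations/v+d_120_blip2_df_gpt35_simple_mainset.yaml") as f:
    cfg = yaml.safe_load(f)

if cfg["use_cached_codex"]:
    print("Loading cached programs from", cfg["cached_codex_path"])
print("VQA model:", cfg["vqa_model"], "| face model:", cfg["face_model"])
```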
96 changes: 96 additions & 0 deletions configs/ablations/v+d_120_blip2_df_gpt4_simple_mainset.yaml
@@ -0,0 +1,96 @@
multiprocessing: True # Run the models and samples in parallel
path_pretrained_models: './pretrained_models' # Path to the pretrained models
execute_code: True # Execute the code after generating it. Only applies to main_batch

results_dir: ./results/v+d_120_blip2_df_gpt4_simple_mainset # Directory to save the results
use_cached_codex: True # Use previously-computed Codex results
cached_codex_path: './cached_code/v+d_df_gpt4_simple/v1.csv' # Path to the csv results file from which to load Codex results

vqa_model: blip # should be one of the following: gemini, blip
face_model: deepface # should be one of the following: deepface, label

dataset:
data_path: /work/adnchao1/MovieNet
anno_path: ../tropes/tim_mainset_20_trope_50_movie.json
dataset_name: TiM
version: multiplechoice
fps: 10
max_num_frames: 120
split: val
batch_size: 16
# max_samples: 1
start_sample: 0

codex:
api_prompt: ./prompts/benchmarks/tim/face_dialogue/api.py
function_signature_prompt: ./prompts/benchmarks/tim/function_signature.prompt
function_format_prompt: ./prompts/benchmarks/tim/function_format.prompt
example_prompt: [
./prompts/benchmarks/tim/face_dialogue/examples/asshole_victim.py,
./prompts/benchmarks/tim/face_dialogue/examples/downer_ending.py,
./prompts/benchmarks/tim/face_dialogue/examples/kick_the_dog.py,
./prompts/benchmarks/tim/face_dialogue/examples/love_at_first_sight.py,
./prompts/benchmarks/tim/face_dialogue/examples/stealth_pun.py
]
prototype_prompt: ./prompts/benchmarks/tim/prototype.prompt
temperature: 0. # Temperature for Codex. (Almost) deterministic if 0
best_of: 1 # Number of tries to choose from. Use when temperature > 0
max_tokens: 4096 # Maximum number of tokens to generate for Codex
prompt: ./prompts/chatapi.prompt # Codex prompt file, which defines the API (video is not supported for now due to token limits)
model: gpt-4-turbo # Codex model to use. [gpt-4-turbo, gpt-3.5-turbo]. See openai.Model.list() for available models

load_models: # Which pretrained models to load
maskrcnn: True
clip: False
glip: True
owlvit: False
tcl: False
gpt3_qa: True
gpt3_general: True
gpt3_summarize: True
depth: False
blip: True
saliency: False
xvlm: False
codex: False
codellama: False
reflection: False
gemini: False
deepface: True

# Other unimportant parameters
gpt3: # GPT-3 configuration
n_votes: 1 # Number of tries to use for GPT-3. Use with temperature > 0
qa_prompt: ./prompts/gpt3/gpt3_qa.txt
guess_prompt: ./prompts/gpt3/gpt3_process_guess.txt
temperature: 0. # Temperature for GPT-3. Almost deterministic if 0
model: gpt-3.5-turbo-0125 # See openai.Model.list() for available models

# Some model parameters
detect_thresholds: # Thresholds for the models that perform detection
glip: 0.5
maskrcnn: 0.8
owlvit: 0.1
ratio_box_area_to_image_area: 0.0 # Detected patches whose box-to-image area ratio falls below this value are not returned
crop_larger_margin: True # Increase size of crop by 10% to include more context
verify_property: # Parameters for verify_property
model: xvlm # Model to use for verify_property
thresh_clip: 0.6
thresh_tcl: 0.25
thresh_xvlm: 0.6
best_match_model: xvlm # Which model to use for best_[image, text]_match
blip_half_precision: True # Use 8-bit precision (faster but slightly less accurate) for BLIP if True
blip_v2_model_type: blip2-flan-t5-xl # Which model to use for BLIP-2

# Saving and loading parameters
save: True # Save the results to a file
save_new_results: True # If False, overwrite the results file
use_cache: False # Use cache for the models that support it (currently only GPT-3)
clear_cache: False # Clear stored cache
log_every: 20 # Log accuracy every n batches
wandb: False

# reasoning prompt
select_answer_prompt: ./prompts/gpt3/timos_bc_question_reason.txt
fixed_code_file: ./prompts/fixed_code/blip2_video.prompt
use_fixed_code: False # Use a fixed code for all samples (do not generate with Codex)
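The `detect_thresholds` block and `ratio_box_area_to_image_area` together decide which detections survive. A sketch of that filtering under assumed box fields (`score`, `w`, `h` are illustrative names, not the repo's detector API):

```python
# Sketch: keep detections above the per-model confidence threshold and
# above the minimum box-area-to-image-area ratio. Field names are assumptions.
from dataclasses import dataclass

@dataclass
class Box:
    score: float  # detector confidence
    w: float      # box width in pixels
    h: float      # box height in pixels

def filter_detections(boxes: list[Box], model: str, cfg: dict,
                      img_w: int, img_h: int) -> list[Box]:
    thresh = cfg["detect_thresholds"][model]         # e.g. glip: 0.5
    min_ratio = cfg["ratio_box_area_to_image_area"]  # 0.0 keeps all sizes
    img_area = img_w * img_h
    return [b for b in boxes
            if b.score >= thresh and (b.w * b.h) / img_area >= min_ratio]
```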
96 changes: 96 additions & 0 deletions configs/ablations/v+d_120_blip2_none_gpt4_simple_mainset.yaml
@@ -0,0 +1,96 @@
multiprocessing: True # Run the models and samples in parallel
path_pretrained_models: './pretrained_models' # Path to the pretrained models
execute_code: True # Execute the code after generating it. Only applies to main_batch

results_dir: ./results/v+d_120_blip2_none_gpt4_simple_mainset # Directory to save the results
use_cached_codex: True # Use previously-computed Codex results
cached_codex_path: './cached_code/v+d_none_gpt4_simple/v1.csv' # Path to the csv results file from which to load Codex results

vqa_model: blip # should be one of the following: gemini, blip
face_model: deepface # should be one of the following: deepface, label

dataset:
data_path: /work/adnchao1/MovieNet
anno_path: ../tropes/tim_mainset_20_trope_50_movie.json
dataset_name: TiM
version: multiplechoice
fps: 10
max_num_frames: 120
split: val
batch_size: 16
# max_samples: 1
start_sample: 0

codex:
api_prompt: ./prompts/benchmarks/tim/dialogue/api.py
function_signature_prompt: ./prompts/benchmarks/tim/function_signature.prompt
function_format_prompt: ./prompts/benchmarks/tim/function_format.prompt
example_prompt: [
./prompts/benchmarks/tim/dialogue/examples/asshole_victim.py,
./prompts/benchmarks/tim/dialogue/examples/downer_ending.py,
./prompts/benchmarks/tim/dialogue/examples/kick_the_dog.py,
./prompts/benchmarks/tim/dialogue/examples/love_at_first_sight.py,
./prompts/benchmarks/tim/dialogue/examples/stealth_pun.py
]
prototype_prompt: ./prompts/benchmarks/tim/prototype.prompt
temperature: 0. # Temperature for Codex. (Almost) deterministic if 0
best_of: 1 # Number of tries to choose from. Use when temperature > 0
max_tokens: 4096 # Maximum number of tokens to generate for Codex
prompt: ./prompts/chatapi.prompt # Codex prompt file, which defines the API (video is not supported for now due to token limits)
model: gpt-4-turbo # Codex model to use. [gpt-4-turbo, gpt-3.5-turbo]. See openai.Model.list() for available models

load_models: # Which pretrained models to load
maskrcnn: True
clip: False
glip: True
owlvit: False
tcl: False
gpt3_qa: True
gpt3_general: True
gpt3_summarize: True
depth: False
blip: True
saliency: False
xvlm: False
codex: False
codellama: False
reflection: False
gemini: False
deepface: False

# Other unimportant parameters
gpt3: # GPT-3 configuration
n_votes: 1 # Number of tries to use for GPT-3. Use with temperature > 0
qa_prompt: ./prompts/gpt3/gpt3_qa.txt
guess_prompt: ./prompts/gpt3/gpt3_process_guess.txt
temperature: 0. # Temperature for GPT-3. Almost deterministic if 0
model: gpt-3.5-turbo-0125 # See openai.Model.list() for available models

# Some model parameters
detect_thresholds: # Thresholds for the models that perform detection
glip: 0.5
maskrcnn: 0.8
owlvit: 0.1
ratio_box_area_to_image_area: 0.0 # Detected patches whose box-to-image area ratio falls below this value are not returned
crop_larger_margin: True # Increase size of crop by 10% to include more context
verify_property: # Parameters for verify_property
model: xvlm # Model to use for verify_property
thresh_clip: 0.6
thresh_tcl: 0.25
thresh_xvlm: 0.6
best_match_model: xvlm # Which model to use for best_[image, text]_match
blip_half_precision: True # Use 8-bit precision (faster but slightly less accurate) for BLIP if True
blip_v2_model_type: blip2-flan-t5-xl # Which model to use for BLIP-2

# Saving and loading parameters
save: True # Save the results to a file
save_new_results: True # If False, overwrite the results file
use_cache: False # Use cache for the models that support it (currently only GPT-3)
clear_cache: False # Clear stored cache
log_every: 20 # Log accuracy every n batches
wandb: False

# reasoning prompt
select_answer_prompt: ./prompts/gpt3/timos_bc_question_reason.txt
fixed_code_file: ./prompts/fixed_code/blip2_video.prompt
use_fixed_code: False # Use a fixed code for all samples (do not generate with Codex)
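Across the three configs, the ablation axes are the prompt set (`face_dialogue` vs. `dialogue`), the Codex model (`gpt-3.5-turbo-0125` vs. `gpt-4-turbo`), and whether `deepface` is loaded. A sketch of dispatching on the `face_model` key; the loader bodies are hypothetical placeholders, not the repo's actual constructors:

```python
# Sketch: choose the face pipeline from the config's face_model key.
# "deepface" and "label" are the two values the config comments allow;
# the loaders below are placeholder stand-ins.
def load_deepface():
    return "deepface-pipeline"    # stand-in for the real DeepFace wrapper

def load_label_oracle():
    return "ground-truth-labels"  # stand-in for annotated face labels

def build_face_model(cfg: dict):
    name = cfg["face_model"]
    if name == "deepface":
        return load_deepface()
    if name == "label":
        return load_label_oracle()
    raise ValueError(f"unknown face_model: {name!r}")
```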