Skip to content

Commit

Permalink
[Fix] Skip evaluation for MMBench_TEST on non-official servers
Browse files Browse the repository at this point in the history
  • Loading branch information
kennymckormick committed Jan 20, 2024
1 parent 02860dd commit e992046
Show file tree
Hide file tree
Showing 2 changed files with 15 additions and 2 deletions.
8 changes: 7 additions & 1 deletion run.py
Original file line number Diff line number Diff line change
Expand Up @@ -56,7 +56,13 @@ def main():
if args.mode == 'all':
logger.error(f'Dataset {dataset_name} does not support `evaluation` now, will skip the evaluation. ')

model = infer_data_job(model, model_name=model_name, dataset_name=dataset_name, verbose=args.verbose, api_nproc=args.nproc, ignore_failed=args.ignore)
model = infer_data_job(model, model_name=model_name, dataset_name=dataset_name, verbose=args.verbose, api_nproc=args.nproc, ignore_failed=args.ignore)

if dataset_name in ['MMBench_TEST_CN', 'MMBench_TEST_EN']:
if not MMBenchOfficialServer():
logger.error(f'Can not evaluate {dataset_name} on non-official servers, will skip the evaluation. ')
continue

if rank == 0:
time.sleep(3)
res = None
Expand Down
9 changes: 8 additions & 1 deletion vlmeval/smp/vlm.py
Original file line number Diff line number Diff line change
Expand Up @@ -121,4 +121,11 @@ def circular_pred(df, extract_func=None):
flag_map[i % shift] = False
flag_map = {k: v for k, v in flag_map.items() if valid_map[k]}
flags = list(flag_map.values())
return np.mean(flags)
return np.mean(flags)

def MMBenchOfficialServer():
    """Detect whether this machine is an official MMBench evaluation server.

    The check is purely file-based: the host counts as official iff at least
    one of the official MMBench TSV data files is present under the LMU data
    root. Returns True on the first match, False when none exist.
    """
    data_root = LMUDataRoot()
    official_sets = ('MMBench', 'MMBench_CN', 'MMBench_TEST_EN', 'MMBench_TEST_CN')
    # any() short-circuits on the first existing file, same as the early return.
    return any(osp.exists(f'{data_root}/{name}.tsv') for name in official_sets)

0 comments on commit e992046

Please sign in to comment.