From 917b02d5eecf6a683087c598a5e988d86c440243 Mon Sep 17 00:00:00 2001 From: MingxuanXia Date: Tue, 19 Mar 2024 03:42:02 +0000 Subject: [PATCH] Update multi modal evaluation --- docs/examples/multimodal.md | 32 ++-- examples/multimodal.ipynb | 37 ++--- promptbench/dataload/dataset.py | 262 ++++++++++++++++---------------- 3 files changed, 152 insertions(+), 179 deletions(-) diff --git a/docs/examples/multimodal.md b/docs/examples/multimodal.md index c360cd8..d1a8a78 100644 --- a/docs/examples/multimodal.md +++ b/docs/examples/multimodal.md @@ -28,32 +28,18 @@ print(pb.SUPPORTED_DATASETS_VLM) dataset = pb.DatasetLoader.load_dataset("mmmu") # print the first 5 examples -dataset[:5] +for idx in range(5): + print(dataset[idx]) ``` All supported datasets: ['vqav2', 'nocaps', 'science_qa', 'math_vista', 'ai2d', 'mmmu', 'chart_qa'] - - - - - - [{'images': [], - 'answer': 'B', - 'question': ' Baxter Company has a relevant range of production between 15,000 and 30,000 units. The following cost data represents average variable costs per unit for 25,000 units of production. If 30,000 units are produced, what are the per unit manufacturing overhead costs incurred?\nA: $6\nB: $7\nC: $8\nD: $9'}, - {'images': [], - 'answer': 'C', - 'question': 'Assume accounts have normal balances, solve for the one missing account balance: Dividends. Equipment was recently purchased, so there is neither depreciation expense nor accumulated depreciation. \nA: $194,815\nB: $182,815\nC: $12,000\nD: $9,000'}, - {'images': [], - 'answer': 'B', - 'question': 'Maxwell Software, Inc., has the following mutually exclusive projects.Suppose the company uses the NPV rule to rank these two projects. Which project should be chosen if the appropriate discount rate is 15 percent?\nA: Project A\nB: Project B'}, - {'images': [], - 'answer': 'D', - 'question': "Each situation below relates to an independent company's Owners' Equity. Calculate the missing values of company 2.\nA: $1,620\nB: $12,000\nC: $51,180\nD: $0"}, - {'images': [], - 'answer': 'B', - 'question': 'The following data show the units in beginning work in process inventory, the number of units started, the number of units transferred, and the percent completion of the ending work in process for conversion. Given that materials are added at the beginning of the process, what are the equivalent units for conversion costs for each quarter using the weighted-average method? Assume that the quarters are independent.\nA: 132,625\nB: 134,485\nC: 135,332\nD: 132,685'}] - + Images already saved to local, loading file: /home/v-mingxia/promptbench/promptbench/data/mmmu/validation.json + {'images': [], 'image_paths': ['/home/v-mingxia/promptbench/promptbench/data/mmmu/validation/0_image_1.png'], 'answer': 'B', 'question': ' Baxter Company has a relevant range of production between 15,000 and 30,000 units. The following cost data represents average variable costs per unit for 25,000 units of production. If 30,000 units are produced, what are the per unit manufacturing overhead costs incurred?\nA: $6\nB: $7\nC: $8\nD: $9'} + {'images': [], 'image_paths': ['/home/v-mingxia/promptbench/promptbench/data/mmmu/validation/1_image_1.png'], 'answer': 'C', 'question': 'Assume accounts have normal balances, solve for the one missing account balance: Dividends. Equipment was recently purchased, so there is neither depreciation expense nor accumulated depreciation. \nA: $194,815\nB: $182,815\nC: $12,000\nD: $9,000'} + {'images': [], 'image_paths': ['/home/v-mingxia/promptbench/promptbench/data/mmmu/validation/2_image_1.png'], 'answer': 'B', 'question': 'Maxwell Software, Inc., has the following mutually exclusive projects.Suppose the company uses the NPV rule to rank these two projects. Which project should be chosen if the appropriate discount rate is 15 percent?\nA: Project A\nB: Project B'} + {'images': [], 'image_paths': ['/home/v-mingxia/promptbench/promptbench/data/mmmu/validation/3_image_1.png'], 'answer': 'D', 'question': "Each situation below relates to an independent company's Owners' Equity. Calculate the missing values of company 2.\nA: $1,620\nB: $12,000\nC: $51,180\nD: $0"} + {'images': [], 'image_paths': ['/home/v-mingxia/promptbench/promptbench/data/mmmu/validation/4_image_1.png'], 'answer': 'B', 'question': 'The following data show the units in beginning work in process inventory, the number of units started, the number of units transferred, and the percent completion of the ending work in process for conversion. Given that materials are added at the beginning of the process, what are the equivalent units for conversion costs for each quarter using the weighted-average method? Assume that the quarters are independent.\nA: 132,625\nB: 134,485\nC: 135,332\nD: 132,685'} ## Load models @@ -104,7 +90,7 @@ for prompt in prompts: for data in tqdm(dataset): # process input input_text = pb.InputProcess.basic_format(prompt, data) - input_images = data['images'] + input_images = data['images'] # please use data['image_paths'] instead of data['images'] for models that only support image path/url, such as GPT-4v label = data['answer'] raw_pred = model(input_images, input_text) # process output diff --git a/examples/multimodal.ipynb b/examples/multimodal.ipynb index adb3014..9a072a3 100644 --- a/examples/multimodal.ipynb +++ b/examples/multimodal.ipynb @@ -51,32 +51,14 @@ "output_type": "stream", "text": [ "All supported datasets: \n", - "['vqav2', 'nocaps', 'science_qa', 'math_vista', 'ai2d', 'mmmu', 'chart_qa']\n" + "['vqav2', 'nocaps', 'science_qa', 'math_vista', 'ai2d', 'mmmu', 'chart_qa']\n", + "Images already saved to local, loading file: /home/v-mingxia/promptbench/promptbench/data/mmmu/validation.json\n", + "{'images': [], 'image_paths': ['/home/v-mingxia/promptbench/promptbench/data/mmmu/validation/0_image_1.png'], 'answer': 'B', 'question': ' Baxter Company has a relevant range of production between 15,000 and 30,000 units. The following cost data represents average variable costs per unit for 25,000 units of production. If 30,000 units are produced, what are the per unit manufacturing overhead costs incurred?\\nA: $6\\nB: $7\\nC: $8\\nD: $9'}\n", + "{'images': [], 'image_paths': ['/home/v-mingxia/promptbench/promptbench/data/mmmu/validation/1_image_1.png'], 'answer': 'C', 'question': 'Assume accounts have normal balances, solve for the one missing account balance: Dividends. Equipment was recently purchased, so there is neither depreciation expense nor accumulated depreciation. \\nA: $194,815\\nB: $182,815\\nC: $12,000\\nD: $9,000'}\n", + "{'images': [], 'image_paths': ['/home/v-mingxia/promptbench/promptbench/data/mmmu/validation/2_image_1.png'], 'answer': 'B', 'question': 'Maxwell Software, Inc., has the following mutually exclusive projects.Suppose the company uses the NPV rule to rank these two projects. Which project should be chosen if the appropriate discount rate is 15 percent?\\nA: Project A\\nB: Project B'}\n", + "{'images': [], 'image_paths': ['/home/v-mingxia/promptbench/promptbench/data/mmmu/validation/3_image_1.png'], 'answer': 'D', 'question': \"Each situation below relates to an independent company's Owners' Equity. Calculate the missing values of company 2.\\nA: $1,620\\nB: $12,000\\nC: $51,180\\nD: $0\"}\n", + "{'images': [], 'image_paths': ['/home/v-mingxia/promptbench/promptbench/data/mmmu/validation/4_image_1.png'], 'answer': 'B', 'question': 'The following data show the units in beginning work in process inventory, the number of units started, the number of units transferred, and the percent completion of the ending work in process for conversion. Given that materials are added at the beginning of the process, what are the equivalent units for conversion costs for each quarter using the weighted-average method? Assume that the quarters are independent.\\nA: 132,625\\nB: 134,485\\nC: 135,332\\nD: 132,685'}\n" ] - }, - { - "data": { - "text/plain": [ - "[{'images': [],\n", - " 'answer': 'B',\n", - " 'question': ' Baxter Company has a relevant range of production between 15,000 and 30,000 units. The following cost data represents average variable costs per unit for 25,000 units of production. If 30,000 units are produced, what are the per unit manufacturing overhead costs incurred?\\nA: $6\\nB: $7\\nC: $8\\nD: $9'},\n", - " {'images': [],\n", - " 'answer': 'C',\n", - " 'question': 'Assume accounts have normal balances, solve for the one missing account balance: Dividends. Equipment was recently purchased, so there is neither depreciation expense nor accumulated depreciation. \\nA: $194,815\\nB: $182,815\\nC: $12,000\\nD: $9,000'},\n", - " {'images': [],\n", - " 'answer': 'B',\n", - " 'question': 'Maxwell Software, Inc., has the following mutually exclusive projects.Suppose the company uses the NPV rule to rank these two projects. Which project should be chosen if the appropriate discount rate is 15 percent?\\nA: Project A\\nB: Project B'},\n", - " {'images': [],\n", - " 'answer': 'D',\n", - " 'question': \"Each situation below relates to an independent company's Owners' Equity. Calculate the missing values of company 2.\\nA: $1,620\\nB: $12,000\\nC: $51,180\\nD: $0\"},\n", - " {'images': [],\n", - " 'answer': 'B',\n", - " 'question': 'The following data show the units in beginning work in process inventory, the number of units started, the number of units transferred, and the percent completion of the ending work in process for conversion. Given that materials are added at the beginning of the process, what are the equivalent units for conversion costs for each quarter using the weighted-average method? Assume that the quarters are independent.\\nA: 132,625\\nB: 134,485\\nC: 135,332\\nD: 132,685'}]" - ] - }, - "execution_count": 2, - "metadata": {}, - "output_type": "execute_result" } ], "source": [ @@ -89,7 +71,8 @@ "dataset = pb.DatasetLoader.load_dataset(\"mmmu\")\n", "\n", "# print the first 5 examples\n", - "dataset[:5]" + "for idx in range(5):\n", + " print(dataset[idx])" ] }, { @@ -219,7 +202,7 @@ " for data in tqdm(dataset):\n", " # process input\n", " input_text = pb.InputProcess.basic_format(prompt, data)\n", - " input_images = data['images']\n", + " input_images = data['images'] # please use data['image_paths'] instead of data['images'] for models that only support image path/url, such as GPT-4v\n", " label = data['answer']\n", " raw_pred = model(input_images, input_text)\n", " # process output\n", diff --git a/promptbench/dataload/dataset.py b/promptbench/dataload/dataset.py index 03c3803..88066ad 100644 --- a/promptbench/dataload/dataset.py +++ b/promptbench/dataload/dataset.py @@ -6,6 +6,8 @@ import random import requests import json +from PIL import Image as PILImage +from tqdm import tqdm from promptbench.config import * from datasets import load_dataset @@ -61,6 +63,42 @@ def __getitem__(self, idx): def extract_answer(self, output): return output + def save_images_to_local(self, dataset, split, key_list): + # Get the parent directory + cur_dir = os.path.dirname(os.path.abspath(__file__)) + + self.data_dir = os.path.join(os.path.dirname(cur_dir), 'data') + if not os.path.exists(self.data_dir): + os.mkdir(self.data_dir) + + self.dataset_dir = os.path.join(self.data_dir, dataset) + if not os.path.exists(self.dataset_dir): + os.mkdir(self.dataset_dir) + + self.split_dir = os.path.join(self.dataset_dir, split) + if not os.path.exists(self.split_dir): + os.mkdir(self.split_dir) + + self.filepath = os.path.join(self.dataset_dir, f"{split}.json") + if not os.path.exists(self.filepath): + local_path_dict = {} + print('Saveing images to local path: ', self.split_dir) + for idx in tqdm(range(len(self.data))): + local_path_dict[str(idx)] = [] + for key in key_list: + if self.data[idx][key] is not None: + image_dir = os.path.join(self.split_dir, f'{idx}_{key}.png') + self.data[idx][key].save(image_dir) + local_path_dict[str(idx)].append(image_dir) + print('Saving file: ', self.filepath) + with open(self.filepath, 'w') as f: + json.dump(local_path_dict, f) + else: + print('Images already saved to local, loading file: ', self.filepath) + with open(self.filepath, 'r') as f: + local_path_dict = json.load(f) + + self.local_path_dict = local_path_dict class BoolLogic(Dataset): """ @@ -663,32 +701,30 @@ class VQAv2(Dataset): Example data format: { - 'question_type': 'what is', - 'multiple_choice_answer': 'picnic table', - 'answers': [{'answer': 'table', 'answer_confidence': 'yes', 'answer_id': 1}, - {'answer': 'table', 'answer_confidence': 'yes', 'answer_id': 2}, - {'answer': 'table', 'answer_confidence': 'yes', 'answer_id': 3}, - {'answer': 'picnic table', 'answer_confidence': 'yes', 'answer_id': 4}, - {'answer': 'picnic table', 'answer_confidence': 'yes', 'answer_id': 5}, - {'answer': 'picnic table', 'answer_confidence': 'yes', 'answer_id': 6}, - {'answer': 'picnic table', 'answer_confidence': 'yes', 'answer_id': 7}, - {'answer': 'picnic table', 'answer_confidence': 'yes', 'answer_id': 8}, - {'answer': 'skateboard', 'answer_confidence': 'yes', 'answer_id': 9}, - {'answer': 'picnic table', 'answer_confidence': 'yes', 'answer_id': 10}], - 'image_id': 262148, - 'answer_type': 'other', - 'question_id': 262148002, - 'question': 'What is he on top of?', - 'image': + 'images': [], + 'image_paths': ['/Users/username/.cache/huggingface/datasets/downloads/extracted/adf50b3f63fdc93e9e2d368b11520a5796a4e904a80852e73a19a7112fca3592/val2014/COCO_val2014_000000262148.jpg'], + 'answers': [{'answer': 'down', 'answer_confidence': 'yes', 'answer_id': 1}, + {'answer': 'down', 'answer_confidence': 'yes', 'answer_id': 2}, + {'answer': 'at table', 'answer_confidence': 'yes', 'answer_id': 3}, + {'answer': 'skateboard', 'answer_confidence': 'yes', 'answer_id': 4}, + {'answer': 'down', 'answer_confidence': 'yes', 'answer_id': 5}, + {'answer': 'table', 'answer_confidence': 'yes', 'answer_id': 6}, + {'answer': 'down', 'answer_confidence': 'yes', 'answer_id': 7}, + {'answer': 'down', 'answer_confidence': 'yes', 'answer_id': 8}, + {'answer': 'down', 'answer_confidence': 'yes', 'answer_id': 9}, + {'answer': 'down', 'answer_confidence': 'yes', 'answer_id': 10}], + 'question': 'Where is he looking?' } """ def __init__(self): - data = load_dataset("HuggingFaceM4/VQAv2", split="validation") + from datasets import Image + data = load_dataset("HuggingFaceM4/VQAv2", split="validation").cast_column("image", Image(decode=False)) self.data = data def __getitem__(self, idx): assert len(self.data) > 0, "Empty dataset. Please load data first." - return {"images": [self.data[idx]['image']], + return {"images": [PILImage.open(self.data[idx]['image']['path'])], + "image_paths": [self.data[idx]['image']['path']], "answers": self.data[idx]['answers'], "question": self.data[idx]['question'],} @@ -703,17 +739,9 @@ class NoCaps(Dataset): Example data format: { - 'image': , - 'image_coco_url': 'https://s3.amazonaws.com/nocaps/val/0013ea2087020901.jpg', - 'image_date_captured': '2018-11-06 11:04:33', - 'image_file_name': '0013ea2087020901.jpg', - 'image_height': 1024, - 'image_width': 732, - 'image_id': 0, - 'image_license': 0, - 'image_open_images_id': '0013ea2087020901', - 'annotations_ids': [0, 1, 2, 3, 4, 5, 6, 7, 8, 9], - 'annotations_captions': ['A baby is standing in front of a house.', + 'images': [], + 'image_paths': ['https://s3.amazonaws.com/nocaps/val/0013ea2087020901.jpg'], + 'answers': ['A baby is standing in front of a house.', 'A little girl in a white jacket and sandals.', 'A young child stands in front of a house.', 'A child is wearing a white shirt and standing on a side walk. ', @@ -726,12 +754,16 @@ class NoCaps(Dataset): } """ def __init__(self): - data = load_dataset("HuggingFaceM4/NoCaps", split="validation") + from datasets import Image + data = load_dataset("HuggingFaceM4/NoCaps", split="validation").cast_column("image", Image(decode=False)) + data1 = load_dataset("HuggingFaceM4/NoCaps", split="validation") self.data = data + self.data1 = data1 def __getitem__(self, idx): assert len(self.data) > 0, "Empty dataset. Please load data first." - return {"images": [self.data[idx]['image']], + return {"images": [self.data1[idx]['image']], + "image_paths": [self.data[idx]['image']['path']], "answers": self.data[idx]['annotations_captions']} class MathVista(Dataset): @@ -745,37 +777,22 @@ class MathVista(Dataset): Example data format: { - 'pid': '1', - 'question': "When a spring does work on an object, we cannot find the work by simply multiplying the spring force by the object's displacement. The reason is that there is no one value for the force-it changes. However, we can split the displacement up into an infinite number of tiny parts and then approximate the force in each as being constant. Integration sums the work done in all those parts. Here we use the generic result of the integration.\r\n\r\nIn Figure, a cumin canister of mass $m=0.40 \\mathrm{~kg}$ slides across a horizontal frictionless counter with speed $v=0.50 \\mathrm{~m} / \\mathrm{s}$. It then runs into and compresses a spring of spring constant $k=750 \\mathrm{~N} / \\mathrm{m}$. When the canister is momentarily stopped by the spring, by what distance $d$ is the spring compressed?", - 'image': 'images/1.jpg', - 'decoded_image': , - 'choices': None, - 'unit': None, - 'precision': 1.0, + 'images': [], + 'image_paths': ['/Users/username/promptbench/promptbench/data/math_vista/testmini/0_decoded_image.png'], 'answer': '1.2', - 'question_type': 'free_form', - 'answer_type': 'float', - 'metadata': {'category': 'math-targeted-vqa', - 'context': 'scientific figure', - 'grade': 'college', - 'img_height': 720, - 'img_width': 1514, - 'language': 'english', - 'skills': ['scientific reasoning'], - 'source': 'SciBench', - 'split': 'testmini', - 'task': 'textbook question answering'}, - 'query': "Hint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: When a spring does work on an object, we cannot find the work by simply multiplying the spring force by the object's displacement. The reason is that there is no one value for the force-it changes. However, we can split the displacement up into an infinite number of tiny parts and then approximate the force in each as being constant. Integration sums the work done in all those parts. Here we use the generic result of the integration.\r\n\r\nIn Figure, a cumin canister of mass $m=0.40 \\mathrm{~kg}$ slides across a horizontal frictionless counter with speed $v=0.50 \\mathrm{~m} / \\mathrm{s}$. It then runs into and compresses a spring of spring constant $k=750 \\mathrm{~N} / \\mathrm{m}$. When the canister is momentarily stopped by the spring, by what distance $d$ is the spring compressed?" + 'question': "When a spring does work on an object, we cannot find the work by simply multiplying the spring force by the object's displacement. The reason is that there is no one value for the force-it changes. However, we can split the displacement up into an infinite number of tiny parts and then approximate the force in each as being constant. Integration sums the work done in all those parts. Here we use the generic result of the integration.\r\n\r\nIn Figure, a cumin canister of mass $m=0.40 \\mathrm{~kg}$ slides across a horizontal frictionless counter with speed $v=0.50 \\mathrm{~m} / \\mathrm{s}$. It then runs into and compresses a spring of spring constant $k=750 \\mathrm{~N} / \\mathrm{m}$. When the canister is momentarily stopped by the spring, by what distance $d$ is the spring compressed?\nANSWER TYPE: float" } """ def __init__(self): data = load_dataset("AI4Math/MathVista", split="testmini") self.data = data + self.save_images_to_local(dataset='math_vista', split='testmini', key_list=['decoded_image']) def __getitem__(self, idx): assert len(self.data) > 0, "Empty dataset. Please load data first." return {"images": [self.data[idx]['decoded_image']], + "image_paths": self.local_path_dict[str(idx)], "answer": self.data[idx]['answer'], "question": self.data[idx]['question'] + "\nANSWER TYPE: " + self.data[idx]['answer_type'],} @@ -790,27 +807,27 @@ class AI2D(Dataset): Example data format: { - 'question': 'which of these define dairy item', - 'options': ['c', 'D', 'b', 'a'], - 'answer': '1', - 'image': + 'images': [], + 'image_paths': ['/Users/username/promptbench/promptbench/data/ai2d/test/0_image.png'], + 'question': 'which of these define dairy item\n0: c\n1: D\n2: b\n3: a', + 'answer': '1' } """ def __init__(self): data = load_dataset("lmms-lab/ai2d", split="test") - self.data = [] - - for d in data: - choices_dict = dict(enumerate(d['options'])) - choices = '' - for k, v in choices_dict.items(): - choices += f"\n{k}: {v}" + self.data = data + self.save_images_to_local(dataset='ai2d', split='test', key_list=['image']) - self.data.append({ - "images": [d['image']], - "question": d['question'] + choices, - "answer": d['answer'] - }) + def __getitem__(self, idx): + assert len(self.data) > 0, "Empty dataset. Please load data first." + choices_dict = dict(enumerate(self.data[idx]['options'])) + choices = '' + for k, v in choices_dict.items(): + choices += f"\n{k}: {v}" + return({"images": [self.data[idx]['image']], + "image_paths": self.local_path_dict[str(idx)], + "question": self.data[idx]['question'] + choices, + "answer": self.data[idx]['answer']}) class ChartQA(Dataset): """ @@ -823,19 +840,21 @@ class ChartQA(Dataset): Example data format: { - 'type': 'human_test', - 'question': 'How many food item is shown in the bar graph?', + 'images': [], + 'image_paths': ['/Users/username/promptbench/promptbench/data/chart_qa/test/0_image.png'], 'answer': '14', - 'image': + 'question': 'How many food item is shown in the bar graph?' } """ def __init__(self): data = load_dataset("lmms-lab/ChartQA", split="test") self.data = data + self.save_images_to_local(dataset='chart_qa', split='test', key_list=['image']) def __getitem__(self, idx): assert len(self.data) > 0, "Empty dataset. Please load data first." return {"images": [self.data[idx]['image']], + "image_paths": self.local_path_dict[str(idx)], "answer": self.data[idx]['answer'], "question": self.data[idx]['question'],} @@ -850,38 +869,31 @@ class ScienceQA(Dataset): Example data format: { - 'image': None, - 'question': 'Which figure of speech is used in this text?\nSing, O goddess, the anger of Achilles son of Peleus, that brought countless ills upon the Achaeans.\n—Homer, The Iliad', - 'choices': ['chiasmus', 'apostrophe'], + 'images': [], + 'image_paths': ['/Users/username/promptbench/promptbench/data/science_qa/validation/0_image.png'], 'answer': 1, - 'hint': '', - 'task': 'closed choice', - 'grade': 'grade11', - 'subject': 'language science', - 'topic': 'figurative-language', - 'category': 'Literary devices', - 'skill': 'Classify the figure of speech: anaphora, antithesis, apostrophe, assonance, chiasmus, understatement', - 'lecture': 'Figures of speech are words or phrases that use language in a nonliteral or unusual way. They can make writing more expressive.\nAnaphora is the repetition of the same word or words at the beginning of several phrases or clauses.\nWe are united. We are powerful. We are winners.\nAntithesis involves contrasting opposing ideas within a parallel grammatical structure.\nI want to help, not to hurt.\nApostrophe is a direct address to an absent person or a nonhuman entity.\nOh, little bird, what makes you sing so beautifully?\nAssonance is the repetition of a vowel sound in a series of nearby words.\nTry to light the fire.\nChiasmus is an expression in which the second half parallels the first but reverses the order of words.\nNever let a fool kiss you or a kiss fool you.\nUnderstatement involves deliberately representing something as less serious or important than it really is.\nAs you know, it can get a little cold in the Antarctic.', - 'solution': 'The text uses apostrophe, a direct address to an absent person or a nonhuman entity.\nO goddess is a direct address to a goddess, a nonhuman entity.'} + 'question': "Which animal's mouth is also adapted for bottom feeding?\n0: discus\n1: armored catfish" } """ def __init__(self): data = load_dataset("derek-thomas/ScienceQA", split="validation") - self.data = [] - + data_with_images = [] for d in data: if d['image'] is not None: + data_with_images.append(d) + self.data = data_with_images + self.save_images_to_local(dataset='science_qa', split='validation', key_list=['image']) - choices_dict = dict(enumerate(d['choices'])) - choices = '' - for k, v in choices_dict.items(): - choices += f"\n{k}: {v}" - - self.data.append({ - "images": [d['image']], - "question": d['question'] + choices, - "answer": d['answer'] - }) + def __getitem__(self, idx): + assert len(self.data) > 0, "Empty dataset. Please load data first." + choices_dict = dict(enumerate(self.data[idx]['choices'])) + choices = '' + for k, v in choices_dict.items(): + choices += f"\n{k}: {v}" + return {"images": [self.data[idx]['image']], + "image_paths": self.local_path_dict[str(idx)], + "answer": self.data[idx]['answer'], + "question": self.data[idx]['question'] + choices,} class MMMU(Dataset): """ @@ -893,43 +905,35 @@ class MMMU(Dataset): MMMU: A Massive Multi-discipline Multimodal Understanding and Reasoning Benchmark for Expert AGI (https://arxiv.org/abs/2311.16502) { - 'id': 'validation_Accounting_1', - 'question': ' Baxter Company has a relevant range of production between 15,000 and 30,000 units. The following cost data represents average variable costs per unit for 25,000 units of production. If 30,000 units are produced, what are the per unit manufacturing overhead costs incurred?', - 'options': "['$6', '$7', '$8', '$9']", - 'explanation': '', - 'image_1': , - 'image_2': None, - 'image_3': None, - 'image_4': None, - 'image_5': None, - 'image_6': None, - 'image_7': None, - 'img_type': "['Tables']", + 'images': [], + 'image_paths': ['/Users/username/promptbench/promptbench/data/mmmu/validation/0_image_1.png'], 'answer': 'B', - 'topic_difficulty': 'Medium', - 'question_type': 'multiple-choice', - 'subfield': 'Managerial Accounting' + 'question': ' Baxter Company has a relevant range of production between 15,000 and 30,000 units. The following cost data represents average variable costs per unit for 25,000 units of production. If 30,000 units are produced, what are the per unit manufacturing overhead costs incurred?\nA: $6\nB: $7\nC: $8\nD: $9' } """ def __init__(self): data = load_dataset("lmms-lab/MMMU", split="validation") - self.data = [] + self.data = data + self.save_images_to_local(dataset='mmmu', split='validation', key_list=['image_1', 'image_2', 'image_3', 'image_4', 'image_5', 'image_6', 'image_7']) - for d in data: - - choices_dict = dict(enumerate(eval(d['options']))) - choices = '' - for k, v in choices_dict.items(): - choices += f"\n{chr(ord('A') + int(k))}: {v}" - question = d['question'] + choices - - images = [] - for i in range(1, 7): - if f'image {i}' in question: - if d[f'image_{i}'].mode == 'P': - d[f'image_{i}'] = d[f'image_{i}'].convert('RGBA') - images.append(d[f'image_{i}']) - - self.data.append({"images": images, - "answer": d['answer'], - "question": question}) \ No newline at end of file + def __getitem__(self, idx): + assert len(self.data) > 0, "Empty dataset. Please load data first." + + d = self.data[idx] + choices_dict = dict(enumerate(eval(d['options']))) + choices = '' + for k, v in choices_dict.items(): + choices += f"\n{chr(ord('A') + int(k))}: {v}" + question = d['question'] + choices + + images = [] + for i in range(1, 7): + if f'image {i}' in question: + if d[f'image_{i}'].mode == 'P': + d[f'image_{i}'] = d[f'image_{i}'].convert('RGBA') + images.append(d[f'image_{i}']) + + return {"images": images, + "image_paths": self.local_path_dict[str(idx)], + "answer": d['answer'], + "question": question,} \ No newline at end of file