Added initial code

pytorch · xuzhao9 · Mar 23, 2022 · Mar 23, 2022 · Mar 23, 2022 · Mar 24, 2022
commit 2f096ac94824f86788b2bdbe9bc7c7063a590fd0
diff --git a/torchbenchmark/e2e_models/fambench_xlmr/__init__.py b/torchbenchmark/e2e_models/fambench_xlmr/__init__.py
@@ -1,18 +1,60 @@
 import os
+import sys
+import subprocess
 from pathlib import Path
+from dataclasses import dataclass
 
 from torchbenchmark.util.e2emodel import E2EBenchmarkModel
 
 from typing import Optional, List
 
 CURRENT_DIR = Path(os.path.dirname(os.path.realpath(__file__)))
+FAMBENCH_ROOT = CURRENT_DIR.parent.parent.parent.joinpath("submodules", "FAMBench")
+
+def _create_data_dir(data_dir: str):
+ data_dir = Path(data_dir)
+ data_dir.mkdir(parents=True, exist_ok=True)
+ return data_dir
+
def _get_fambench_test_root(name: str):
    """Return the ``benchmarks`` directory inside the FAMBench checkout.

    ``name`` is accepted but not read by this function; the same directory is
    returned for every value.

    Raises:
        AssertionError: if the directory does not exist.
    """
    benchmarks_dir = FAMBENCH_ROOT / "benchmarks"
    assert benchmarks_dir.exists(), (
        f"Can't find FAMBench source at {benchmarks_dir.absolute()},"
        "please check out the submodules."
    )
    return benchmarks_dir
+
@dataclass
class FAMBenchXLMREvalConfig:
    """Settings for the FAMBench XLM-R inference run.

    Original config reference:
    https://github.com/facebookresearch/FAMBench/blob/main/benchmarks/run_xlmr_ootb.sh

    Every attribute is annotated so that ``@dataclass`` treats it as a field;
    without annotations the generated ``__init__`` accepts no arguments and
    ``FAMBenchXLMREvalConfig(batchsize=...)`` raises ``TypeError``.
    """
    config_name: str = "default-config"
    nbatches: int = 10
    batchsize: int = 16
    seqlength: int = 16
    vocabsize: int = 250000
    warmupbatches: int = 1
    log_dir: str = os.path.join(CURRENT_DIR, ".data", "logs")

    @property
    def config_flags(self) -> List[str]:
        """Command-line flags rendered from the current field values.

        Built on every access (rather than once at class-definition time) so
        that per-instance overrides such as ``batchsize`` are reflected. Each
        flag is its own list element.
        """
        return [
            "--inference-only",
            f"--num-batches={self.nbatches}",
            f"--batch-size={self.batchsize}",
            f"--sequence-length={self.seqlength}",
            f"--vocab-size={self.vocabsize}",
            f"--famconfig={self.config_name}",
            "--half-model",
            # The comma after this entry matters: without it the string is
            # implicitly concatenated with the --logdir flag below.
            f"--warmup-batches={self.warmupbatches}",
            f"--logdir={self.log_dir}",
        ]
 
 class Model(E2EBenchmarkModel):
+ DEFAULT_EVAL_BSIZE = FAMBenchXLMREvalConfig.batchsize
  def __init__(self, test: str, batch_size: Optional[int]=None, extra_args: List[str]=[]):
  super().__init__(test=test, batch_size=batch_size, extra_args=extra_args)
+ self.name = "xlmr"
+ self.implementation = "ootb"
+ self.code_root = _get_fambench_test_root(self.name)
+ if test == "eval":
+ self.config = FAMBenchXLMREvalConfig(batchsize=self.batch_size)
+ _create_data_dir(self.config.log_dir)
 
  def train(self):
- pass
+ raise NotImplementedError("FAMBench XLMR train is not implemented yet.")
 
  def eval(self):
- pass
+ prog_args = [sys.executable, f"{self.name}/{self.sys.implementation}/{self.name}.py"]
+ prog_args.extend(self.config.config_flags)
+ subprocess.check_call(prog_args, cwd=self.code_root)