Automatically use bfloat16 in some cases

EleutherAI · norabelrose · May 3, 2023 · May 1, 2023 · May 1, 2023 · May 1, 2023
commit a6d78307bd476d0bd51767bcf117000476bed65e
diff --git a/elk/extraction/extraction.py b/elk/extraction/extraction.py
@@ -172,7 +172,7 @@ def extract_hiddens(
  # welcome message on every rank
  with redirect_stdout(None) if rank != 0 else nullcontext():
  model = instantiate_model(
- cfg.model, device_map={"": device}, load_in_8bit=cfg.int8, torch_dtype=dtype
+ cfg.model, device=device, load_in_8bit=cfg.int8, torch_dtype=dtype
  )
  tokenizer = instantiate_tokenizer(
  cfg.model, truncation_side="left", verbose=rank == 0

diff --git a/elk/utils/hf_utils.py b/elk/utils/hf_utils.py
@@ -1,3 +1,4 @@
+import torch
 import transformers
 from transformers import (
  AutoConfig,
@@ -19,10 +20,29 @@
 _AUTOREGRESSIVE_SUFFIXES = ["ConditionalGeneration"] + _DECODER_ONLY_SUFFIXES
 
 
-def instantiate_model(model_str: str, **kwargs) -> PreTrainedModel:
+def instantiate_model(
+ model_str: str,
+ device: str | torch.device = "cpu",
+ **kwargs,
+) -> PreTrainedModel:
  """Instantiate a model string with the appropriate `Auto` class."""
+ device = torch.device(device)
+ kwargs["device_map"] = {"": device}
+
  with prevent_name_conflicts():
  model_cfg = AutoConfig.from_pretrained(model_str)
+
+ # If the checkpoint is fp32, we're not on CPU, and bf16 is supported, load in
+ # bf16. Usually models with fp32 weights were actually trained in bf16, and
+ # converting them doesn't hurt model quality (accuracy).
+ if (
+ device.type != "cpu"
+ and model_cfg.torch_dtype == torch.float32
+ and torch.cuda.is_bf16_supported()
+ ):
+ kwargs["torch_dtype"] = torch.bfloat16
+ print("Weights are in fp32, but bf16 is available. Converting to bf16.")
+
  archs = model_cfg.architectures
  if not isinstance(archs, list):
  return AutoModel.from_pretrained(model_str, **kwargs)