Remove unnecessary fp32/bf16 conversion #1169

Merged
7 changes: 3 additions & 4 deletions configs/neox_arguments.md
@@ -1058,7 +1058,7 @@ Text Generation arguments

- **prompt_end**: str

-Default = 
+Default =


a single prompt's end. Defaults to newline
@@ -1100,7 +1100,7 @@ Text Generation arguments

- **eval_results_prefix**: str

-Default = 
+Default =

prefix to which to save evaluation results - final fp will be {eval_results_prefix}_eval_results_yy-mm-dd-HH-MM.json

@@ -1844,7 +1844,7 @@ Args for deepspeed config

Default = None





@@ -2144,4 +2144,3 @@ Args for deepspeed runner (deepspeed.launcher.runner).
Default = None

Adds a `--account` to the DeepSpeed launch command. In DeeperSpeed this is passed on to the SlurmLauncher as well. Sometimes necessary for cluster rules, or so I've heard.
-
9 changes: 0 additions & 9 deletions megatron/mpu/mappings.py
@@ -57,22 +57,13 @@ def _split(input_):
     if world_size == 1:
         return input_

-    # Bf16 convert
-    dt = input_.dtype
-    if dt == torch.bfloat16 and get_fp32_allreduce():
-        input_ = input_.float()
-
     # Split along last dimension.
     input_list = split_tensor_along_last_dim(input_, world_size)

     # Note: torch.split does not create contiguous tensors by default.
     rank = get_model_parallel_rank()
     output = input_list[rank].contiguous()

-    # Bf16 convert
-    if dt == torch.bfloat16 and get_fp32_allreduce():
-        output = output.bfloat16()
-
     return output


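For context on why these casts are dead weight: `_split` only slices the input along its last dimension and takes a contiguous copy of the local rank's shard. No arithmetic or reduction happens, so a round-trip through fp32 (the usual guard for lossy bf16 all-reduce accumulation) cannot change any values here. A minimal standalone sketch of that argument in plain PyTorch, not the NeoX code itself — `torch.chunk` stands in for `split_tensor_along_last_dim`, and a CPU build with bfloat16 support is assumed:

```python
import torch

# Splitting is pure indexing, so wrapping it in an fp32 round-trip
# cannot change the result -- the casts this PR removes were no-ops
# for correctness in _split.
x = torch.randn(4, 8).bfloat16()

direct = torch.chunk(x, 2, dim=-1)             # split along the last dim
roundtrip = torch.chunk(x.float(), 2, dim=-1)  # the old fp32 detour

for d, r in zip(direct, roundtrip):
    # bf16 -> fp32 is exact, and casting back recovers the same bits.
    assert torch.equal(d, r.bfloat16())

# torch.chunk/split return non-contiguous views; like the real _split,
# take a .contiguous() copy of this rank's shard before passing it on.
shard = direct[0].contiguous()
assert shard.is_contiguous()
```

The fp32 upcast behind `get_fp32_allreduce()` exists to protect reductions that actually accumulate values across ranks; a pure split has nothing to accumulate, so there is nothing for the extra precision to protect.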