llama: implement YaRN RoPE scaling #2268

Merged Nov 1, 2023 (36 commits)

Commits
8dec38c
llama: implement NTK-By-Parts (NTKv2) RoPE scaling
cebtenzzre Jul 18, 2023
6aeb46b
CUDA implementation
cebtenzzre Jul 19, 2023
9348aa4
Metal implementation
cebtenzzre Jul 21, 2023
a30ae20
implement new YaRN algorithm
cebtenzzre Sep 5, 2023
b5ced4f
Merge branch 'master' of https://github.com/ggerganov/llama.cpp into …
cebtenzzre Sep 5, 2023
826269a
ggml : increase GGML_MAX_OP_PARAMS
cebtenzzre Sep 5, 2023
cf731d5
YaRN : avoid NaN if unused betas are zero
cebtenzzre Sep 5, 2023
dcb058c
YaRN : fix missing parameter in CUDA impl
cebtenzzre Sep 5, 2023
281b26e
convert : reduce unnecessary variables in Params
cebtenzzre Sep 6, 2023
a06c729
Merge branch 'master' of https://github.com/ggerganov/llama.cpp into …
cebtenzzre Sep 21, 2023
dc26a0d
llama : simplify use of context params
cebtenzzre Sep 21, 2023
904d4ed
llama : store YaRN parameters in GGUF
cebtenzzre Sep 14, 2023
56abb9a
fix convert scripts
cebtenzzre Sep 21, 2023
43eaf06
llama : fix C compatibility
cebtenzzre Sep 21, 2023
fe788c4
don't hardcode max_pos_emb
cebtenzzre Sep 21, 2023
e0b120c
address review comments
cebtenzzre Sep 21, 2023
19bb74e
restore backwards compatiblity with *.rope.scale_linear
cebtenzzre Sep 21, 2023
4d5fe73
better option descriptions in help
cebtenzzre Sep 21, 2023
7466415
gguf : store scaling type as a string instead of an int
cebtenzzre Oct 7, 2023
4f4e948
improve printing of YaRN parameters
cebtenzzre Oct 7, 2023
5d7a3a5
allow forcing ext_factor to zero if scaling type is YaRN
cebtenzzre Oct 7, 2023
9bd050f
Merge branch 'master' of https://github.com/ggerganov/llama.cpp into …
cebtenzzre Oct 7, 2023
babf0e0
fix rope_cuda parameter order
cebtenzzre Oct 8, 2023
0050e1e
default n_yarn_orig_ctx to n_ctx_train
cebtenzzre Oct 8, 2023
09c3102
fix uninitialized cparams
cebtenzzre Oct 8, 2023
57c3442
make printed param formatting more consistent
cebtenzzre Oct 8, 2023
a20b3e6
fix missing import
cebtenzzre Oct 11, 2023
9ef91b1
Merge branch 'master' of https://github.com/ggerganov/llama.cpp into …
cebtenzzre Oct 13, 2023
9ae10b3
Fix YaRN inverted scaling and add "rope.scaling.type" to GGUF (#1)
jquesnelle Oct 20, 2023
14cf93b
fix YaRN ramp, make mscale conditional, add --yarn-orig-ctx (#2)
jquesnelle Oct 20, 2023
237f1e7
Merge branch 'master' of https://github.com/ggerganov/llama.cpp into …
cebtenzzre Oct 22, 2023
bc8395d
Merge branch 'master' of https://github.com/ggerganov/llama.cpp into …
cebtenzzre Oct 23, 2023
4d5ed83
Merge branch 'master' of https://github.com/ggerganov/llama.cpp into …
cebtenzzre Oct 24, 2023
9fc8238
fix loading rope.scaling.original_context_length from GGUF (#3)
jquesnelle Oct 30, 2023
15f26ef
implement YaRN for GPT-NeoX RoPE
cebtenzzre Nov 1, 2023
081f738
Merge branch 'master' of https://github.com/ggerganov/llama.cpp into …
cebtenzzre Nov 1, 2023
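
The commit messages above reference the two YaRN-specific pieces this PR wires into ggml: a per-dimension ramp that blends interpolated and extrapolated rotary frequencies (a30ae20, 14cf93b), and an attention scale, mscale, that is only applied when context scaling is active. The following is a minimal Python sketch of those formulas as described in the YaRN paper; the function and variable names are illustrative and are not the identifiers used in the ggml/CUDA/Metal implementations in this PR.

import math

def yarn_ramp(i: int, low: float, high: float) -> float:
    # Blend weight for rotary dimension pair i: 1.0 for low-index (high-frequency)
    # dimensions that are left extrapolated as-is, 0.0 for high-index (low-frequency)
    # dimensions that are fully interpolated, linear in between. `low` and `high`
    # are the correction dimensions derived from the beta_fast/beta_slow parameters.
    if high <= low:
        high = low + 0.001  # avoid a division by zero when the betas coincide
    return 1.0 - min(1.0, max(0.0, (i - low) / (high - low)))

def yarn_theta(theta_extrap: float, scale: float, ramp: float, ext_factor: float = 1.0) -> float:
    # Mix plain position interpolation (theta / scale) with extrapolation (theta).
    # ext_factor = 0 disables the mix entirely and degenerates to linear scaling.
    theta_interp = theta_extrap / scale
    mix = ramp * ext_factor
    return theta_interp * (1.0 - mix) + theta_extrap * mix

def yarn_mscale(scale: float) -> float:
    # Attention magnitude scaling, applied only when the context is actually
    # extended ("make mscale conditional", commit 14cf93b).
    return 0.1 * math.log(scale) + 1.0 if scale > 1.0 else 1.0
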
Changes from commit 56abb9a406ef34a995c56be838ebe6529cd50438: fix convert scripts
cebtenzzre committed Sep 21, 2023
convert-baichuan-hf-to-gguf.py (2 additions, 1 deletion)

@@ -154,7 +154,8 @@ def parse_args() -> argparse.Namespace:
 if "rope_scaling" in hparams and hparams["rope_scaling"] != None and "factor" in hparams["rope_scaling"]:
     if "type" in hparams["rope_scaling"]:
         if hparams["rope_scaling"]["type"] == "linear":
-            gguf_writer.add_rope_scale_linear(hparams["rope_scaling"]["factor"])
+            gguf_writer.add_rope_scaling_type(gguf.RopeScalingType.LINEAR)
+            gguf_writer.add_rope_scaling_factor(hparams["rope_scaling"]["factor"])


 # TOKENIZATION
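
For reference, this is roughly what the Hugging Face config read by both convert scripts looks like. The values are illustrative and not taken from a specific checkpoint: the linear case carries only type and factor (handled by the Baichuan script above), while YaRN-finetuned checkpoints also carry the original_max_position_embeddings and finetuned keys that the convert.py hunk below picks up.

import json

# Illustrative rope_scaling block from a config.json (values are made up):
hparams = json.loads("""
{
    "max_position_embeddings": 16384,
    "rope_scaling": {
        "type": "yarn",
        "factor": 4.0,
        "original_max_position_embeddings": 4096,
        "finetuned": true
    }
}
""")

rope_scaling = hparams.get("rope_scaling")
if rope_scaling is not None and "factor" in rope_scaling:
    print(rope_scaling.get("type"), rope_scaling["factor"])  # -> yarn 4.0
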
convert.py (13 additions, 12 deletions)

@@ -205,13 +205,13 @@ def loadHFTransformerJson(model: LazyModel, config_path: Path) -> Params:
         rope_scaling_type = f_rope_scale = n_orig_ctx = rope_finetuned = None
         rope_scaling = config.get("rope_scaling")

-        if rope_scaling is not None and typ := rope_scaling.get("type"):
+        if rope_scaling is not None and (typ := rope_scaling.get("type")):
             rope_factor = rope_scaling.get("factor")
             f_rope_scale = rope_factor
             if typ == "linear":
-                rope_scaling_type = RopeScalingType.LINEAR
+                rope_scaling_type = gguf.RopeScalingType.LINEAR
             elif typ == "yarn":
-                rope_scaling_type = RopeScalingType.YARN
+                rope_scaling_type = gguf.RopeScalingType.YARN
                 n_orig_ctx = rope_scaling['original_max_position_embeddings']
                 rope_finetuned = rope_scaling['finetuned']
         else:
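
The first change in this hunk is purely syntactic: an assignment expression used as an operand of `and` must be parenthesized, so the deleted line did not even parse. A quick stand-alone illustration (dictionary contents made up):

rope_scaling = {"type": "yarn", "factor": 4.0}

# The unparenthesized form is a SyntaxError (':=' binds looser than 'and'):
#   if rope_scaling is not None and typ := rope_scaling.get("type"):

# Parenthesized, as in the fixed line: binds `typ`, then tests its truthiness.
if rope_scaling is not None and (typ := rope_scaling.get("type")):
    print(typ)  # -> yarn
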
@@ -231,10 +231,10 @@ def loadHFTransformerJson(model: LazyModel, config_path: Path) -> Params:
             n_layer = config["num_hidden_layers"],
             n_ctx = n_ctx,
             n_ff = config["intermediate_size"],
-            n_head = config["num_attention_heads"],
-            n_head_kv = config["num_key_value_heads"] if "num_key_value_heads" in config else n_head,
+            n_head = (n_head := config["num_attention_heads"]),
+            n_head_kv = config.get("num_key_value_heads", n_head),
             f_norm_eps = config["rms_norm_eps"],
-            f_rope_freq_base = config["rope_theta"] if "rope_theta" in config else None,
+            f_rope_freq_base = config.get("rope_theta"),
             f_rope_scale = f_rope_scale,
             n_orig_ctx = n_orig_ctx,
             rope_finetuned = rope_finetuned,
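
The other pattern introduced here relies on Python evaluating call arguments left to right: `(n_head := ...)` binds n_head while the earlier keyword argument is built, so the later config.get("num_key_value_heads", n_head) default can reuse it without an extra assignment statement. A self-contained sketch with a made-up dataclass standing in for Params:

from dataclasses import dataclass

@dataclass
class Heads:  # hypothetical stand-in for the real Params dataclass
    n_head: int
    n_head_kv: int

config = {"num_attention_heads": 32}  # no "num_key_value_heads" key present

heads = Heads(
    n_head    = (n_head := config["num_attention_heads"]),
    n_head_kv = config.get("num_key_value_heads", n_head),  # falls back to n_head
)
print(heads)  # Heads(n_head=32, n_head_kv=32)
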
@@ -247,7 +247,7 @@ def loadOriginalParamsJson(model: LazyModel, config_path: Path) -> Params:
         config = json.load(open(config_path))

         # hack to determine LLaMA v1 vs v2 vs CodeLlama
-        if f_rope_freq_base == 1000000:
+        if config.get("rope_theta") == 1000000:
             # CodeLlama
             n_ctx = 16384
         elif config["norm_eps"] == 1e-05:
@@ -263,10 +263,10 @@ def loadOriginalParamsJson(model: LazyModel, config_path: Path) -> Params:
             n_layer = config["n_layers"],
             n_ctx = n_ctx,
             n_ff = model["layers.0.feed_forward.w1.weight"].shape[0],
-            n_head = config["n_heads"],
-            n_head_kv = config["n_kv_heads"] if "n_kv_heads" in config else n_head,
+            n_head = (n_head := config["n_heads"]),
+            n_head_kv = config.get("n_kv_heads", n_head),
             f_norm_eps = config["norm_eps"],
-            f_rope_freq_base = config["rope_theta"] if "rope_theta" in config else None,
+            f_rope_freq_base = config.get("rope_theta"),
         )

     @staticmethod
@@ -834,14 +834,15 @@ def add_meta_arch(self, params: Params) -> None:
             self.gguf.add_rope_freq_base(params.f_rope_freq_base)

         if params.rope_scaling_type:
+            assert params.f_rope_scale is not None
             self.gguf.add_rope_scaling_type(params.rope_scaling_type)
             self.gguf.add_rope_scaling_factor(params.f_rope_scale)

         if params.n_orig_ctx is not None:
-            self.gguf.add_rope_original_context_length(params.n_orig_ctx)
+            self.gguf.add_rope_scaling_orig_ctx_len(params.n_orig_ctx)

         if params.rope_finetuned is not None:
-            self.gguf.add_rope_finetuned(params.rope_finetuned)
+            self.gguf.add_rope_scaling_finetuned(params.rope_finetuned)

         if params.ftype is not None:
             self.gguf.add_file_type(params.ftype)
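
For reference, a minimal sketch of how the renamed writer calls above might be exercised directly with the gguf Python package. The constructor arguments, file name, and the exact KV key strings in the comments are assumptions based on the method names in this diff and on the commit messages (rope.scaling.type, rope.scaling.original_context_length), not something the diff shows verbatim.

import gguf

# Assumed GGUFWriter signature: (path, architecture name); adjust to your gguf version.
writer = gguf.GGUFWriter("yarn-llama.gguf", "llama")

writer.add_rope_scaling_type(gguf.RopeScalingType.YARN)  # rope.scaling.type = "yarn"
writer.add_rope_scaling_factor(4.0)                      # rope.scaling.factor
writer.add_rope_scaling_orig_ctx_len(4096)               # rope.scaling.original_context_length
writer.add_rope_scaling_finetuned(True)                  # presumably rope.scaling.finetuned

# Write the metadata-only file (no tensors in this sketch) and close it.
writer.write_header_to_file()
writer.write_kv_data_to_file()
writer.close()
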