ggerganov · DOGEwbx · Nov 13, 2023 · Nov 14, 2023 · Nov 14, 2023 · Nov 14, 2023
diff --git a/.gitignore b/.gitignore
@@ -97,5 +97,6 @@ tests/test-quantize-perf
 tests/test-sampling
 tests/test-tokenizer-0-llama
 tests/test-tokenizer-0-falcon
+tests/test-tokenizer-0-deepseek_coder
 tests/test-tokenizer-1-llama
 tests/test-tokenizer-1-bpe
diff --git a/Makefile b/Makefile
@@ -8,7 +8,8 @@ BUILD_TARGETS = \
 TEST_TARGETS = \
  tests/test-llama-grammar tests/test-grammar-parser tests/test-double-float tests/test-grad0 tests/test-opt \
  tests/test-quantize-fns tests/test-quantize-perf tests/test-sampling tests/test-tokenizer-0-llama \
- tests/test-tokenizer-0-falcon tests/test-tokenizer-1-llama tests/test-tokenizer-1-bpe
+ tests/test-tokenizer-0-falcon tests/test-tokenizer-0-deepseek_coder \
+ tests/test-tokenizer-1-llama tests/test-tokenizer-1-bpe
 
 # Code coverage output files
 COV_TARGETS = *.gcno tests/*.gcno *.gcda tests/*.gcda *.gcov tests/*.gcov lcov-report gcovr-report
@@ -69,6 +70,8 @@ test: $(TEST_TARGETS)
  ./$$test_target $(CURDIR)/models/ggml-vocab-llama.gguf; \
  elif [ "$$test_target" = "tests/test-tokenizer-0-falcon" ]; then \
  ./$$test_target $(CURDIR)/models/ggml-vocab-falcon.gguf; \
+ elif [ "$$test_target" = "tests/test-tokenizer-0-deepseek_coder" ]; then \
+ ./$$test_target $(CURDIR)/models/ggml-vocab-deepseek-coder.gguf; \
  elif [ "$$test_target" = "tests/test-tokenizer-1-llama" ]; then \
  continue; \
  elif [ "$$test_target" = "tests/test-tokenizer-1-bpe" ]; then \
@@ -728,6 +731,9 @@ tests/test-tokenizer-0-falcon: tests/test-tokenizer-0-falcon.cpp ggml.o llama.o
 tests/test-tokenizer-0-llama: tests/test-tokenizer-0-llama.cpp ggml.o llama.o $(COMMON_DEPS) $(OBJS)
  $(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS)
 
+tests/test-tokenizer-0-deepseek_coder: tests/test-tokenizer-0-deepseek_coder.cpp ggml.o llama.o $(COMMON_DEPS) $(OBJS)
+ $(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS)
+
 tests/test-tokenizer-1-bpe: tests/test-tokenizer-1-bpe.cpp ggml.o llama.o $(COMMON_DEPS) $(OBJS)
  $(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS)
 

diff --git a/convert-hf-to-gguf.py b/convert-hf-to-gguf.py
@@ -166,9 +166,36 @@ def from_model_architecture(model_architecture):
  return RefactModel
  if model_architecture == "PersimmonForCausalLM":
  return PersimmonModel
+ if model_architecture == "LlamaForCausalLM":
+ return DeepseekCoderModel
  if model_architecture in ("StableLMEpochForCausalLM", "LlavaStableLMEpochForCausalLM"):
  return StableLMModel
  return Model
+
+    @staticmethod
+    def from_model_name(model_name: str):  # resolves the --model-name override; unrecognised names fall back to Model
+        model_name_lower = model_name.lower()  # matching is case-insensitive
+        if model_name_lower in ("stablelmepoch", "llavastablelmepoch"):
+            return StableLMModel
+        if model_name_lower == "gptneox":
+            return GPTNeoXModel
+        if model_name_lower == "bloom":
+            return BloomModel
+        if model_name_lower == "mpt":
+            return MPTModel
+        if model_name_lower == "baichuan":  # exact match: `in ("baichuan")` is a substring test on a str, not tuple membership
+            return BaichuanModel
+        if model_name_lower in ("falcon", "rw"):
+            return FalconModel
+        if model_name_lower == "gptbigcode":
+            return StarCoderModel
+        if model_name_lower == "gptrefact":
+            return RefactModel
+        if model_name_lower == "persimmon":
+            return PersimmonModel
+        if model_name_lower == "deepseekcoder":
+            return DeepseekCoderModel
+        return Model
 
  def _is_model_safetensors(self) -> bool:
  return Model.count_model_parts(self.dir_model, ".safetensors") > 0
@@ -201,12 +228,14 @@ def _get_model_architecture(self) -> gguf.MODEL_ARCH:
  return gguf.MODEL_ARCH.REFACT
  if arch == "PersimmonForCausalLM":
  return gguf.MODEL_ARCH.PERSIMMON
+ if arch == "LlamaForCausalLM":
+ return gguf.MODEL_ARCH.LLAMA
  if arch in ("StableLMEpochForCausalLM", "LlavaStableLMEpochForCausalLM"):
  return gguf.MODEL_ARCH.STABLELM
 
  raise NotImplementedError(f'Architecture "{arch}" not supported!')
 
- def _set_vocab_gpt2(self):
+ def _set_vocab_gpt2(self, tokenizer_model:str = "gpt2"):
  dir_model = self.dir_model
  hparams = self.hparams
  tokens: list[bytearray] = []
@@ -235,7 +264,7 @@ def _set_vocab_gpt2(self):
  tokens.append(reverse_vocab[i])
  toktypes.append(gguf.TokenType.NORMAL)
 
- self.gguf_writer.add_tokenizer_model("gpt2")
+ self.gguf_writer.add_tokenizer_model(tokenizer_model)
  self.gguf_writer.add_token_list(tokens)
  self.gguf_writer.add_token_types(toktypes)
 
@@ -817,6 +846,26 @@ def write_tensors(self):
  self.gguf_writer.add_tensor(new_name, data)
 
 
+class DeepseekCoderModel(Model):  # converter that writes head/RoPE/RMS-norm metadata and a "deepseek_coder" tokenizer model
+    def set_gguf_parameters(self):  # extends the base parameters with attention-head, RoPE and RMS-norm settings from hparams
+        super().set_gguf_parameters()
+        head_count = self.hparams["num_attention_heads"]
+        head_count_kv = self.hparams.get("num_key_value_heads", head_count)  # falls back to head_count when the key is absent
+        self.gguf_writer.add_head_count(head_count)
+        self.gguf_writer.add_rope_dimension_count(self.hparams["hidden_size"] // head_count)
+        self.gguf_writer.add_head_count_kv(head_count_kv)
+        self.gguf_writer.add_layer_norm_rms_eps(self.hparams["rms_norm_eps"])
+        if "rope_theta" in self.hparams:  # not every Llama config.json carries rope_theta; skip it rather than raise KeyError
+            self.gguf_writer.add_rope_freq_base(self.hparams["rope_theta"])
+        rope_scaling = self.hparams.get("rope_scaling")  # may be missing or null in the config
+        if rope_scaling is not None and "factor" in rope_scaling and rope_scaling.get("type") == "linear":
+            self.gguf_writer.add_rope_scaling_type(gguf.RopeScalingType.LINEAR)
+            self.gguf_writer.add_rope_scaling_factor(rope_scaling["factor"])
+
+    def set_vocab(self):  # reuses the GPT-2 vocab writer but records the tokenizer model as "deepseek_coder"
+        self._set_vocab_gpt2("deepseek_coder")
+
+
 class StableLMModel(Model):
  def set_gguf_parameters(self):
  hparams = self.hparams
@@ -854,6 +903,7 @@ def parse_args() -> argparse.Namespace:
  "model", type=Path,
  help="directory containing model file",
  )
+ parser.add_argument("--model-name", type=str, default=None, help="name of the model")
 
  return parser.parse_args()
 
@@ -880,7 +930,7 @@ def parse_args() -> argparse.Namespace:
 
 hparams = Model.load_hparams(dir_model)
 
-model_class = Model.from_model_architecture(hparams["architectures"][0])
+model_class = Model.from_model_name(args.model_name) if args.model_name else Model.from_model_architecture(hparams["architectures"][0])
 model_instance = model_class(dir_model, ftype_map[args.outtype], fname_out, args.bigendian)
 
 print("Set model parameters")