Add llama2 13b model config

google · Apr 5, 2024 · 122db98
1 parent 673a5f8
commit 122db98
Show file tree

Hide file tree

Showing 3 changed files with 30 additions and 6 deletions.
diff --git a/MaxText/configs/models/llama2-13b.yml b/MaxText/configs/models/llama2-13b.yml
@@ -0,0 +1,28 @@
+# Copyright 2024 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# https://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# model config for llama2-13b
+
+base_emb_dim: 5120
+base_num_query_heads: 40
+base_num_kv_heads: 40
+base_mlp_dim: 13824
+base_num_decoder_layers: 40
+head_dim: 128
+mlp_activations: ["silu","linear"]
+vocab_size: 32000
+enable_dropout: False
+logits_via_embedding: False
+normalization_layer_epsilon: 1.0e-5
+decoder_block: "llama2"
diff --git a/MaxText/llama_or_mistral_ckpt.py b/MaxText/llama_or_mistral_ckpt.py
@@ -64,17 +64,13 @@ def permute_to_match_maxtext_rope(arr):
  'num_kv_heads': 40,
  'dims_per_head': 128,
  'vocab': 32000,
- 'num_gpus': 1,
- 'fused_qkv': True,
  },
  'llama2-7b': {
  'num_layers': 32,
  'num_heads': 32,
  'num_kv_heads': 32,
  'dims_per_head': 128,
  'vocab': 32000,
- 'base_emb_dim': 4096,
- 'base_mlp_dim': 11008,
  },
  'mistral-7b': {
  'num_layers': 32,

diff --git a/MaxText/pyconfig.py b/MaxText/pyconfig.py
@@ -64,7 +64,7 @@ def validate_keys(keys):
 
 def validate_model_name(s: str) -> bool:
  # currently supported models
- valid_model_names = ('default', 'llama2-7b', 'llama2-70b', 'mistral-7b',
+ valid_model_names = ('default', 'llama2-7b', 'llama2-13b', 'llama2-70b', 'mistral-7b',
  'mixtral-8x7b', 'gemma-7b','gemma-2b',
  'gpt3-175b', 'gpt3-22b', 'gpt3-6b', 'gpt3-52k')
  if s not in valid_model_names:
@@ -76,7 +76,7 @@ def validate_no_keys_overwritten_twice(keys1: list[str], keys2: list[str]):
  overwritten_keys = [k for k in keys1 if k in keys2]
  if overwritten_keys:
  raise ValueError(
- f"Keys {overwritten_keys} are overwritten from both the model" 
+ f"Keys {overwritten_keys} are overwritten from both the model"
  " and the environment/command line. This isn't allowed.")
 
 _config = None