Skip to content

Commit

Permalink
Add llama2 13b model config
Browse files Browse the repository at this point in the history
  • Loading branch information
morgandu authored and JoeZijunZhou committed Apr 5, 2024
1 parent 673a5f8 commit 122db98
Show file tree
Hide file tree
Showing 3 changed files with 30 additions and 6 deletions.
28 changes: 28 additions & 0 deletions MaxText/configs/models/llama2-13b.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
# Copyright 2024 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# Model config for llama2-13b.
#
# These keys set the architecture hyperparameters for the model.
# NOTE(review): presumably they override the matching keys of the base
# config when this model is selected by name -- confirm against pyconfig.py.

# Embedding (hidden) width. Consistent with the attention settings below:
# 40 query heads * head_dim 128 = 5120.
base_emb_dim: 5120
base_num_query_heads: 40
# Same count as the query heads, i.e. one KV head per query head.
base_num_kv_heads: 40
# Width of the MLP hidden layer.
base_mlp_dim: 13824
base_num_decoder_layers: 40
head_dim: 128
# NOTE(review): presumably a gated MLP (silu branch multiplied by a linear
# branch) -- confirm against the decoder block implementation.
mlp_activations: ["silu","linear"]
vocab_size: 32000
enable_dropout: False
# NOTE(review): presumably False means the output projection does not reuse
# the input embedding table -- confirm against the base config docs.
logits_via_embedding: False
normalization_layer_epsilon: 1.0e-5
# Name of the decoder block implementation to use.
decoder_block: "llama2"
4 changes: 0 additions & 4 deletions MaxText/llama_or_mistral_ckpt.py
Original file line number Diff line number Diff line change
Expand Up @@ -64,17 +64,13 @@ def permute_to_match_maxtext_rope(arr):
'num_kv_heads': 40,
'dims_per_head': 128,
'vocab': 32000,
'num_gpus': 1,
'fused_qkv': True,
},
'llama2-7b': {
'num_layers': 32,
'num_heads': 32,
'num_kv_heads': 32,
'dims_per_head': 128,
'vocab': 32000,
'base_emb_dim': 4096,
'base_mlp_dim': 11008,
},
'mistral-7b': {
'num_layers': 32,
Expand Down
4 changes: 2 additions & 2 deletions MaxText/pyconfig.py
Original file line number Diff line number Diff line change
Expand Up @@ -64,7 +64,7 @@ def validate_keys(keys):

def validate_model_name(s: str) -> bool:
# currently supported models
valid_model_names = ('default', 'llama2-7b', 'llama2-70b', 'mistral-7b',
valid_model_names = ('default', 'llama2-7b', 'llama2-13b', 'llama2-70b', 'mistral-7b',
'mixtral-8x7b', 'gemma-7b','gemma-2b',
'gpt3-175b', 'gpt3-22b', 'gpt3-6b', 'gpt3-52k')
if s not in valid_model_names:
Expand All @@ -76,7 +76,7 @@ def validate_no_keys_overwritten_twice(keys1: list[str], keys2: list[str]):
overwritten_keys = [k for k in keys1 if k in keys2]
if overwritten_keys:
raise ValueError(
f"Keys {overwritten_keys} are overwritten from both the model"
f"Keys {overwritten_keys} are overwritten from both the model"
" and the environment/command line. This isn't allowed.")

_config = None
Expand Down

0 comments on commit 122db98

Please sign in to comment.