From 5f2bca4ab8385e8703149e12164951248725ce77 Mon Sep 17 00:00:00 2001 From: whcao <41630003+HIT-cwh@users.noreply.github.com> Date: Tue, 18 Jun 2024 09:21:00 +0800 Subject: [PATCH 01/29] [Refactor] fix internlm2 dispatch (#779) * fix internlm2 dispatch * add detailed RuntimeError --- xtuner/model/modules/dispatch/__init__.py | 2 - xtuner/model/modules/dispatch/internlm2.py | 277 ++++++++++----------- 2 files changed, 134 insertions(+), 145 deletions(-) diff --git a/xtuner/model/modules/dispatch/__init__.py b/xtuner/model/modules/dispatch/__init__.py index c2033c3c8..7cb159515 100644 --- a/xtuner/model/modules/dispatch/__init__.py +++ b/xtuner/model/modules/dispatch/__init__.py @@ -125,8 +125,6 @@ ) ROTE_DISPATCH_MAPPING = dict( - InternLM2RotaryEmbedding=LazyObject( - 'xtuner.model.modules.dispatch.internlm2', 'InternLM2RotaryEmbedding'), InternLMRotaryEmbedding=LazyObject( 'xtuner.model.modules.dispatch.internlm', 'InternLMRotaryEmbedding'), MistralRotaryEmbedding=LazyObject('xtuner.model.modules.dispatch.mistral', diff --git a/xtuner/model/modules/dispatch/internlm2.py b/xtuner/model/modules/dispatch/internlm2.py index 6fbb2d0d6..5b855d4ab 100644 --- a/xtuner/model/modules/dispatch/internlm2.py +++ b/xtuner/model/modules/dispatch/internlm2.py @@ -1,62 +1,16 @@ # Copyright (c) OpenMMLab. All rights reserved. -import warnings from typing import Optional, Tuple import torch import torch.distributed as dist -import torch.nn.functional as F from einops import rearrange from mmengine import MessageHub +from transformers.cache_utils import Cache, StaticCache -from .attention import (SUPPORT_FLASH2, flash_attn_w_mask, flash_attn_wo_mask, - varlen_flash_attn) -from .triton_kernels import apply_rotary_emb - - -class InternLM2RotaryEmbedding(torch.nn.Module): - - def __init__(self, - dim, - max_position_embeddings=2048, - base=1000000, - device=None): - super().__init__() - self.dim = dim - self.max_position_embeddings = max_position_embeddings - self.base = base - self.inv_freq = 1.0 / ( - base**(torch.arange(0, dim, 2).float().to(device) / dim)) - - # Build here to make `torch.jit.trace` work. 
- self.max_seq_len_cached = max_position_embeddings - t = torch.arange( - self.max_seq_len_cached, - device=self.inv_freq.device, - dtype=self.inv_freq.dtype) - freqs = torch.einsum('i,j->ij', t, self.inv_freq) - emb = torch.cat((freqs, freqs), dim=-1) - self.cos_cached = emb.cos() - self.sin_cached = emb.sin() - - def forward(self, x, seq_len): - # x: [bs, num_attention_heads, seq_len, head_size] - if (seq_len > self.max_seq_len_cached - or self.cos_cached.device != x.device - or self.cos_cached.dtype != x.dtype): - self.max_seq_len_cached = seq_len - assert self.inv_freq.dtype == torch.float32 - t = torch.arange( - self.max_seq_len_cached, - device=x.device, - dtype=self.inv_freq.dtype) - freqs = torch.einsum('i,j->ij', t, self.inv_freq.to(t.device)) - emb = torch.cat((freqs, freqs), dim=-1).to(x.device) - self.cos_cached = emb.cos().to(x.dtype) - self.sin_cached = emb.sin().to(x.dtype) - return ( - self.cos_cached[:seq_len, ...], - self.sin_cached[:seq_len, ...], - ) +from xtuner.parallel.sequence import (get_sequence_parallel_world_size, + post_process_for_sequence_parallel_attn, + pre_process_for_sequence_parallel_attn) +from .attention import SUPPORT_FLASH2, flash_attn_wo_mask, varlen_flash_attn def rotate_half(x): @@ -66,9 +20,9 @@ def rotate_half(x): return torch.cat((-x2, x1), dim=-1) -def apply_rotary_pos_emb(q, k, cos, sin, position_ids, unsqueeze_dim=1): - cos = cos[position_ids].unsqueeze(unsqueeze_dim) - sin = sin[position_ids].unsqueeze(unsqueeze_dim) +def apply_rotary_pos_emb(q, k, cos, sin, position_ids=None, unsqueeze_dim=1): + cos = cos.unsqueeze(unsqueeze_dim) + sin = sin.unsqueeze(unsqueeze_dim) q_embed = (q * cos) + (rotate_half(q) * sin) k_embed = (k * cos) + (rotate_half(k) * sin) return q_embed, k_embed @@ -111,18 +65,17 @@ def internlm2_attn_forward( hidden_states: torch.Tensor, attention_mask: Optional[torch.LongTensor] = None, position_ids: Optional[torch.LongTensor] = None, - past_key_value: Optional[Tuple[torch.Tensor]] = None, + past_key_value: Optional[Cache] = None, output_attentions: bool = False, use_cache: bool = False, - **kwargs, + cache_position: Optional[torch.LongTensor] = None, ): - if 'padding_mask' in kwargs: - warnings.warn( - 'Passing `padding_mask` is deprecated and will be removed in v4.37' - 'Please make sure use `attention_mask` instead.`') - - # overwrite attention_mask with padding_mask - attention_mask = kwargs.pop('padding_mask') + if isinstance(past_key_value, StaticCache): + raise ValueError( + '`static` cache implementation is not compatible with ' + '`attn_implementation==flash_attention_2` make sure to use `sdpa` ' + 'in the mean time, and open an issue at ' + 'https://github.com/huggingface/transformers') output_attentions = False @@ -146,64 +99,68 @@ def internlm2_attn_forward( key_states = key_states.transpose(1, 2) value_states = value_states.transpose(1, 2) - kv_seq_len = key_states.shape[-2] - if past_key_value is not None: - kv_seq_len += past_key_value[0].shape[-2] - - # This modification is necessary for sequential parallel - assert position_ids is not None and (position_ids.max() + 1) >= kv_seq_len - cos, sin = self.rotary_emb(value_states, seq_len=position_ids.max() + 1) + cos, sin = self.rotary_emb(value_states, position_ids) query_states, key_states = apply_rotary_pos_emb(query_states, key_states, - cos, sin, position_ids) + cos, sin) if past_key_value is not None: - # reuse k, v, self_attention - key_states = torch.cat([past_key_value[0], key_states], dim=2) - value_states = torch.cat([past_key_value[1], 
value_states], dim=2) - - past_key_value = (key_states, value_states) if use_cache else None + # sin and cos are specific to RoPE models; + # cache_position needed for the static cache + cache_kwargs = { + 'sin': sin, + 'cos': cos, + 'cache_position': cache_position + } + key_states, value_states = past_key_value.update( + key_states, value_states, self.layer_idx, cache_kwargs) - # repeat kv for sequence parallel key_states = repeat_kv(key_states, self.num_key_value_groups) value_states = repeat_kv(value_states, self.num_key_value_groups) - if SUPPORT_FLASH2: - # the shape of attention_mask used by flash_attn and - # F.scaled_dot_product_attention are different - assert attention_mask is None or attention_mask.ndim == 2, \ - ('When using flash_attn, attention_mask.ndim should equal to 2.' - f'But got attention_mask.shape = {attention_mask.shape}.' - 'We can pass the `attn_implementation="flash_attention_2"` flag ' - 'to `.from_pretrained` method when instantiating a Internlm2 ' - 'model.') - # flash attn 2 need (bs, seq_len, nhead, h_dim) - query_states = query_states.transpose(1, 2) - key_states = key_states.transpose(1, 2) - value_states = value_states.transpose(1, 2) - - causal = self.is_causal and q_len != 1 - - if attention_mask is not None: - attn_output = flash_attn_w_mask( - query_states, - key_states, - value_states, - attention_mask, - causal=causal, - training=self.training) + query_states = query_states.transpose(1, 2) + key_states = key_states.transpose(1, 2) + value_states = value_states.transpose(1, 2) + + # In PEFT, usually we cast the layer norms in float32 for training + # stability reasons therefore the input hidden states gets silently + # casted in float32. Hence, we need cast them back in the correct dtype + # just to be sure everything works as expected. + # This might slowdown training & inference so it is recommended to not + # cast the LayerNorms in fp32. 
(InternLM2RMSNorm handles it correctly) + + input_dtype = query_states.dtype + if input_dtype == torch.float32: + if torch.is_autocast_enabled(): + target_dtype = torch.get_autocast_gpu_dtype() + # Handle the case where the model is quantized + elif hasattr(self.config, '_pre_quantization_dtype'): + target_dtype = self.config._pre_quantization_dtype else: - attn_output = flash_attn_wo_mask( - query_states, - key_states, - value_states, - causal=causal, - training=self.training) - else: - # use flash attention implemented by pytorch - # do not support sequence parallel - attn_output = F.scaled_dot_product_attention( - query_states, key_states, value_states, attn_mask=attention_mask) - attn_output = attn_output.transpose(1, 2) + target_dtype = self.wqkv.weight.dtype + + query_states = query_states.to(target_dtype) + key_states = key_states.to(target_dtype) + value_states = value_states.to(target_dtype) + + enable_sequence_parallel = ( + dist.is_initialized() and get_sequence_parallel_world_size() > 1 + and self.training) + if enable_sequence_parallel: + query_states, key_states, value_states = \ + pre_process_for_sequence_parallel_attn( + query_states, key_states, value_states) + + dropout_rate = 0.0 + attn_output = self._flash_attention_forward( + query_states, + key_states, + value_states, + attention_mask, + query_states.shape[1], + dropout=dropout_rate) + + if enable_sequence_parallel: + attn_output = post_process_for_sequence_parallel_attn(attn_output) attn_output = attn_output.reshape(bsz, q_len, self.hidden_size) attn_output = self.wo(attn_output) @@ -217,14 +174,21 @@ def internlm2_attn_forward( def internlm2_varlen_attn_forward( self, hidden_states: torch.Tensor, - attention_mask: Optional[torch.Tensor] = None, + attention_mask: Optional[torch.LongTensor] = None, position_ids: Optional[torch.LongTensor] = None, - past_key_value: Optional[Tuple[torch.Tensor]] = None, + past_key_value: Optional[Cache] = None, output_attentions: bool = False, use_cache: bool = False, + cache_position: Optional[torch.LongTensor] = None, ) -> Tuple[torch.Tensor, Optional[torch.Tensor], Optional[Tuple[torch.Tensor]]]: - # Modified from https://huggingface.co/internlm/internlm-7b/blob/939a68c0dc1bd5f35b63c87d44af05ce33379061/modeling_internlm.py#L161 # noqa:E501 + + if isinstance(past_key_value, StaticCache): + raise ValueError( + '`static` cache implementation is not compatible with ' + '`attn_implementation==flash_attention_2` make sure to use `sdpa` ' + 'in the mean time, and open an issue at ' + 'https://github.com/huggingface/transformers') message_hub = MessageHub.get_instance('varlen_attn_args') rank = dist.get_rank() @@ -238,6 +202,7 @@ def internlm2_varlen_attn_forward( f' set to 1, but got {bsz}') qkv_states = self.wqkv(hidden_states) + qkv_states = rearrange( qkv_states, 'b q (h gs d) -> b q h gs d', @@ -250,40 +215,63 @@ def internlm2_varlen_attn_forward( key_states = qkv_states[..., -2, :] value_states = qkv_states[..., -1, :] - kv_seq_len = key_states.shape[-3] + query_states = query_states.transpose(1, 2) + key_states = key_states.transpose(1, 2) + value_states = value_states.transpose(1, 2) + + try: + cos, sin = self.rotary_emb(value_states, position_ids) + except RuntimeError: + raise RuntimeError( + 'You are using the old version of InternLM2 model. The ' + '`modeling_internlm2.py` is outdated. 
Please update the InternLM2 ' + 'model.') + query_states, key_states = apply_rotary_pos_emb(query_states, key_states, + cos, sin) + if past_key_value is not None: - kv_seq_len += past_key_value[0].shape[-2] + # sin and cos are specific to RoPE models; + # cache_position needed for the static cache + cache_kwargs = { + 'sin': sin, + 'cos': cos, + 'cache_position': cache_position + } + key_states, value_states = past_key_value.update( + key_states, value_states, self.layer_idx, cache_kwargs) - if use_varlen_atten: - cos, sin = self.rotary_emb(value_states, max_seqlen) - query_states = apply_rotary_emb(query_states, - cos[position_ids].squeeze(0), - sin[position_ids].squeeze(0)) - key_states = apply_rotary_emb(key_states, cos[position_ids].squeeze(0), - sin[position_ids].squeeze(0)) - else: - query_states = query_states.transpose(1, 2) - key_states = key_states.transpose(1, 2) - value_states = value_states.transpose(1, 2) - cos, sin = self.rotary_emb(value_states, kv_seq_len) - query_states, key_states = apply_rotary_pos_emb( - query_states, key_states, cos, sin, position_ids) - - if past_key_value is not None: - # reuse k, v, self_attention - key_states = torch.cat([past_key_value[0], key_states], dim=2) - value_states = torch.cat([past_key_value[1], value_states], dim=2) - - past_key_value = (key_states, value_states) if use_cache else None - query_states = query_states.transpose(1, 2) - key_states = key_states.transpose(1, 2) - value_states = value_states.transpose(1, 2) + query_states = query_states.transpose(1, 2) + key_states = key_states.transpose(1, 2) + value_states = value_states.transpose(1, 2) + + # In PEFT, usually we cast the layer norms in float32 for training + # stability reasons therefore the input hidden states gets silently + # casted in float32. Hence, we need cast them back in the correct dtype + # just to be sure everything works as expected. + # This might slowdown training & inference so it is recommended to not + # cast the LayerNorms in fp32. 
(InternLM2RMSNorm handles it correctly) + + input_dtype = query_states.dtype + if input_dtype == torch.float32: + if torch.is_autocast_enabled(): + target_dtype = torch.get_autocast_gpu_dtype() + # Handle the case where the model is quantized + elif hasattr(self.config, '_pre_quantization_dtype'): + target_dtype = self.config._pre_quantization_dtype + else: + target_dtype = self.wqkv.weight.dtype + + query_states = query_states.to(target_dtype) + key_states = key_states.to(target_dtype) + value_states = value_states.to(target_dtype) # repeat kv for sequence parallel key_states = repeat_kv_bshd(key_states, self.num_key_value_groups) value_states = repeat_kv_bshd(value_states, self.num_key_value_groups) assert SUPPORT_FLASH2 + + dropout_rate = 0.0 if use_varlen_atten: attn_output = varlen_flash_attn( query_states, @@ -291,6 +279,8 @@ def internlm2_varlen_attn_forward( value_states, cumulative_len, max_seqlen, + causal=True, + dropout_p=dropout_rate, training=self.training) else: attn_output = flash_attn_wo_mask( @@ -298,7 +288,8 @@ def internlm2_varlen_attn_forward( key_states, value_states, causal=True, - training=False) + dropout_p=dropout_rate, + training=self.training) attn_output = attn_output.reshape(bsz, q_len, self.hidden_size) From 72b645f328b889c92fbf5f00a1714ef8d685ffd0 Mon Sep 17 00:00:00 2001 From: Wang Xinjiang Date: Thu, 20 Jun 2024 17:26:21 +0800 Subject: [PATCH 02/29] Fix zero3 compatibility issue for DPO (#781) * fix zero3 * reformat * reformat * reformat * reformat --- xtuner/model/dpo.py | 38 ++++++++++++++++++++------------------ 1 file changed, 20 insertions(+), 18 deletions(-) diff --git a/xtuner/model/dpo.py b/xtuner/model/dpo.py index 782f4d20f..b46ea1c50 100644 --- a/xtuner/model/dpo.py +++ b/xtuner/model/dpo.py @@ -16,23 +16,6 @@ from .sft import SupervisedFinetune -def create_reference_model(model): - if is_deepspeed_zero3_enabled(): - raise ValueError('DeepSpeed ZeRO-3 is enabled and is not compatible ' - 'with `create_reference_model()`. Please instantiate ' - 'your reference model directly with ' - '`AutoCausalLM.from_pretrained()`.') - - parameter_names = [n for n, _ in model.named_parameters()] - ref_model = deepcopy(model) - - # if no layers are shared, return copy of model - for param_name in parameter_names: - param = ref_model.get_parameter(param_name) - param.requires_grad = False - return ref_model.eval() - - class DPO(SupervisedFinetune): """A general class of DPO and its variants.""" @@ -50,7 +33,26 @@ def __init__(self, self.beta = beta if not self.use_lora: - self.ref_llm = create_reference_model(self.llm) + self.ref_llm = self.create_reference_model(ref_llm, **kwargs) + + def create_reference_model(self, ref_llm=None, **kwargs): + ref_model = None + if ref_llm is None: + if is_deepspeed_zero3_enabled(): + raise ValueError( + 'DeepSpeed ZeRO-3 is enabled and is not compatible ' + 'with `deepcopy(self.llm)`. 
Please instantiate ' + 'your reference model by modifying key `model.ref_llm` ' + 'in your config with `AutoCausalLM.from_pretrained()`.') + ref_model = deepcopy(self.llm) + else: + ref_model = SupervisedFinetune(ref_llm, **kwargs).llm + # freeze parameters + parameter_names = [n for n, _ in ref_model.named_parameters()] + for param_name in parameter_names: + param = ref_model.get_parameter(param_name) + param.requires_grad = False + return ref_model.eval() def _gather_masked_logits(self, logits, labels, mask): logits = torch.gather( From 937e9937f5f60c21bc7bc4d510936dfc2ba671eb Mon Sep 17 00:00:00 2001 From: fanqiNO1 <75657629+fanqiNO1@users.noreply.github.com> Date: Mon, 24 Jun 2024 11:17:15 +0800 Subject: [PATCH 03/29] [Fix] Fix map_fn in custom_dataset/sft (#785) --- .../sft/baichuan/baichuan2_13b_chat_qlora_custom_sft_e1.py | 4 ++-- .../sft/baichuan/baichuan2_7b_chat_qlora_custom_sft_e1.py | 4 ++-- .../sft/baichuan/baichuan_13b_chat_qlora_custom_sft_e1.py | 4 ++-- .../sft/baichuan/baichuan_7b_qlora_custom_sft_e1.py | 4 ++-- .../sft/chatglm/chatglm2_6b_qlora_custom_sft_e1.py | 4 ++-- .../sft/chatglm/chatglm3_6b_qlora_custom_sft_e1.py | 4 ++-- .../sft/deepseek/deepseek_moe_16b_chat_qlora_custom_sft_e1.py | 4 ++-- .../deepseekcoder_6_7b_instruct_qlora_custom_sft_e1.py | 4 ++-- .../sft/gemma/gemma_2b_it_qlora_custom_sft_e1.py | 4 ++-- .../custom_dataset/sft/gemma/gemma_2b_qlora_custom_sft_e1.py | 4 ++-- .../sft/gemma/gemma_7b_it_qlora_custom_sft_e1.py | 4 ++-- .../custom_dataset/sft/gemma/gemma_7b_qlora_custom_sft_e1.py | 4 ++-- .../sft/internlm/internlm2_chat_1_8b_qlora_custom_sft_e1.py | 4 ++-- .../sft/internlm/internlm2_chat_20b_qlora_custom_sft_e1.py | 4 ++-- .../sft/internlm/internlm2_chat_7b_qlora_custom_sft_e1.py | 4 ++-- .../sft/llama/llama2_70b_qlora_custom_sft_e1.py | 4 ++-- .../sft/llama/llama2_7b_chat_qlora_custom_sft_e1.py | 4 ++-- .../sft/mistral/mistral_7b_full_finetune_custom_sft_e1.py | 4 ++-- .../sft/mixtral/mixtral_8x7b_instruct_qlora_custom_sft_e1.py | 4 ++-- .../sft/qwen/qwen1_5_0_5b_chat_qlora_custom_sft_e1.py | 4 ++-- .../sft/qwen/qwen1_5_14b_chat_qlora_custom_sft_e1.py | 4 ++-- .../sft/qwen/qwen1_5_1_8b_chat_qlora_custom_sft_e1.py | 4 ++-- .../sft/qwen/qwen1_5_4b_chat_qlora_custom_sft_e1.py | 4 ++-- .../sft/qwen/qwen1_5_72b_chat_qlora_custom_sft_e1.py | 4 ++-- .../sft/qwen/qwen1_5_7b_chat_qlora_custom_sft_e1.py | 4 ++-- .../sft/qwen/qwen_1_8b_chat_qlora_custom_sft_e1.py | 4 ++-- .../custom_dataset/sft/qwen/qwen_72b_qlora_custom_sft_e1.py | 4 ++-- .../sft/qwen/qwen_7b_chat_qlora_custom_sft_e1.py | 4 ++-- .../sft/starcoder/starcoder_qlora_custom_sft_e1.py | 4 ++-- .../custom_dataset/sft/yi/yi_34b_qlora_custom_sft_e1.py | 4 ++-- .../custom_dataset/sft/yi/yi_6b_qlora_custom_sft_e1.py | 4 ++-- .../sft/zephyr/zephyr_7b_beta_qlora_custom_sft_e1.py | 4 ++-- 32 files changed, 64 insertions(+), 64 deletions(-) diff --git a/xtuner/configs/custom_dataset/sft/baichuan/baichuan2_13b_chat_qlora_custom_sft_e1.py b/xtuner/configs/custom_dataset/sft/baichuan/baichuan2_13b_chat_qlora_custom_sft_e1.py index c1f79073d..558887c04 100644 --- a/xtuner/configs/custom_dataset/sft/baichuan/baichuan2_13b_chat_qlora_custom_sft_e1.py +++ b/xtuner/configs/custom_dataset/sft/baichuan/baichuan2_13b_chat_qlora_custom_sft_e1.py @@ -26,7 +26,7 @@ from xtuner.dataset import process_hf_dataset from xtuner.dataset.collate_fns import default_collate_fn -from xtuner.dataset.map_fns import template_map_fn_factory +from xtuner.dataset.map_fns import openai_map_fn, template_map_fn_factory from 
xtuner.engine.hooks import (DatasetInfoHook, EvaluateChatHook, VarlenAttnArgsToMessageHubHook) from xtuner.engine.runner import TrainLoop @@ -111,7 +111,7 @@ dataset=dict(type=load_dataset, path='json', data_files=data_files), tokenizer=tokenizer, max_length=max_length, - dataset_map_fn=None, + dataset_map_fn=openai_map_fn, template_map_fn=dict( type=template_map_fn_factory, template=prompt_template), remove_unused_columns=True, diff --git a/xtuner/configs/custom_dataset/sft/baichuan/baichuan2_7b_chat_qlora_custom_sft_e1.py b/xtuner/configs/custom_dataset/sft/baichuan/baichuan2_7b_chat_qlora_custom_sft_e1.py index a2f4d445c..8df388a67 100644 --- a/xtuner/configs/custom_dataset/sft/baichuan/baichuan2_7b_chat_qlora_custom_sft_e1.py +++ b/xtuner/configs/custom_dataset/sft/baichuan/baichuan2_7b_chat_qlora_custom_sft_e1.py @@ -26,7 +26,7 @@ from xtuner.dataset import process_hf_dataset from xtuner.dataset.collate_fns import default_collate_fn -from xtuner.dataset.map_fns import template_map_fn_factory +from xtuner.dataset.map_fns import openai_map_fn, template_map_fn_factory from xtuner.engine.hooks import (DatasetInfoHook, EvaluateChatHook, VarlenAttnArgsToMessageHubHook) from xtuner.engine.runner import TrainLoop @@ -111,7 +111,7 @@ dataset=dict(type=load_dataset, path='json', data_files=data_files), tokenizer=tokenizer, max_length=max_length, - dataset_map_fn=None, + dataset_map_fn=openai_map_fn, template_map_fn=dict( type=template_map_fn_factory, template=prompt_template), remove_unused_columns=True, diff --git a/xtuner/configs/custom_dataset/sft/baichuan/baichuan_13b_chat_qlora_custom_sft_e1.py b/xtuner/configs/custom_dataset/sft/baichuan/baichuan_13b_chat_qlora_custom_sft_e1.py index 178cee847..3dc38eb4f 100644 --- a/xtuner/configs/custom_dataset/sft/baichuan/baichuan_13b_chat_qlora_custom_sft_e1.py +++ b/xtuner/configs/custom_dataset/sft/baichuan/baichuan_13b_chat_qlora_custom_sft_e1.py @@ -26,7 +26,7 @@ from xtuner.dataset import process_hf_dataset from xtuner.dataset.collate_fns import default_collate_fn -from xtuner.dataset.map_fns import template_map_fn_factory +from xtuner.dataset.map_fns import openai_map_fn, template_map_fn_factory from xtuner.engine.hooks import (DatasetInfoHook, EvaluateChatHook, VarlenAttnArgsToMessageHubHook) from xtuner.engine.runner import TrainLoop @@ -111,7 +111,7 @@ dataset=dict(type=load_dataset, path='json', data_files=data_files), tokenizer=tokenizer, max_length=max_length, - dataset_map_fn=None, + dataset_map_fn=openai_map_fn, template_map_fn=dict( type=template_map_fn_factory, template=prompt_template), remove_unused_columns=True, diff --git a/xtuner/configs/custom_dataset/sft/baichuan/baichuan_7b_qlora_custom_sft_e1.py b/xtuner/configs/custom_dataset/sft/baichuan/baichuan_7b_qlora_custom_sft_e1.py index eecce4cce..dc15b6289 100644 --- a/xtuner/configs/custom_dataset/sft/baichuan/baichuan_7b_qlora_custom_sft_e1.py +++ b/xtuner/configs/custom_dataset/sft/baichuan/baichuan_7b_qlora_custom_sft_e1.py @@ -26,7 +26,7 @@ from xtuner.dataset import process_hf_dataset from xtuner.dataset.collate_fns import default_collate_fn -from xtuner.dataset.map_fns import template_map_fn_factory +from xtuner.dataset.map_fns import openai_map_fn, template_map_fn_factory from xtuner.engine.hooks import (DatasetInfoHook, EvaluateChatHook, VarlenAttnArgsToMessageHubHook) from xtuner.engine.runner import TrainLoop @@ -111,7 +111,7 @@ dataset=dict(type=load_dataset, path='json', data_files=data_files), tokenizer=tokenizer, max_length=max_length, - dataset_map_fn=None, + 
dataset_map_fn=openai_map_fn, template_map_fn=dict( type=template_map_fn_factory, template=prompt_template), remove_unused_columns=True, diff --git a/xtuner/configs/custom_dataset/sft/chatglm/chatglm2_6b_qlora_custom_sft_e1.py b/xtuner/configs/custom_dataset/sft/chatglm/chatglm2_6b_qlora_custom_sft_e1.py index 312044a88..09b354929 100644 --- a/xtuner/configs/custom_dataset/sft/chatglm/chatglm2_6b_qlora_custom_sft_e1.py +++ b/xtuner/configs/custom_dataset/sft/chatglm/chatglm2_6b_qlora_custom_sft_e1.py @@ -26,7 +26,7 @@ from xtuner.dataset import process_hf_dataset from xtuner.dataset.collate_fns import default_collate_fn -from xtuner.dataset.map_fns import template_map_fn_factory +from xtuner.dataset.map_fns import openai_map_fn, template_map_fn_factory from xtuner.engine.hooks import (DatasetInfoHook, EvaluateChatHook, VarlenAttnArgsToMessageHubHook) from xtuner.engine.runner import TrainLoop @@ -111,7 +111,7 @@ dataset=dict(type=load_dataset, path='json', data_files=data_files), tokenizer=tokenizer, max_length=max_length, - dataset_map_fn=None, + dataset_map_fn=openai_map_fn, template_map_fn=dict( type=template_map_fn_factory, template=prompt_template), remove_unused_columns=True, diff --git a/xtuner/configs/custom_dataset/sft/chatglm/chatglm3_6b_qlora_custom_sft_e1.py b/xtuner/configs/custom_dataset/sft/chatglm/chatglm3_6b_qlora_custom_sft_e1.py index c0bd6b2a7..7e3abba71 100644 --- a/xtuner/configs/custom_dataset/sft/chatglm/chatglm3_6b_qlora_custom_sft_e1.py +++ b/xtuner/configs/custom_dataset/sft/chatglm/chatglm3_6b_qlora_custom_sft_e1.py @@ -26,7 +26,7 @@ from xtuner.dataset import process_hf_dataset from xtuner.dataset.collate_fns import default_collate_fn -from xtuner.dataset.map_fns import template_map_fn_factory +from xtuner.dataset.map_fns import openai_map_fn, template_map_fn_factory from xtuner.engine.hooks import (DatasetInfoHook, EvaluateChatHook, VarlenAttnArgsToMessageHubHook) from xtuner.engine.runner import TrainLoop @@ -112,7 +112,7 @@ dataset=dict(type=load_dataset, path='json', data_files=data_files), tokenizer=tokenizer, max_length=max_length, - dataset_map_fn=None, + dataset_map_fn=openai_map_fn, template_map_fn=dict( type=template_map_fn_factory, template=prompt_template), remove_unused_columns=True, diff --git a/xtuner/configs/custom_dataset/sft/deepseek/deepseek_moe_16b_chat_qlora_custom_sft_e1.py b/xtuner/configs/custom_dataset/sft/deepseek/deepseek_moe_16b_chat_qlora_custom_sft_e1.py index d4cf2d9d9..f7621bc6c 100644 --- a/xtuner/configs/custom_dataset/sft/deepseek/deepseek_moe_16b_chat_qlora_custom_sft_e1.py +++ b/xtuner/configs/custom_dataset/sft/deepseek/deepseek_moe_16b_chat_qlora_custom_sft_e1.py @@ -26,7 +26,7 @@ from xtuner.dataset import process_hf_dataset from xtuner.dataset.collate_fns import default_collate_fn -from xtuner.dataset.map_fns import template_map_fn_factory +from xtuner.dataset.map_fns import openai_map_fn, template_map_fn_factory from xtuner.engine.hooks import (DatasetInfoHook, EvaluateChatHook, VarlenAttnArgsToMessageHubHook) from xtuner.engine.runner import TrainLoop @@ -111,7 +111,7 @@ dataset=dict(type=load_dataset, path='json', data_files=data_files), tokenizer=tokenizer, max_length=max_length, - dataset_map_fn=None, + dataset_map_fn=openai_map_fn, template_map_fn=dict( type=template_map_fn_factory, template=prompt_template), remove_unused_columns=True, diff --git a/xtuner/configs/custom_dataset/sft/deepseek/deepseekcoder_6_7b_instruct_qlora_custom_sft_e1.py 
b/xtuner/configs/custom_dataset/sft/deepseek/deepseekcoder_6_7b_instruct_qlora_custom_sft_e1.py index b0142dc76..629012f5b 100644 --- a/xtuner/configs/custom_dataset/sft/deepseek/deepseekcoder_6_7b_instruct_qlora_custom_sft_e1.py +++ b/xtuner/configs/custom_dataset/sft/deepseek/deepseekcoder_6_7b_instruct_qlora_custom_sft_e1.py @@ -26,7 +26,7 @@ from xtuner.dataset import process_hf_dataset from xtuner.dataset.collate_fns import default_collate_fn -from xtuner.dataset.map_fns import template_map_fn_factory +from xtuner.dataset.map_fns import openai_map_fn, template_map_fn_factory from xtuner.engine.hooks import (DatasetInfoHook, EvaluateChatHook, VarlenAttnArgsToMessageHubHook) from xtuner.engine.runner import TrainLoop @@ -115,7 +115,7 @@ dataset=dict(type=load_dataset, path='json', data_files=data_files), tokenizer=tokenizer, max_length=max_length, - dataset_map_fn=None, + dataset_map_fn=openai_map_fn, template_map_fn=dict( type=template_map_fn_factory, template=prompt_template), remove_unused_columns=True, diff --git a/xtuner/configs/custom_dataset/sft/gemma/gemma_2b_it_qlora_custom_sft_e1.py b/xtuner/configs/custom_dataset/sft/gemma/gemma_2b_it_qlora_custom_sft_e1.py index cc3d90a8b..122ddf023 100644 --- a/xtuner/configs/custom_dataset/sft/gemma/gemma_2b_it_qlora_custom_sft_e1.py +++ b/xtuner/configs/custom_dataset/sft/gemma/gemma_2b_it_qlora_custom_sft_e1.py @@ -26,7 +26,7 @@ from xtuner.dataset import process_hf_dataset from xtuner.dataset.collate_fns import default_collate_fn -from xtuner.dataset.map_fns import template_map_fn_factory +from xtuner.dataset.map_fns import openai_map_fn, template_map_fn_factory from xtuner.engine.hooks import (DatasetInfoHook, EvaluateChatHook, VarlenAttnArgsToMessageHubHook) from xtuner.engine.runner import TrainLoop @@ -111,7 +111,7 @@ dataset=dict(type=load_dataset, path='json', data_files=data_files), tokenizer=tokenizer, max_length=max_length, - dataset_map_fn=None, + dataset_map_fn=openai_map_fn, template_map_fn=dict( type=template_map_fn_factory, template=prompt_template), remove_unused_columns=True, diff --git a/xtuner/configs/custom_dataset/sft/gemma/gemma_2b_qlora_custom_sft_e1.py b/xtuner/configs/custom_dataset/sft/gemma/gemma_2b_qlora_custom_sft_e1.py index a993ecbab..9a3d36b30 100644 --- a/xtuner/configs/custom_dataset/sft/gemma/gemma_2b_qlora_custom_sft_e1.py +++ b/xtuner/configs/custom_dataset/sft/gemma/gemma_2b_qlora_custom_sft_e1.py @@ -26,7 +26,7 @@ from xtuner.dataset import process_hf_dataset from xtuner.dataset.collate_fns import default_collate_fn -from xtuner.dataset.map_fns import template_map_fn_factory +from xtuner.dataset.map_fns import openai_map_fn, template_map_fn_factory from xtuner.engine.hooks import (DatasetInfoHook, EvaluateChatHook, VarlenAttnArgsToMessageHubHook) from xtuner.engine.runner import TrainLoop @@ -111,7 +111,7 @@ dataset=dict(type=load_dataset, path='json', data_files=data_files), tokenizer=tokenizer, max_length=max_length, - dataset_map_fn=None, + dataset_map_fn=openai_map_fn, template_map_fn=dict( type=template_map_fn_factory, template=prompt_template), remove_unused_columns=True, diff --git a/xtuner/configs/custom_dataset/sft/gemma/gemma_7b_it_qlora_custom_sft_e1.py b/xtuner/configs/custom_dataset/sft/gemma/gemma_7b_it_qlora_custom_sft_e1.py index 5d5dab1c0..c677c9d09 100644 --- a/xtuner/configs/custom_dataset/sft/gemma/gemma_7b_it_qlora_custom_sft_e1.py +++ b/xtuner/configs/custom_dataset/sft/gemma/gemma_7b_it_qlora_custom_sft_e1.py @@ -26,7 +26,7 @@ from xtuner.dataset import process_hf_dataset from 
xtuner.dataset.collate_fns import default_collate_fn -from xtuner.dataset.map_fns import template_map_fn_factory +from xtuner.dataset.map_fns import openai_map_fn, template_map_fn_factory from xtuner.engine.hooks import (DatasetInfoHook, EvaluateChatHook, VarlenAttnArgsToMessageHubHook) from xtuner.engine.runner import TrainLoop @@ -111,7 +111,7 @@ dataset=dict(type=load_dataset, path='json', data_files=data_files), tokenizer=tokenizer, max_length=max_length, - dataset_map_fn=None, + dataset_map_fn=openai_map_fn, template_map_fn=dict( type=template_map_fn_factory, template=prompt_template), remove_unused_columns=True, diff --git a/xtuner/configs/custom_dataset/sft/gemma/gemma_7b_qlora_custom_sft_e1.py b/xtuner/configs/custom_dataset/sft/gemma/gemma_7b_qlora_custom_sft_e1.py index bc906731d..443a1e663 100644 --- a/xtuner/configs/custom_dataset/sft/gemma/gemma_7b_qlora_custom_sft_e1.py +++ b/xtuner/configs/custom_dataset/sft/gemma/gemma_7b_qlora_custom_sft_e1.py @@ -26,7 +26,7 @@ from xtuner.dataset import process_hf_dataset from xtuner.dataset.collate_fns import default_collate_fn -from xtuner.dataset.map_fns import template_map_fn_factory +from xtuner.dataset.map_fns import openai_map_fn, template_map_fn_factory from xtuner.engine.hooks import (DatasetInfoHook, EvaluateChatHook, VarlenAttnArgsToMessageHubHook) from xtuner.engine.runner import TrainLoop @@ -111,7 +111,7 @@ dataset=dict(type=load_dataset, path='json', data_files=data_files), tokenizer=tokenizer, max_length=max_length, - dataset_map_fn=None, + dataset_map_fn=openai_map_fn, template_map_fn=dict( type=template_map_fn_factory, template=prompt_template), remove_unused_columns=True, diff --git a/xtuner/configs/custom_dataset/sft/internlm/internlm2_chat_1_8b_qlora_custom_sft_e1.py b/xtuner/configs/custom_dataset/sft/internlm/internlm2_chat_1_8b_qlora_custom_sft_e1.py index af01dc2c7..2aaa6f24d 100644 --- a/xtuner/configs/custom_dataset/sft/internlm/internlm2_chat_1_8b_qlora_custom_sft_e1.py +++ b/xtuner/configs/custom_dataset/sft/internlm/internlm2_chat_1_8b_qlora_custom_sft_e1.py @@ -26,7 +26,7 @@ from xtuner.dataset import process_hf_dataset from xtuner.dataset.collate_fns import default_collate_fn -from xtuner.dataset.map_fns import template_map_fn_factory +from xtuner.dataset.map_fns import openai_map_fn, template_map_fn_factory from xtuner.engine.hooks import (DatasetInfoHook, EvaluateChatHook, VarlenAttnArgsToMessageHubHook) from xtuner.engine.runner import TrainLoop @@ -111,7 +111,7 @@ dataset=dict(type=load_dataset, path='json', data_files=data_files), tokenizer=tokenizer, max_length=max_length, - dataset_map_fn=None, + dataset_map_fn=openai_map_fn, template_map_fn=dict( type=template_map_fn_factory, template=prompt_template), remove_unused_columns=True, diff --git a/xtuner/configs/custom_dataset/sft/internlm/internlm2_chat_20b_qlora_custom_sft_e1.py b/xtuner/configs/custom_dataset/sft/internlm/internlm2_chat_20b_qlora_custom_sft_e1.py index f0c74ce81..dfb423839 100644 --- a/xtuner/configs/custom_dataset/sft/internlm/internlm2_chat_20b_qlora_custom_sft_e1.py +++ b/xtuner/configs/custom_dataset/sft/internlm/internlm2_chat_20b_qlora_custom_sft_e1.py @@ -26,7 +26,7 @@ from xtuner.dataset import process_hf_dataset from xtuner.dataset.collate_fns import default_collate_fn -from xtuner.dataset.map_fns import template_map_fn_factory +from xtuner.dataset.map_fns import openai_map_fn, template_map_fn_factory from xtuner.engine.hooks import (DatasetInfoHook, EvaluateChatHook, VarlenAttnArgsToMessageHubHook) from xtuner.engine.runner 
import TrainLoop @@ -111,7 +111,7 @@ dataset=dict(type=load_dataset, path='json', data_files=data_files), tokenizer=tokenizer, max_length=max_length, - dataset_map_fn=None, + dataset_map_fn=openai_map_fn, template_map_fn=dict( type=template_map_fn_factory, template=prompt_template), remove_unused_columns=True, diff --git a/xtuner/configs/custom_dataset/sft/internlm/internlm2_chat_7b_qlora_custom_sft_e1.py b/xtuner/configs/custom_dataset/sft/internlm/internlm2_chat_7b_qlora_custom_sft_e1.py index 645f9cbf6..313103992 100644 --- a/xtuner/configs/custom_dataset/sft/internlm/internlm2_chat_7b_qlora_custom_sft_e1.py +++ b/xtuner/configs/custom_dataset/sft/internlm/internlm2_chat_7b_qlora_custom_sft_e1.py @@ -26,7 +26,7 @@ from xtuner.dataset import process_hf_dataset from xtuner.dataset.collate_fns import default_collate_fn -from xtuner.dataset.map_fns import template_map_fn_factory +from xtuner.dataset.map_fns import openai_map_fn, template_map_fn_factory from xtuner.engine.hooks import (DatasetInfoHook, EvaluateChatHook, VarlenAttnArgsToMessageHubHook) from xtuner.engine.runner import TrainLoop @@ -111,7 +111,7 @@ dataset=dict(type=load_dataset, path='json', data_files=data_files), tokenizer=tokenizer, max_length=max_length, - dataset_map_fn=None, + dataset_map_fn=openai_map_fn, template_map_fn=dict( type=template_map_fn_factory, template=prompt_template), remove_unused_columns=True, diff --git a/xtuner/configs/custom_dataset/sft/llama/llama2_70b_qlora_custom_sft_e1.py b/xtuner/configs/custom_dataset/sft/llama/llama2_70b_qlora_custom_sft_e1.py index 53b380719..2b0f889b4 100644 --- a/xtuner/configs/custom_dataset/sft/llama/llama2_70b_qlora_custom_sft_e1.py +++ b/xtuner/configs/custom_dataset/sft/llama/llama2_70b_qlora_custom_sft_e1.py @@ -26,7 +26,7 @@ from xtuner.dataset import process_hf_dataset from xtuner.dataset.collate_fns import default_collate_fn -from xtuner.dataset.map_fns import template_map_fn_factory +from xtuner.dataset.map_fns import openai_map_fn, template_map_fn_factory from xtuner.engine.hooks import (DatasetInfoHook, EvaluateChatHook, VarlenAttnArgsToMessageHubHook) from xtuner.engine.runner import TrainLoop @@ -112,7 +112,7 @@ dataset=dict(type=load_dataset, path='json', data_files=data_files), tokenizer=tokenizer, max_length=max_length, - dataset_map_fn=None, + dataset_map_fn=openai_map_fn, template_map_fn=dict( type=template_map_fn_factory, template=prompt_template), remove_unused_columns=True, diff --git a/xtuner/configs/custom_dataset/sft/llama/llama2_7b_chat_qlora_custom_sft_e1.py b/xtuner/configs/custom_dataset/sft/llama/llama2_7b_chat_qlora_custom_sft_e1.py index 59d14cfb5..9aa9b6362 100644 --- a/xtuner/configs/custom_dataset/sft/llama/llama2_7b_chat_qlora_custom_sft_e1.py +++ b/xtuner/configs/custom_dataset/sft/llama/llama2_7b_chat_qlora_custom_sft_e1.py @@ -26,7 +26,7 @@ from xtuner.dataset import process_hf_dataset from xtuner.dataset.collate_fns import default_collate_fn -from xtuner.dataset.map_fns import template_map_fn_factory +from xtuner.dataset.map_fns import openai_map_fn, template_map_fn_factory from xtuner.engine.hooks import (DatasetInfoHook, EvaluateChatHook, VarlenAttnArgsToMessageHubHook) from xtuner.engine.runner import TrainLoop @@ -111,7 +111,7 @@ dataset=dict(type=load_dataset, path='json', data_files=data_files), tokenizer=tokenizer, max_length=max_length, - dataset_map_fn=None, + dataset_map_fn=openai_map_fn, template_map_fn=dict( type=template_map_fn_factory, template=prompt_template), remove_unused_columns=True, diff --git 
a/xtuner/configs/custom_dataset/sft/mistral/mistral_7b_full_finetune_custom_sft_e1.py b/xtuner/configs/custom_dataset/sft/mistral/mistral_7b_full_finetune_custom_sft_e1.py index f6aa0f574..0af78f79f 100644 --- a/xtuner/configs/custom_dataset/sft/mistral/mistral_7b_full_finetune_custom_sft_e1.py +++ b/xtuner/configs/custom_dataset/sft/mistral/mistral_7b_full_finetune_custom_sft_e1.py @@ -26,7 +26,7 @@ from xtuner.dataset import process_hf_dataset from xtuner.dataset.collate_fns import default_collate_fn -from xtuner.dataset.map_fns import template_map_fn_factory +from xtuner.dataset.map_fns import openai_map_fn, template_map_fn_factory from xtuner.dataset.samplers import InternRepoSampler from xtuner.engine import (DatasetInfoHook, EvaluateChatHook, ThroughputHook, VarlenAttnArgsToMessageHubHook) @@ -114,7 +114,7 @@ dataset=dict(type=load_dataset, path='json', data_files=data_files), tokenizer=tokenizer, max_length=max_length, - dataset_map_fn=None, + dataset_map_fn=openai_map_fn, template_map_fn=dict( type=template_map_fn_factory, template=prompt_template), remove_unused_columns=True, diff --git a/xtuner/configs/custom_dataset/sft/mixtral/mixtral_8x7b_instruct_qlora_custom_sft_e1.py b/xtuner/configs/custom_dataset/sft/mixtral/mixtral_8x7b_instruct_qlora_custom_sft_e1.py index 3eea2a9f5..91cda57ec 100644 --- a/xtuner/configs/custom_dataset/sft/mixtral/mixtral_8x7b_instruct_qlora_custom_sft_e1.py +++ b/xtuner/configs/custom_dataset/sft/mixtral/mixtral_8x7b_instruct_qlora_custom_sft_e1.py @@ -26,7 +26,7 @@ from xtuner.dataset import process_hf_dataset from xtuner.dataset.collate_fns import default_collate_fn -from xtuner.dataset.map_fns import template_map_fn_factory +from xtuner.dataset.map_fns import openai_map_fn, template_map_fn_factory from xtuner.engine.hooks import (DatasetInfoHook, EvaluateChatHook, VarlenAttnArgsToMessageHubHook) from xtuner.engine.runner import TrainLoop @@ -114,7 +114,7 @@ dataset=dict(type=load_dataset, path='json', data_files=data_files), tokenizer=tokenizer, max_length=max_length, - dataset_map_fn=None, + dataset_map_fn=openai_map_fn, template_map_fn=dict( type=template_map_fn_factory, template=prompt_template), remove_unused_columns=True, diff --git a/xtuner/configs/custom_dataset/sft/qwen/qwen1_5_0_5b_chat_qlora_custom_sft_e1.py b/xtuner/configs/custom_dataset/sft/qwen/qwen1_5_0_5b_chat_qlora_custom_sft_e1.py index 8f26fe5ad..3066f0be9 100644 --- a/xtuner/configs/custom_dataset/sft/qwen/qwen1_5_0_5b_chat_qlora_custom_sft_e1.py +++ b/xtuner/configs/custom_dataset/sft/qwen/qwen1_5_0_5b_chat_qlora_custom_sft_e1.py @@ -26,7 +26,7 @@ from xtuner.dataset import process_hf_dataset from xtuner.dataset.collate_fns import default_collate_fn -from xtuner.dataset.map_fns import template_map_fn_factory +from xtuner.dataset.map_fns import openai_map_fn, template_map_fn_factory from xtuner.engine.hooks import (DatasetInfoHook, EvaluateChatHook, VarlenAttnArgsToMessageHubHook) from xtuner.engine.runner import TrainLoop @@ -111,7 +111,7 @@ dataset=dict(type=load_dataset, path='json', data_files=data_files), tokenizer=tokenizer, max_length=max_length, - dataset_map_fn=None, + dataset_map_fn=openai_map_fn, template_map_fn=dict( type=template_map_fn_factory, template=prompt_template), remove_unused_columns=True, diff --git a/xtuner/configs/custom_dataset/sft/qwen/qwen1_5_14b_chat_qlora_custom_sft_e1.py b/xtuner/configs/custom_dataset/sft/qwen/qwen1_5_14b_chat_qlora_custom_sft_e1.py index 70aa8d7f6..642592f0c 100644 --- 
a/xtuner/configs/custom_dataset/sft/qwen/qwen1_5_14b_chat_qlora_custom_sft_e1.py +++ b/xtuner/configs/custom_dataset/sft/qwen/qwen1_5_14b_chat_qlora_custom_sft_e1.py @@ -26,7 +26,7 @@ from xtuner.dataset import process_hf_dataset from xtuner.dataset.collate_fns import default_collate_fn -from xtuner.dataset.map_fns import template_map_fn_factory +from xtuner.dataset.map_fns import openai_map_fn, template_map_fn_factory from xtuner.engine.hooks import (DatasetInfoHook, EvaluateChatHook, VarlenAttnArgsToMessageHubHook) from xtuner.engine.runner import TrainLoop @@ -111,7 +111,7 @@ dataset=dict(type=load_dataset, path='json', data_files=data_files), tokenizer=tokenizer, max_length=max_length, - dataset_map_fn=None, + dataset_map_fn=openai_map_fn, template_map_fn=dict( type=template_map_fn_factory, template=prompt_template), remove_unused_columns=True, diff --git a/xtuner/configs/custom_dataset/sft/qwen/qwen1_5_1_8b_chat_qlora_custom_sft_e1.py b/xtuner/configs/custom_dataset/sft/qwen/qwen1_5_1_8b_chat_qlora_custom_sft_e1.py index d7196c53a..3790006d7 100644 --- a/xtuner/configs/custom_dataset/sft/qwen/qwen1_5_1_8b_chat_qlora_custom_sft_e1.py +++ b/xtuner/configs/custom_dataset/sft/qwen/qwen1_5_1_8b_chat_qlora_custom_sft_e1.py @@ -26,7 +26,7 @@ from xtuner.dataset import process_hf_dataset from xtuner.dataset.collate_fns import default_collate_fn -from xtuner.dataset.map_fns import template_map_fn_factory +from xtuner.dataset.map_fns import openai_map_fn, template_map_fn_factory from xtuner.engine.hooks import (DatasetInfoHook, EvaluateChatHook, VarlenAttnArgsToMessageHubHook) from xtuner.engine.runner import TrainLoop @@ -111,7 +111,7 @@ dataset=dict(type=load_dataset, path='json', data_files=data_files), tokenizer=tokenizer, max_length=max_length, - dataset_map_fn=None, + dataset_map_fn=openai_map_fn, template_map_fn=dict( type=template_map_fn_factory, template=prompt_template), remove_unused_columns=True, diff --git a/xtuner/configs/custom_dataset/sft/qwen/qwen1_5_4b_chat_qlora_custom_sft_e1.py b/xtuner/configs/custom_dataset/sft/qwen/qwen1_5_4b_chat_qlora_custom_sft_e1.py index 0c2808485..36d3e6cd0 100644 --- a/xtuner/configs/custom_dataset/sft/qwen/qwen1_5_4b_chat_qlora_custom_sft_e1.py +++ b/xtuner/configs/custom_dataset/sft/qwen/qwen1_5_4b_chat_qlora_custom_sft_e1.py @@ -26,7 +26,7 @@ from xtuner.dataset import process_hf_dataset from xtuner.dataset.collate_fns import default_collate_fn -from xtuner.dataset.map_fns import template_map_fn_factory +from xtuner.dataset.map_fns import openai_map_fn, template_map_fn_factory from xtuner.engine.hooks import (DatasetInfoHook, EvaluateChatHook, VarlenAttnArgsToMessageHubHook) from xtuner.engine.runner import TrainLoop @@ -111,7 +111,7 @@ dataset=dict(type=load_dataset, path='json', data_files=data_files), tokenizer=tokenizer, max_length=max_length, - dataset_map_fn=None, + dataset_map_fn=openai_map_fn, template_map_fn=dict( type=template_map_fn_factory, template=prompt_template), remove_unused_columns=True, diff --git a/xtuner/configs/custom_dataset/sft/qwen/qwen1_5_72b_chat_qlora_custom_sft_e1.py b/xtuner/configs/custom_dataset/sft/qwen/qwen1_5_72b_chat_qlora_custom_sft_e1.py index 88bc7a3ed..d152c207d 100644 --- a/xtuner/configs/custom_dataset/sft/qwen/qwen1_5_72b_chat_qlora_custom_sft_e1.py +++ b/xtuner/configs/custom_dataset/sft/qwen/qwen1_5_72b_chat_qlora_custom_sft_e1.py @@ -26,7 +26,7 @@ from xtuner.dataset import process_hf_dataset from xtuner.dataset.collate_fns import default_collate_fn -from xtuner.dataset.map_fns import 
template_map_fn_factory +from xtuner.dataset.map_fns import openai_map_fn, template_map_fn_factory from xtuner.engine.hooks import (DatasetInfoHook, EvaluateChatHook, VarlenAttnArgsToMessageHubHook) from xtuner.engine.runner import TrainLoop @@ -111,7 +111,7 @@ dataset=dict(type=load_dataset, path='json', data_files=data_files), tokenizer=tokenizer, max_length=max_length, - dataset_map_fn=None, + dataset_map_fn=openai_map_fn, template_map_fn=dict( type=template_map_fn_factory, template=prompt_template), remove_unused_columns=True, diff --git a/xtuner/configs/custom_dataset/sft/qwen/qwen1_5_7b_chat_qlora_custom_sft_e1.py b/xtuner/configs/custom_dataset/sft/qwen/qwen1_5_7b_chat_qlora_custom_sft_e1.py index 67b02d626..1098c5ca8 100644 --- a/xtuner/configs/custom_dataset/sft/qwen/qwen1_5_7b_chat_qlora_custom_sft_e1.py +++ b/xtuner/configs/custom_dataset/sft/qwen/qwen1_5_7b_chat_qlora_custom_sft_e1.py @@ -26,7 +26,7 @@ from xtuner.dataset import process_hf_dataset from xtuner.dataset.collate_fns import default_collate_fn -from xtuner.dataset.map_fns import template_map_fn_factory +from xtuner.dataset.map_fns import openai_map_fn, template_map_fn_factory from xtuner.engine.hooks import (DatasetInfoHook, EvaluateChatHook, VarlenAttnArgsToMessageHubHook) from xtuner.engine.runner import TrainLoop @@ -111,7 +111,7 @@ dataset=dict(type=load_dataset, path='json', data_files=data_files), tokenizer=tokenizer, max_length=max_length, - dataset_map_fn=None, + dataset_map_fn=openai_map_fn, template_map_fn=dict( type=template_map_fn_factory, template=prompt_template), remove_unused_columns=True, diff --git a/xtuner/configs/custom_dataset/sft/qwen/qwen_1_8b_chat_qlora_custom_sft_e1.py b/xtuner/configs/custom_dataset/sft/qwen/qwen_1_8b_chat_qlora_custom_sft_e1.py index 327c3c7c3..2d517e897 100644 --- a/xtuner/configs/custom_dataset/sft/qwen/qwen_1_8b_chat_qlora_custom_sft_e1.py +++ b/xtuner/configs/custom_dataset/sft/qwen/qwen_1_8b_chat_qlora_custom_sft_e1.py @@ -26,7 +26,7 @@ from xtuner.dataset import process_hf_dataset from xtuner.dataset.collate_fns import default_collate_fn -from xtuner.dataset.map_fns import template_map_fn_factory +from xtuner.dataset.map_fns import openai_map_fn, template_map_fn_factory from xtuner.engine.hooks import (DatasetInfoHook, EvaluateChatHook, VarlenAttnArgsToMessageHubHook) from xtuner.engine.runner import TrainLoop @@ -112,7 +112,7 @@ dataset=dict(type=load_dataset, path='json', data_files=data_files), tokenizer=tokenizer, max_length=max_length, - dataset_map_fn=None, + dataset_map_fn=openai_map_fn, template_map_fn=dict( type=template_map_fn_factory, template=prompt_template), remove_unused_columns=True, diff --git a/xtuner/configs/custom_dataset/sft/qwen/qwen_72b_qlora_custom_sft_e1.py b/xtuner/configs/custom_dataset/sft/qwen/qwen_72b_qlora_custom_sft_e1.py index eb5fed1f6..e1156a1aa 100644 --- a/xtuner/configs/custom_dataset/sft/qwen/qwen_72b_qlora_custom_sft_e1.py +++ b/xtuner/configs/custom_dataset/sft/qwen/qwen_72b_qlora_custom_sft_e1.py @@ -26,7 +26,7 @@ from xtuner.dataset import process_hf_dataset from xtuner.dataset.collate_fns import default_collate_fn -from xtuner.dataset.map_fns import template_map_fn_factory +from xtuner.dataset.map_fns import openai_map_fn, template_map_fn_factory from xtuner.engine.hooks import (DatasetInfoHook, EvaluateChatHook, VarlenAttnArgsToMessageHubHook) from xtuner.engine.runner import TrainLoop @@ -112,7 +112,7 @@ dataset=dict(type=load_dataset, path='json', data_files=data_files), tokenizer=tokenizer, max_length=max_length, - 
dataset_map_fn=None, + dataset_map_fn=openai_map_fn, template_map_fn=dict( type=template_map_fn_factory, template=prompt_template), remove_unused_columns=True, diff --git a/xtuner/configs/custom_dataset/sft/qwen/qwen_7b_chat_qlora_custom_sft_e1.py b/xtuner/configs/custom_dataset/sft/qwen/qwen_7b_chat_qlora_custom_sft_e1.py index 22acb42c8..b6fcaacba 100644 --- a/xtuner/configs/custom_dataset/sft/qwen/qwen_7b_chat_qlora_custom_sft_e1.py +++ b/xtuner/configs/custom_dataset/sft/qwen/qwen_7b_chat_qlora_custom_sft_e1.py @@ -26,7 +26,7 @@ from xtuner.dataset import process_hf_dataset from xtuner.dataset.collate_fns import default_collate_fn -from xtuner.dataset.map_fns import template_map_fn_factory +from xtuner.dataset.map_fns import openai_map_fn, template_map_fn_factory from xtuner.engine.hooks import (DatasetInfoHook, EvaluateChatHook, VarlenAttnArgsToMessageHubHook) from xtuner.engine.runner import TrainLoop @@ -112,7 +112,7 @@ dataset=dict(type=load_dataset, path='json', data_files=data_files), tokenizer=tokenizer, max_length=max_length, - dataset_map_fn=None, + dataset_map_fn=openai_map_fn, template_map_fn=dict( type=template_map_fn_factory, template=prompt_template), remove_unused_columns=True, diff --git a/xtuner/configs/custom_dataset/sft/starcoder/starcoder_qlora_custom_sft_e1.py b/xtuner/configs/custom_dataset/sft/starcoder/starcoder_qlora_custom_sft_e1.py index d15b023da..d79484dcf 100644 --- a/xtuner/configs/custom_dataset/sft/starcoder/starcoder_qlora_custom_sft_e1.py +++ b/xtuner/configs/custom_dataset/sft/starcoder/starcoder_qlora_custom_sft_e1.py @@ -26,7 +26,7 @@ from xtuner.dataset import process_hf_dataset from xtuner.dataset.collate_fns import default_collate_fn -from xtuner.dataset.map_fns import template_map_fn_factory +from xtuner.dataset.map_fns import openai_map_fn, template_map_fn_factory from xtuner.engine.hooks import (DatasetInfoHook, EvaluateChatHook, VarlenAttnArgsToMessageHubHook) from xtuner.engine.runner import TrainLoop @@ -114,7 +114,7 @@ dataset=dict(type=load_dataset, path='json', data_files=data_files), tokenizer=tokenizer, max_length=max_length, - dataset_map_fn=None, + dataset_map_fn=openai_map_fn, template_map_fn=dict( type=template_map_fn_factory, template=prompt_template), max_dataset_length=max_dataset_length, diff --git a/xtuner/configs/custom_dataset/sft/yi/yi_34b_qlora_custom_sft_e1.py b/xtuner/configs/custom_dataset/sft/yi/yi_34b_qlora_custom_sft_e1.py index 2cbde95a7..4906ab5f7 100644 --- a/xtuner/configs/custom_dataset/sft/yi/yi_34b_qlora_custom_sft_e1.py +++ b/xtuner/configs/custom_dataset/sft/yi/yi_34b_qlora_custom_sft_e1.py @@ -26,7 +26,7 @@ from xtuner.dataset import process_hf_dataset from xtuner.dataset.collate_fns import default_collate_fn -from xtuner.dataset.map_fns import template_map_fn_factory +from xtuner.dataset.map_fns import openai_map_fn, template_map_fn_factory from xtuner.engine.hooks import (DatasetInfoHook, EvaluateChatHook, VarlenAttnArgsToMessageHubHook) from xtuner.engine.runner import TrainLoop @@ -111,7 +111,7 @@ dataset=dict(type=load_dataset, path='json', data_files=data_files), tokenizer=tokenizer, max_length=max_length, - dataset_map_fn=None, + dataset_map_fn=openai_map_fn, template_map_fn=dict( type=template_map_fn_factory, template=prompt_template), remove_unused_columns=True, diff --git a/xtuner/configs/custom_dataset/sft/yi/yi_6b_qlora_custom_sft_e1.py b/xtuner/configs/custom_dataset/sft/yi/yi_6b_qlora_custom_sft_e1.py index 5b1281964..96a684a22 100644 --- 
a/xtuner/configs/custom_dataset/sft/yi/yi_6b_qlora_custom_sft_e1.py +++ b/xtuner/configs/custom_dataset/sft/yi/yi_6b_qlora_custom_sft_e1.py @@ -26,7 +26,7 @@ from xtuner.dataset import process_hf_dataset from xtuner.dataset.collate_fns import default_collate_fn -from xtuner.dataset.map_fns import template_map_fn_factory +from xtuner.dataset.map_fns import openai_map_fn, template_map_fn_factory from xtuner.engine.hooks import (DatasetInfoHook, EvaluateChatHook, VarlenAttnArgsToMessageHubHook) from xtuner.engine.runner import TrainLoop @@ -111,7 +111,7 @@ dataset=dict(type=load_dataset, path='json', data_files=data_files), tokenizer=tokenizer, max_length=max_length, - dataset_map_fn=None, + dataset_map_fn=openai_map_fn, template_map_fn=dict( type=template_map_fn_factory, template=prompt_template), remove_unused_columns=True, diff --git a/xtuner/configs/custom_dataset/sft/zephyr/zephyr_7b_beta_qlora_custom_sft_e1.py b/xtuner/configs/custom_dataset/sft/zephyr/zephyr_7b_beta_qlora_custom_sft_e1.py index 7cb010a99..b2349c2da 100644 --- a/xtuner/configs/custom_dataset/sft/zephyr/zephyr_7b_beta_qlora_custom_sft_e1.py +++ b/xtuner/configs/custom_dataset/sft/zephyr/zephyr_7b_beta_qlora_custom_sft_e1.py @@ -26,7 +26,7 @@ from xtuner.dataset import process_hf_dataset from xtuner.dataset.collate_fns import default_collate_fn -from xtuner.dataset.map_fns import template_map_fn_factory +from xtuner.dataset.map_fns import openai_map_fn, template_map_fn_factory from xtuner.engine.hooks import (DatasetInfoHook, EvaluateChatHook, VarlenAttnArgsToMessageHubHook) from xtuner.engine.runner import TrainLoop @@ -111,7 +111,7 @@ dataset=dict(type=load_dataset, path='json', data_files=data_files), tokenizer=tokenizer, max_length=max_length, - dataset_map_fn=None, + dataset_map_fn=openai_map_fn, template_map_fn=dict( type=template_map_fn_factory, template=prompt_template), remove_unused_columns=True, From 8c3b44d0c8d9936031f94700342d3768a19b8519 Mon Sep 17 00:00:00 2001 From: whcao <41630003+HIT-cwh@users.noreply.github.com> Date: Mon, 24 Jun 2024 19:27:35 +0800 Subject: [PATCH 04/29] [Fix] fix configs (#783) fix configs --- .../llama3_70b_instruct_qlora_alpaca_e3_2k_gpu8.py | 5 ++--- .../qwen1_5_moe_a2_7_b_chat_full_alpaca_e3.py | 3 +-- 2 files changed, 3 insertions(+), 5 deletions(-) diff --git a/xtuner/configs/llama/llama3_70b_instruct/llama3_70b_instruct_qlora_alpaca_e3_2k_gpu8.py b/xtuner/configs/llama/llama3_70b_instruct/llama3_70b_instruct_qlora_alpaca_e3_2k_gpu8.py index 13e1d3888..89feac44e 100644 --- a/xtuner/configs/llama/llama3_70b_instruct/llama3_70b_instruct_qlora_alpaca_e3_2k_gpu8.py +++ b/xtuner/configs/llama/llama3_70b_instruct/llama3_70b_instruct_qlora_alpaca_e3_2k_gpu8.py @@ -117,7 +117,7 @@ batch_size=batch_size, num_workers=dataloader_num_workers, dataset=alpaca_en, - sampler=dict(type=sampler, shuffle=True, seed=1024), + sampler=dict(type=sampler, shuffle=True), collate_fn=dict(type=default_collate_fn, use_varlen_attn=use_varlen_attn)) ####################################################################### @@ -185,8 +185,7 @@ checkpoint=dict( type=CheckpointHook, by_epoch=False, - interval=-1, - save_last=False, + interval=save_steps, max_keep_ckpts=save_total_limit), # set sampler seed in distributed evrionment. 
sampler_seed=dict(type=DistSamplerSeedHook),

diff --git a/xtuner/configs/qwen_moe/qwen1_5/qwen1_5_moe_a2_7_b_chat/qwen1_5_moe_a2_7_b_chat_full_alpaca_e3.py b/xtuner/configs/qwen_moe/qwen1_5/qwen1_5_moe_a2_7_b_chat/qwen1_5_moe_a2_7_b_chat_full_alpaca_e3.py
index 5053f10c0..6e8c2fb00 100644
--- a/xtuner/configs/qwen_moe/qwen1_5/qwen1_5_moe_a2_7_b_chat/qwen1_5_moe_a2_7_b_chat_full_alpaca_e3.py
+++ b/xtuner/configs/qwen_moe/qwen1_5/qwen1_5_moe_a2_7_b_chat/qwen1_5_moe_a2_7_b_chat_full_alpaca_e3.py
@@ -184,8 +184,7 @@
     checkpoint=dict(
         type=CheckpointHook,
         by_epoch=False,
-        interval=-1,
-        save_last=False,
+        interval=save_steps,
         max_keep_ckpts=save_total_limit),
     # set sampler seed in distributed evrionment.
     sampler_seed=dict(type=DistSamplerSeedHook),

From b98d41367ae6adb34a8cde1a592ceac688d2a9e2 Mon Sep 17 00:00:00 2001
From: RangiLyu
Date: Mon, 24 Jun 2024 19:27:52 +0800
Subject: [PATCH 05/29] [Docs] DPO and Reward Model documents (#751)

* rebase main
* refine link
* refine link
* resolve comments
* resolve comments
---
 docs/zh_cn/dpo/modify_settings.md                  |  83 +++++++++++
 docs/zh_cn/dpo/overview.md                         |  25 ++++
 docs/zh_cn/dpo/quick_start.md                      |  71 +++++++++++
 docs/zh_cn/index.rst                               |  17 +++
 docs/zh_cn/reward_model/images/preference_data.png | Bin 0 -> 27640 bytes
 docs/zh_cn/reward_model/images/var_len_atten.png   | Bin 0 -> 41318 bytes
 docs/zh_cn/reward_model/modify_settings.md         | 100 ++++++++++++++++
 docs/zh_cn/reward_model/overview.md                |  29 +++++
 docs/zh_cn/reward_model/preference_data.md         | 110 ++++++++++++++++++
 docs/zh_cn/reward_model/quick_start.md             |  86 ++++++++++++++
 10 files changed, 521 insertions(+)
 create mode 100644 docs/zh_cn/dpo/modify_settings.md
 create mode 100644 docs/zh_cn/dpo/overview.md
 create mode 100644 docs/zh_cn/dpo/quick_start.md
 create mode 100644 docs/zh_cn/reward_model/images/preference_data.png
 create mode 100644 docs/zh_cn/reward_model/images/var_len_atten.png
 create mode 100644 docs/zh_cn/reward_model/modify_settings.md
 create mode 100644 docs/zh_cn/reward_model/overview.md
 create mode 100644 docs/zh_cn/reward_model/preference_data.md
 create mode 100644 docs/zh_cn/reward_model/quick_start.md

diff --git a/docs/zh_cn/dpo/modify_settings.md b/docs/zh_cn/dpo/modify_settings.md
new file mode 100644
index 000000000..7b4672792
--- /dev/null
+++ b/docs/zh_cn/dpo/modify_settings.md
@@ -0,0 +1,83 @@
+## Modify DPO Training Settings
+
+This section covers only the configuration options specific to DPO (Direct Preference Optimization) training. For more details on XTuner config files, see [Modify Training Settings](https://xtuner.readthedocs.io/zh-cn/latest/training/modify_settings.html).
+
+### Loss Function
+
+In DPO training, you can choose among different loss function types according to your needs. XTuner provides several options, such as `sigmoid`, `hinge`, and `ipo`; select the one to use through the `dpo_loss_type` parameter.
+
+In addition, the `loss_beta` parameter controls the temperature coefficient of the loss function, and the `label_smoothing` parameter can be used to smooth the labels.
+
+```python
+#######################################################################
+#                          PART 1  Settings                           #
+#######################################################################
+# Model
+dpo_loss_type = 'sigmoid'  # One of ['sigmoid', 'hinge', 'ipo', 'kto_pair', 'sppo_hard', 'nca_pair', 'robust']
+loss_beta = 0.1
+label_smoothing = 0.0
+```
+
+### Modify the Model
+
+Users can set `pretrained_model_name_or_path` to change the pretrained model.
+
+```python
+#######################################################################
+#                          PART 1  Settings                           #
+#######################################################################
+# Model
+pretrained_model_name_or_path = 'internlm/internlm2-chat-1_8b-sft'
+```
+
+### Training Data
+
+In DPO training, you can use `max_length` to specify the maximum number of tokens in a single sample sequence; XTuner automatically truncates or pads the data.
+
+```python
+# Data
+max_length = 2048
+```
+
+In the config file, the `train_dataset` field specifies the training dataset: the `dataset` field specifies how the dataset is loaded, and the `dataset_map_fn` field specifies the dataset mapping function.
+
+```python
+#######################################################################
+#                     PART 3  Dataset & Dataloader                    #
+#######################################################################
+sampler = SequenceParallelSampler \
+    if sequence_parallel_size > 1 else DefaultSampler
+
+train_dataset = dict(
+    type=build_preference_dataset,
+    dataset=dict(type=load_dataset, path='mlabonne/orpo-dpo-mix-40k'),
+    tokenizer=tokenizer,
+    max_length=max_length,
+    dataset_map_fn=orpo_dpo_mix_40k_map_fn,
+    is_dpo=True,
+    is_reward=False,
+    reward_token_id=-1,
+    num_proc=32,
+    use_varlen_attn=use_varlen_attn,
+    max_packed_length=max_packed_length,
+    shuffle_before_pack=True,
+)
+
+train_dataloader = dict(
+    batch_size=batch_size,
+    num_workers=dataloader_num_workers,
+    dataset=train_dataset,
+    sampler=dict(type=sampler, shuffle=True),
+    collate_fn=dict(
+        type=preference_collate_fn, use_varlen_attn=use_varlen_attn))
+```
+
+In the configuration above, we use `load_dataset` to load the `mlabonne/orpo-dpo-mix-40k` dataset from huggingface, with `orpo_dpo_mix_40k_map_fn` as the dataset mapping function.
+
+For how to process datasets and how to write dataset mapping functions, see the [preference dataset section](../reward_model/preference_data.md).
+
+### Accelerating Training
+
+When training with preference data, we recommend enabling the [variable-length attention mechanism](https://xtuner.readthedocs.io/zh-cn/latest/acceleration/varlen_flash_attn.html) to avoid the memory waste caused by the length difference between the chosen and rejected samples within a single preference pair. You can enable it with `use_varlen_attn=True`.
+
+XTuner also supports a large number of training acceleration methods; for their usage, see the [acceleration strategies section](https://xtuner.readthedocs.io/zh-cn/latest/acceleration/hyper_parameters.html).
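Editorial aside, not part of the patch: the `sigmoid` setting shown in the loss-function section above corresponds to the standard DPO objective. The sketch below is a minimal illustration of what that loss computes, assuming the per-sequence log-probabilities of the chosen and rejected responses (under the policy and the frozen reference model) have already been summed over response tokens; function and argument names are illustrative, not XTuner's internal API.

```python
import torch.nn.functional as F


def dpo_sigmoid_loss(policy_chosen_logps, policy_rejected_logps,
                     ref_chosen_logps, ref_rejected_logps,
                     beta=0.1, label_smoothing=0.0):
    # Implicit reward margin: how much more the policy prefers the chosen
    # response over the rejected one, relative to the reference model.
    logits = ((policy_chosen_logps - ref_chosen_logps)
              - (policy_rejected_logps - ref_rejected_logps))
    # beta acts as the temperature coefficient; a nonzero label_smoothing
    # hedges against noisy preference labels by mixing in the flipped pair.
    loss = (-F.logsigmoid(beta * logits) * (1 - label_smoothing)
            - F.logsigmoid(-beta * logits) * label_smoothing)
    return loss.mean()
```

Raising `loss_beta` sharpens the preference margin the model is pushed toward, while `label_smoothing` trades a little fit for robustness to mislabeled pairs.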
+### Modify the Model
+
+Users can modify `pretrained_model_name_or_path` to change the pretrained model.
+
+```python
+#######################################################################
+#                          PART 1  Settings                           #
+#######################################################################
+# Model
+pretrained_model_name_or_path = 'internlm/internlm2-chat-1_8b-sft'
+```
+
+### Training Data
+
+In DPO training, you can use `max_length` to specify the maximum number of tokens in a single sample sequence, and XTuner will automatically truncate or pad the data.
+
+```python
+# Data
+max_length = 2048
+```
+
+In the configuration file, we use the `train_dataset` field to specify the training dataset; the `dataset` field specifies how the dataset is loaded, and the `dataset_map_fn` field specifies the dataset mapping function.
+
+```python
+#######################################################################
+#                      PART 3  Dataset & Dataloader                   #
+#######################################################################
+sampler = SequenceParallelSampler \
+    if sequence_parallel_size > 1 else DefaultSampler
+
+train_dataset = dict(
+    type=build_preference_dataset,
+    dataset=dict(type=load_dataset, path='mlabonne/orpo-dpo-mix-40k'),
+    tokenizer=tokenizer,
+    max_length=max_length,
+    dataset_map_fn=orpo_dpo_mix_40k_map_fn,
+    is_dpo=True,
+    is_reward=False,
+    reward_token_id=-1,
+    num_proc=32,
+    use_varlen_attn=use_varlen_attn,
+    max_packed_length=max_packed_length,
+    shuffle_before_pack=True,
+)
+
+train_dataloader = dict(
+    batch_size=batch_size,
+    num_workers=dataloader_num_workers,
+    dataset=train_dataset,
+    sampler=dict(type=sampler, shuffle=True),
+    collate_fn=dict(
+        type=preference_collate_fn, use_varlen_attn=use_varlen_attn))
+```
+
+In the configuration above, we use `load_dataset` to load the `mlabonne/orpo-dpo-mix-40k` dataset from HuggingFace, with `orpo_dpo_mix_40k_map_fn` as the dataset mapping function.
+
+For how to process datasets and how to write dataset mapping functions, please refer to the [preference dataset section](../reward_model/preference_data.md).
+
+### Accelerate Training
+
+When training with preference data, we recommend enabling the [variable-length attention mechanism](https://xtuner.readthedocs.io/zh-cn/latest/acceleration/varlen_flash_attn.html) to avoid the memory waste caused by the length difference between the chosen and rejected samples within a single preference pair. You can enable it by setting `use_varlen_attn=True`.
+
+XTuner also supports many other training acceleration methods; for their usage, please refer to the [acceleration strategies section](https://xtuner.readthedocs.io/zh-cn/latest/acceleration/hyper_parameters.html).
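As an illustration of why `use_varlen_attn` saves memory here, the sketch below packs an uneven chosen/rejected pair into one sequence and records the boundaries that variable-length attention uses to keep the samples independent (plain Python for illustration, not XTuner's actual packing code):

```python
# Chosen/rejected replies to the same prompt usually differ in length.
chosen_ids = list(range(8))     # 8 tokens
rejected_ids = list(range(3))   # 3 tokens

# Padded batching reserves 2 * max(len) slots: 16, of which 5 are padding.
padded_slots = 2 * max(len(chosen_ids), len(rejected_ids))

# Packing concatenates both into one 11-token sequence; the cumulative
# lengths tell variable-length attention where each sample ends.
packed = chosen_ids + rejected_ids
cumulative_len = [0, len(chosen_ids), len(packed)]

print(padded_slots, len(packed), cumulative_len)  # 16 11 [0, 8, 11]
```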
diff --git a/docs/zh_cn/dpo/overview.md b/docs/zh_cn/dpo/overview.md
new file mode 100644
index 000000000..d1bfc4379
--- /dev/null
+++ b/docs/zh_cn/dpo/overview.md
@@ -0,0 +1,25 @@
+## Introduction to DPO
+
+### Overview
+
+DPO (Direct Preference Optimization) is a method for directly optimizing a large language model on human preferences during training. Unlike traditional reinforcement learning approaches, DPO optimizes the model directly with human preference data, improving the quality of the generated content and aligning it more closely with human preferences. DPO skips training a Reward Model and, compared with PPO, also removes the Critic Model; it therefore avoids complex reinforcement learning algorithms, reduces training overhead, and improves training efficiency.
+
+DPO has many derivative algorithms that refine its loss function to various degrees. Besides DPO, XTuner also implements the loss functions from papers such as [Identity Preference Optimisation (IPO)](https://huggingface.co/papers/2310.12036) and [Kahneman-Tversky Optimisation (KTO)](https://github.com/ContextualAI/HALOs). To use these algorithms, please refer to the [Modify DPO Settings](./modify_settings.md) section. We also provide some [example configs](https://github.com/InternLM/xtuner/tree/main/xtuner/configs/dpo) for reference.
+
+In addition to DPO, alignment algorithms that need no reference model, such as [ORPO](https://arxiv.org/abs/2403.07691), have appeared. ORPO optimizes the model using the notion of an odds ratio, penalizing rejected samples during training so that the model adapts more effectively to the chosen ones. ORPO removes the dependency on a reference model, making training simpler and more efficient. ORPO training in XTuner is very similar to DPO training; we provide some ORPO [example configs](https://github.com/InternLM/xtuner/tree/main/xtuner/configs/orpo), and users can follow the DPO tutorial to modify the configuration.
+
+### Advantages of DPO Training in XTuner
+
+DPO training in XTuner offers the following notable advantages:
+
+1. **Support for the latest algorithms**: besides standard DPO, XTuner supports many of its derivatives, as well as efficient algorithms such as ORPO that do not rely on a reference model.
+
+2. **Reduced memory waste**: because the chosen and rejected samples in preference data usually differ in length, padding tokens are introduced when concatenating the training data, wasting memory. In XTuner, building on the [variable-length attention](https://xtuner.readthedocs.io/zh-cn/latest/acceleration/varlen_flash_attn.html) feature of Flash Attention 2, we pack each preference pair into the same sequence during training, significantly reducing the memory wasted on padding tokens. This improves memory utilization and makes it possible to train larger models or process more data on the same hardware.
+
+![img](../reward_model/images/var_len_atten.png)
+
+3. **Efficient training**: with XTuner's QLoRA training, the reference model can be obtained as the language model with the LoRA adapter removed, which eliminates the memory footprint of separate reference-model weights and greatly lowers the cost of DPO training.
+
+### Get Started
+
+Please read [Quick Start](./quick_start.md) for the basic concepts; if you want to learn more about configuring training parameters, please refer to the [Modify DPO Settings](./modify_settings.md) section.
diff --git a/docs/zh_cn/dpo/quick_start.md b/docs/zh_cn/dpo/quick_start.md
new file mode 100644
index 000000000..a92152b0f
--- /dev/null
+++ b/docs/zh_cn/dpo/quick_start.md
@@ -0,0 +1,71 @@
+## DPO Quick Start
+
+In this section, we introduce how to train a 1.8B DPO (Direct Preference Optimization) model with XTuner to help you get started quickly.
+
+### Prepare Pretrained Model Weights
+
+We use the SFT language model [InternLM2-chat-1.8b-sft](https://huggingface.co/internlm/internlm2-chat-1_8b-sft) as the initialization model for DPO to perform preference alignment.
+
+Setting `pretrained_model_name_or_path = 'internlm/internlm2-chat-1_8b-sft'` in the training configuration file will download the model files automatically when training starts. If you need to download the model weights manually, please refer to the [Prepare Pretrained Model Weights](https://xtuner.readthedocs.io/zh-cn/latest/preparation/pretrained_model.html) section, which explains in detail how to download weights from HuggingFace or ModelScope. The links are:
+
+- HuggingFace: https://huggingface.co/internlm/internlm2-chat-1_8b-sft
+- ModelScope: https://modelscope.cn/models/Shanghai_AI_Laboratory/internlm2-chat-1_8b-sft/summary
+
+### Prepare Training Data
+
+In this tutorial, we use the [mlabonne/orpo-dpo-mix-40k](https://huggingface.co/datasets/mlabonne/orpo-dpo-mix-40k) dataset from HuggingFace as a demonstration:
+
+```python
+train_dataset = dict(
+    type=build_preference_dataset,
+    dataset=dict(
+        type=load_dataset,
+        path='mlabonne/orpo-dpo-mix-40k'),
+    dataset_map_fn=orpo_dpo_mix_40k_map_fn,
+    is_dpo=True,
+    is_reward=False,
+)
+```
+
+With the above settings in the configuration file, the dataset will be downloaded and processed automatically. If you want to use other open-source HuggingFace datasets or custom data, please refer to the [preference dataset](../reward_model/preference_data.md) section.
+
+### Prepare the Configuration File
+
+XTuner provides several ready-to-use configuration files, which can be listed with `xtuner list-cfg`. Run the following command to copy one to the current directory:
+
+```bash
+xtuner copy-cfg internlm2_chat_1_8b_dpo_full .
+```
+
+Open the copied configuration file. If you choose to download the model and dataset automatically, no changes are needed. If you want to use pre-downloaded model and dataset paths, modify `pretrained_model_name_or_path` and the `path` parameter of `dataset` in `train_dataset`.
+
+For more training parameter settings, please refer to the [Modify DPO Training Settings](./modify_settings.md) section.
+
+### Start Training
+
+After the steps above, you can start a training job with the following commands:
+
+```bash
+# Single machine, single GPU
+xtuner train ./internlm2_chat_1_8b_dpo_full_copy.py
+# Single machine, multiple GPUs
+NPROC_PER_NODE=${GPU_NUM} xtuner train ./internlm2_chat_1_8b_dpo_full_copy.py
+# slurm cluster
+srun ${SRUN_ARGS} xtuner train ./internlm2_chat_1_8b_dpo_full_copy.py --launcher slurm
+```
+
+### Model Conversion
+
+XTuner ships with a tool that converts models to the HuggingFace format. We only need to run
+
+```bash
+# Create the directory that stores the HF-format parameters
+mkdir work_dirs/internlm2_chat_1_8b_dpo_full_copy/iter_15230_hf
+
+# Convert the format
+xtuner convert pth_to_hf internlm2_chat_1_8b_dpo_full_copy.py \
+                         work_dirs/internlm2_chat_1_8b_dpo_full_copy/iter_15230.pth \
+                         work_dirs/internlm2_chat_1_8b_dpo_full_copy/iter_15230_hf
+```
+
+to convert an XTuner checkpoint into a HuggingFace-format model.
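After conversion, the resulting directory is a standard HuggingFace checkpoint. As a quick sanity check (a sketch using the illustrative paths above, not part of the patch), it can be loaded with `transformers`:

```python
from transformers import AutoModelForCausalLM, AutoTokenizer

# Path produced by the conversion step above (illustrative).
path = 'work_dirs/internlm2_chat_1_8b_dpo_full_copy/iter_15230_hf'
tokenizer = AutoTokenizer.from_pretrained(path, trust_remote_code=True)
model = AutoModelForCausalLM.from_pretrained(path, trust_remote_code=True)

inputs = tokenizer('Hello', return_tensors='pt')
print(tokenizer.decode(model.generate(**inputs, max_new_tokens=8)[0]))
```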
diff --git a/docs/zh_cn/index.rst b/docs/zh_cn/index.rst
index afe6e1c76..4acf0e882 100644
--- a/docs/zh_cn/index.rst
+++ b/docs/zh_cn/index.rst
@@ -55,6 +55,23 @@
    training/modify_settings.rst
    training/visualization.rst
 
+.. toctree::
+   :maxdepth: 2
+   :caption: DPO
+
+   dpo/overview.md
+   dpo/quick_start.md
+   dpo/modify_settings.md
+
+.. toctree::
+   :maxdepth: 2
+   :caption: Reward Model
+
+   reward_model/overview.md
+   reward_model/quick_start.md
+   reward_model/modify_settings.md
+   reward_model/preference_data.md
+
 .. toctree::
    :maxdepth: 2
    :caption: Accelerate Training
diff --git a/docs/zh_cn/reward_model/images/preference_data.png b/docs/zh_cn/reward_model/images/preference_data.png
new file mode 100644
index 0000000000000000000000000000000000000000..a18ea64497b35cc5838febf3005c3bca9ec9ce7a
Binary files /dev/null and b/docs/zh_cn/reward_model/images/preference_data.png differ
diff --git a/docs/zh_cn/reward_model/images/var_len_atten.png b/docs/zh_cn/reward_model/images/var_len_atten.png
new file mode 100644
index 0000000000000000000000000000000000000000..3e60777d2063d925176799f49a1e821a24ff0b2f
Binary files /dev/null and b/docs/zh_cn/reward_model/images/var_len_atten.png differ
zlm~9IV_0fqUGk|&Y!|+Hjm9#^XPdg>nJ-!=i##+uvDaU|gl0FC0`v{KIlay-QkO)e|~KFjY27;rVAaO5esb`Es`n6%|fjD#5&uQT21iV$1lF5M5p^ zB$CAYQ&S1OdIk0P(-)N&6E|d=zKm;N@lY0qbGL{%CW@8^1E98s>WB914geakU9?O$aIFD8Yk+o9u-QW~YNQUKX7M-a+ zFRfjgEgy2))<^)2C7ND5a3Tw&9H4;uk8&X~`SxMl?lz6Zr4icDQcj&?JxXc9H#pfk zuR^V9huy8|FcwcTn$pK5{RxfR^K~p zU494=ZBa22LIKNSfS)c;&x~IL^Zr;|qITy}540x*wJAISFLmTQjvl-X>A9bAH_3j_0u$lQISjIuti%6>`mNtM2THbWYG z&+GyWPgVdcWnf_>&Pv|V31>2zdOq{;b+$w?B}Im#BLI?dRnp zs)v*pOFW~*&no8UwzOn6g9vi~DhJAQi3>+GRahWcjR` zGjnRW@!GM+_tZ%Qc1J<61AzrdKZR!H{GpnGI~BuDLI7r zu=TxC0-L-GJ$p-q5;w_hz~YPYEIv=-<%g$NcPEA#T5UmVol-AF@P_TG6~I=XH{V5p z_g~Q6ueDfN{Q6C5&x5{Y`hOIScF$Il`?T!^RI}8 zK?4wr$$qGEV6?WUr+Z8Ge@cH%FiK+7t`Au0-y}n zG{Z#x{m++MXq2f`b-wg5@}Ccai~o;jX)r*8PKb^jw`<4v=G zlb_-`zQfmKQpPtt5@f9Z{ZXlEoftW5$g)a%M~9iz@e9|kTzVn}l7VD6PZok4Wzqcy zTpO(FvbDdBM;}xZA8cu9v9v8~5Q3)m`PEMWO2zdg!|qRv2#_NIc!mI_(}OUm87!*? zE>eOH+MePN<#-`X56Sui9u)d?Uy;qB&z@fU=1Dpqu6*PE>V0C zU07ubg96?!##-54z-B*7RH2iGmmKReSqfPJpv!{c?JmZe?RjgyC^?hxug+VD%jKi` z=BWSxwnst}nF}g6$v}yFoj-VI*{Uc5X}yo}i_(${S+=vQ4ukq{$}~X(gls&OO*&f+@op; zCd;T{00c)DnxPU#1t|p&24fi!LK#|h0As~m2mRv#EXECy6_=SCWCpv;^v&K5p){5{ z-pE!i7)C_7!tBTe;)VULA~E^<1=A*eaHVO(2rJwH0CjDa9Z22qBD16?JFf`hHm_5% zp3Xp&3 z^MqCl8W)uO*4^X$S|c5cfSRdre{_*a!T0v?4@sQ;2v=1mZ~9~Jb2~JfMt`yOj{>5K zzlwSAgA7M3vKc=s94KgWP1R$=7xNHVbrLTYxe^?zRNuAp={Bx~u?7%uf%e^rJ5I=@ z>(l1SIdAQZ-ogEccb`EPip#4I#YG9zUChBu6|8_6lYR48beGj?UIfE4@z$JM(;=R3 z`{*#-KmyFRI)8NN3SyKZ8mEy5;68=BW0hY{fCR{%lO5TQFm2t1yskG?gIhLcXi&;w zD}x+#jDRQ}l%8j|($@O6XKj|^LhRL`y+?e~jEWauAbfAGInHgdZh87NQ0|)B0wOvp z)1WECAlO z{fB!-ntW%q)@3;!1-d&aCDz%h?uGrQA;EfzK;lMX7;ovfk`+;s+^?YSv^?dNgQo@4 zy!D%C2V5tTD#jynx1SKki&Jm!3(Cd*Ja^ji#D?xtgkh_!eAo2$Do(fST|_u?m&MQd z=Z_0#5+kK1VpR81^8yf#k4IHX0LnKWc6spDq9^dDQ5J)V_3?QG#c@zl17u%%I+a~b z>OBX2D9uLLPPT9BJJYn=Nh=6*7a0dXOkAjvdv%SS-zni9o(6TLzVb{{c*^8Hb1~XN zQW&ZjsR9cV0NWZGvI6>9RA`8h^9Y8p_O^a+XTIF2kt|7yoojR~Q0zU5?snch1+0)y zEhvI9Nm?xyFETZT>RG*6=$&=dSlm7G{e^wHi7g|QiGX;yRrZ_qj?&V%vq(S?d_o|9 z#?)_p(N2+4DRk%E2&7jZ9-30<5jQO|w&05o6%$K{^SoRgV?grBI4i}`dkPo0rZ~YeXEjlMPN~Nu7`bwfU-X2olVI@ z8+r#;TGKQjZ1bWmoq;#KMRMzN36Zr@LK-%@eKZ|Yh^8p$G}craz4PVNG#ABqHWi7Z z3tVNU^)<`4Eia?7%KTH*4px1o(e-J+%D^DwM@M^efnL7;6=8mb zLf=nu)1_wr>P1#*@$mmjs=(KKgk!c53|e3d;XO5?pHXz(M5~s(yH>JEq(5 z;x&!j0YV_A1Q%R4Y&ag~FvK$kIux}(BQqc#0Hz)GUJ+?LnvP|OG)UG|f4$h_cB4Ne zWae+tMne#7oF!9C{~2wx20=_E6F-N)Hem2dk2G+AmGs5_5%DA@@Pe3}rRl%g1kp?Y yE&NZLEg*Z(@$vC-Y%R^c{V#R#KiDk$H-X` 特殊 token 的方式计算 reward 得分,因此当切换模型的词表发生变化时,该特殊 token 的 id 也需要进行相应的修改,我们通常会使用词表末尾未使用的 token 作为 reward token。 + +例如,在 InternLM2 中我们使用 `[UNUSED_TOKEN_130]` 作为 reward token: + +```python +####################################################################### +# PART 1 Settings # +####################################################################### +# Model +pretrained_model_name_or_path = 'internlm/internlm2-chat-1_8b-sft' +reward_token_id = 92527 # use [UNUSED_TOKEN_130] as reward token +``` + +如果用户将模型切换为llama3,我们则可以使用 `<|reserved_special_token_0|>` 作为 reward token: + +```python +####################################################################### +# PART 1 Settings # +####################################################################### +# Model +pretrained_model_name_or_path = 'meta-llama/Meta-Llama-3-8B-Instruct' +reward_token_id = 128002 # use <|reserved_special_token_0|> as reward token +``` + +### 训练数据 + +在 Reward Model 训练中,你可以通过 `max_length` 来指定单个样本序列的最大 token 数,XTuner 会自动对数据进行截断或是填充。 + +```python +# Data +max_length = 2048 +``` + +在配置文件中,我们通过 `train_dataset` 字段来指定训练数据集,你可以通过 `dataset` 字段指定数据集的加载方式,通过 `dataset_map_fn` 字段指定数据集的映射函数。 + 
+### Training Data
+
+During Reward Model training, you can use `max_length` to cap the number of tokens in a single sample sequence; XTuner automatically truncates or pads the data to this length.
+
+```python
+# Data
+max_length = 2048
+```
+
+In the config file, the `train_dataset` field specifies the training dataset; within it, the `dataset` field specifies how the dataset is loaded, and the `dataset_map_fn` field specifies the dataset mapping function.
+
+```python
+#######################################################################
+#                      PART 3  Dataset & Dataloader                   #
+#######################################################################
+sampler = SequenceParallelSampler \
+    if sequence_parallel_size > 1 else DefaultSampler
+
+train_dataset = dict(
+    type=build_preference_dataset,
+    dataset=dict(
+        type=load_dataset,
+        path='argilla/ultrafeedback-binarized-preferences-cleaned'),
+    tokenizer=tokenizer,
+    max_length=max_length,
+    dataset_map_fn=orpo_dpo_mix_40k_map_fn,
+    is_dpo=False,
+    is_reward=True,
+    reward_token_id=reward_token_id,
+    num_proc=32,
+    use_varlen_attn=use_varlen_attn,
+    max_packed_length=max_packed_length,
+    shuffle_before_pack=True,
+)
+
+train_dataloader = dict(
+    batch_size=batch_size,
+    num_workers=dataloader_num_workers,
+    dataset=train_dataset,
+    sampler=dict(type=sampler, shuffle=True),
+    collate_fn=dict(
+        type=preference_collate_fn, use_varlen_attn=use_varlen_attn))
+```
+
+In the configuration above, we use `load_dataset` to load the `argilla/ultrafeedback-binarized-preferences-cleaned` dataset from huggingface, with `orpo_dpo_mix_40k_map_fn` as the dataset mapping function (since `orpo_dpo_mix_40k` and `ultrafeedback-binarized-preferences-cleaned` share the same format, the two datasets can share one mapping function).
+
+For how datasets are processed and how to write a dataset mapping function, refer to the [preference dataset section](./preference_data.md).
+
+### Accelerating Training
+
+When training on preference data, we recommend enabling the [variable-length attention mechanism](https://xtuner.readthedocs.io/zh-cn/latest/acceleration/varlen_flash_attn.html) to avoid the memory waste caused by the length difference between the chosen and rejected samples within a single preference pair. You can enable it by setting `use_varlen_attn=True`. A toy illustration of the savings follows below.
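+To get an intuition for the savings, the toy calculation below compares pad-to-longest batching with packing the chosen and rejected responses of each pair into one sequence. The lengths are invented for illustration; this is only a sketch of the accounting, not XTuner's actual packing code:
+
+```python
+# Hypothetical token lengths of three (chosen, rejected) pairs.
+pairs = [(900, 150), (400, 380), (1200, 90)]
+
+# Pad-to-longest: every sequence is padded up to the longest in the batch.
+longest = max(length for pair in pairs for length in pair)
+padded_total = longest * 2 * len(pairs)
+
+# Packing: sequences are concatenated back to back, and variable-length
+# attention keeps each sample attending only to itself.
+packed_total = sum(chosen + rejected for chosen, rejected in pairs)
+
+print(f'pad-to-longest: {padded_total} tokens, packed: {packed_total} tokens')
+# pad-to-longest: 7200 tokens, packed: 3120 tokens
+```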
+XTuner also integrates a large number of other training acceleration methods; for how to use them, refer to the [acceleration strategies section](https://xtuner.readthedocs.io/zh-cn/latest/acceleration/hyper_parameters.html).
diff --git a/docs/zh_cn/reward_model/overview.md b/docs/zh_cn/reward_model/overview.md
new file mode 100644
index 000000000..84b5ab14b
--- /dev/null
+++ b/docs/zh_cn/reward_model/overview.md
@@ -0,0 +1,29 @@
+## Introduction to Reward Model
+
+### Overview
+
+The Reward Model is a key component of the reinforcement learning process. Its main task is to predict a reward value for a given input and feedback, thereby steering the direction of the learning algorithm. In RLHF (Reinforcement Learning from Human Feedback), the Reward Model incorporates human feedback to help the reinforcement learning algorithm optimize its policy more effectively.
+
+In large language model training, the Reward Model usually refers to a Preference Model: during training it is fitted to human preferences by being shown good and bad (chosen & rejected) responses to the same prompt, and at inference time it predicts a reward value that guides the optimization of the Actor model during RLHF.
+
+Application scenarios of the Reward Model include, but are not limited to:
+
+- **RLHF training**: When running RLHF training with the Proximal Policy Optimization (PPO) algorithm, the Reward Model provides the reward signal that guides policy optimization, improving the quality of generated content and aligning it more closely with human preferences.
+- **BoN sampling**: In Best-of-N (BoN) sampling, the Reward Model scores multiple responses to the same prompt, and the generation with the highest reward score is selected, improving the model's output quality.
+- **Data construction**: The Reward Model can be used to evaluate and filter training data, or to replace human annotation when constructing DPO training data.
+
+### Advantages of Reward Model Training in XTuner
+
+Reward Model training in XTuner offers the following notable advantages:
+
+1. **Latest training techniques**: XTuner integrates the Reward Model training loss from InternLM2, which stabilizes the numerical range of the reward scores and reduces overfitting on easy samples (see the [InternLM2 technical report](https://arxiv.org/abs/2403.17297) for details).
+
+2. **Less memory waste**: Because the chosen and rejected samples in preference data usually differ in length, concatenating them for training requires padding tokens, which wastes memory. In XTuner, building on the variable-length attention of Flash Attention 2, we pack preference pairs into a single sequence during training, significantly reducing the memory wasted on padding tokens. This improves memory efficiency and makes it possible to train larger models or process more data on the same hardware.
+
+![img](./images/var_len_atten.png)
+
+3. **Efficient training**: With XTuner's QLoRA training, we can train only the Value Head of the Reward Model with full parameters while fine-tuning the language model itself with QLoRA, greatly reducing the memory overhead of training.
+
+### Getting Started
+
+Please refer to [Quick Start](./quick_start.md) for the most basic concepts; for more details on training parameter configuration, see the [Modify Reward Model Settings](./modify_settings.md) section.
diff --git a/docs/zh_cn/reward_model/preference_data.md b/docs/zh_cn/reward_model/preference_data.md
new file mode 100644
index 000000000..1dd296053
--- /dev/null
+++ b/docs/zh_cn/reward_model/preference_data.md
@@ -0,0 +1,110 @@
+## Preference Datasets
+
+### Overview
+
+XTuner's Reward Model and the preference-based algorithms such as DPO and ORPO all share the same data format. Each training sample in a preference dataset contains three fields: `prompt`, `chosen` and `rejected`, each of which uses the [OpenAI chat message](https://platform.openai.com/docs/api-reference/chat/create) format. A concrete example looks like this:
+
+```json
+{
+  "prompt": [
+    {
+      "role": "system",
+      "content": "You are a helpful assistant."
+    },
+    {
+      "role": "user",
+      "content": "Who won the world series in 2020?"
+    },
+    {
+      "role": "assistant",
+      "content": "The Los Angeles Dodgers won the World Series in 2020."
+    },
+    {
+      "role": "user",
+      "content": "Where was it played?"
+    }
+  ],
+  "chosen": [
+    {
+      "role": "assistant",
+      "content": "The 2020 World Series was played at Globe Life Field in Arlington, Texas."
+    }
+  ],
+  "rejected": [
+    {
+      "role": "assistant",
+      "content": "I don't know."
+    }
+  ]
+}
+```
+
+During Reward Model or DPO training, xtuner turns the preference dataset into different training labels depending on the task type.
+
+![img](./images/preference_data.png)
+
+As shown above, for Reward Model training we follow ChatGPT's training recipe and append a special `<|reward|>` token at the end of the conversation data, computing the loss only on the logits output at this token. For the DPO family of algorithms, we instead mask out the tokens of the prompt part and compute the loss only on the chosen and rejected responses. The dataset type is controlled by the `is_reward` and `is_dpo` fields of `dataset` in the config file.
+
+### Training with Custom Data
+
+Once your data follows the per-sample format above, simply save it in [JSON Lines](https://jsonlines.org/) format, where every line of the file is one sample.
+
+```json
+{"prompt": [{"content": "How can I learn to better regulate my emotions as a grown adult?", "role": "user"}], "chosen": [{"content": "You could do emotional regulation exercises with me, and try to notice and practice ways that you could better control your emotions in real life.", "role": "assistant"}], "rejected": [{"content": "Do you mean how can you learn to be more emotionally mature?", "role": "assistant"}]}
+{"prompt": [{"content": "What are some interesting things I can add to a grilled cheese sandwich?", "role": "user"}, {"content": "A tomato, a slice of pickle, some chopped pickles, some chopped green onions. Also, some mayonnaise if you like.", "role": "assistant"}, {"content": "Sounds great. Anything else?", "role": "user"}], "chosen": [{"content": "If you’d like something a little different, I recommend grating some horseradish on top of the sandwich.", "role": "assistant"}], "rejected": [{"content": "Maybe some nice fresh basil?", "role": "assistant"}]}
+{"prompt": [{"content": "How do I bob for apples?", "role": "user"}], "chosen": [{"content": "Ah! Do you mean the game of “applesauce,” or do you mean the idea of “to bob for apples”?", "role": "assistant"}], "rejected": [{"content": "Sorry, I don’t know that term.", "role": "assistant"}]}
+......
+```
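+If your raw comparisons live in Python objects, a small script along the following lines writes them out in this format (the sample content and output path are placeholders):
+
+```python
+import json
+
+# Hypothetical raw comparisons: (prompt, preferred reply, dispreferred reply).
+raw_samples = [
+    ('How do I bob for apples?',
+     'Fill a basin with water, float the apples, and catch one with your teeth.',
+     'Sorry, I do not know that term.'),
+]
+
+with open('my_preference_data.jsonl', 'w', encoding='utf-8') as f:
+    for prompt, good, bad in raw_samples:
+        sample = {
+            'prompt': [{'role': 'user', 'content': prompt}],
+            'chosen': [{'role': 'assistant', 'content': good}],
+            'rejected': [{'role': 'assistant', 'content': bad}],
+        }
+        # One JSON object per line, as required by the JSON Lines format.
+        f.write(json.dumps(sample, ensure_ascii=False) + '\n')
+```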
+After preparing your custom dataset, fill the paths of the saved files into the `data_files` field of the config file; you can load multiple jsonl files for training at the same time.
+
+```python
+#######################################################################
+#                      PART 3  Dataset & Dataloader                   #
+#######################################################################
+train_dataset = dict(
+    type=build_preference_dataset,
+    dataset=dict(
+        type=load_jsonl_dataset,
+        data_files=[
+            '/your/jsonl/path/here.jsonl',
+            '/your/another/jsonl/path/here.jsonl'
+        ]),
+)
+```
+
+### Training with Open-Source Datasets
+
+Just as when configuring SFT data in XTuner, to use an open-source dataset from huggingface you only need to define a mapping function map_fn that converts the dataset's format into XTuner's data format.
+
+Take Intel/orca_dpo_pairs as an example. This dataset has four fields, `system`, `question`, `chosen` and `rejected`, and each field holds plain text rather than the [OpenAI chat message](https://platform.openai.com/docs/api-reference/chat/create) format, so we need to define a map_fn for it:
+
+```python
+def intel_orca_dpo_map_fn(example):
+    prompt = [{
+        'role': 'system',
+        'content': example['system']
+    }, {
+        'role': 'user',
+        'content': example['question']
+    }]
+    chosen = [{'role': 'assistant', 'content': example['chosen']}]
+    rejected = [{'role': 'assistant', 'content': example['rejected']}]
+    return {'prompt': prompt, 'chosen': chosen, 'rejected': rejected}
+```
+
+As the code shows, `intel_orca_dpo_map_fn` converts the four original fields into the three fields `prompt`, `chosen` and `rejected`, each in the [OpenAI chat message](https://platform.openai.com/docs/api-reference/chat/create) format, which keeps the downstream data processing pipeline uniform.
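+Before wiring the function into a config, you can sanity-check it on a single hand-written record (the record below is made up for illustration, not an actual row of the dataset):
+
+```python
+example = {
+    'system': 'You are a helpful assistant.',
+    'question': 'Name a primary color.',
+    'chosen': 'Red is a primary color.',
+    'rejected': 'Bananas are yellow.',
+}
+
+result = intel_orca_dpo_map_fn(example)
+# `result` now matches XTuner's preference format:
+print(result['prompt'])    # system message followed by the user question
+print(result['chosen'])    # [{'role': 'assistant', 'content': 'Red is a primary color.'}]
+print(result['rejected'])  # [{'role': 'assistant', 'content': 'Bananas are yellow.'}]
+```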
+After defining the map_fn, import it in the config file and set it in the `dataset_map_fn` field:
+
+```python
+train_dataset = dict(
+    type=build_preference_dataset,
+    dataset=dict(
+        type=load_dataset,
+        path='Intel/orca_dpo_pairs'),
+    tokenizer=tokenizer,
+    max_length=max_length,
+    dataset_map_fn=intel_orca_dpo_map_fn,
+)
+```
diff --git a/docs/zh_cn/reward_model/quick_start.md b/docs/zh_cn/reward_model/quick_start.md
new file mode 100644
index 000000000..3762a4e8c
--- /dev/null
+++ b/docs/zh_cn/reward_model/quick_start.md
@@ -0,0 +1,86 @@
+## Quick Start with Reward Model
+
+In this section we walk through training a 1.8B Reward Model with XTuner, to help you get started quickly.
+
+### Prepare Pretrained Model Weights
+
+Following the description in [Training language models to follow instructions with human feedback](https://arxiv.org/abs/2203.02155), we initialize the Reward Model from a language model that has gone through SFT. Here we use [InternLM2-chat-1.8b-sft](https://huggingface.co/internlm/internlm2-chat-1_8b-sft) as the initialization model.
+
+Set `pretrained_model_name_or_path = 'internlm/internlm2-chat-1_8b-sft'` in the training config file, and the model weights will be downloaded automatically when training starts. If you need to download the weights manually, refer to the [Prepare Pretrained Model Weights](https://xtuner.readthedocs.io/zh-cn/latest/preparation/pretrained_model.html) section, which explains in detail how to download model weights from HuggingFace or ModelScope. The links for this model are:
+
+- HuggingFace: https://huggingface.co/internlm/internlm2-chat-1_8b-sft
+
+- ModelScope: https://modelscope.cn/models/Shanghai_AI_Laboratory/internlm2-chat-1_8b-sft/summary
+
+### Prepare Training Data
+
+This tutorial uses the [UltraFeedback](https://arxiv.org/abs/2310.01377) dataset as a demonstration. For convenience, we use [argilla/ultrafeedback-binarized-preferences-cleaned](https://huggingface.co/datasets/argilla/ultrafeedback-binarized-preferences-cleaned), a version already preprocessed on huggingface:
+
+```python
+train_dataset = dict(
+    type=build_preference_dataset,
+    dataset=dict(
+        type=load_dataset,
+        path='argilla/ultrafeedback-binarized-preferences-cleaned'),
+    dataset_map_fn=orpo_dpo_mix_40k_map_fn,
+    is_dpo=False,
+    is_reward=True,
+)
+```
+
+With this configuration the dataset is downloaded and processed automatically. To use another open-source dataset from huggingface or your own custom data, refer to the [Preference Datasets](./preference_data.md) section.
+
+### Prepare the Config File
+
+XTuner ships several ready-to-use config files, which you can list with `xtuner list-cfg`. Run the following command to copy one of them into the current directory:
+
+```bash
+xtuner copy-cfg internlm2_chat_1_8b_reward_full_ultrafeedback .
+```
+
+Open the copied config file. If you let the model and dataset download automatically, nothing needs to be changed. If you want to use pre-downloaded model and dataset paths, edit `pretrained_model_name_or_path` and the `path` argument of `dataset` inside `train_dataset`.
+
+For more training parameter settings, refer to the [Modify Reward Model Settings](./modify_settings.md) section.
+
+### Launch Training
+
+After the steps above, you can launch a training job with one of the following commands:
+
+```bash
+# single machine, single GPU
+xtuner train ./internlm2_chat_1_8b_reward_full_ultrafeedback_copy.py
+# single machine, multiple GPUs
+NPROC_PER_NODE=${GPU_NUM} xtuner train ./internlm2_chat_1_8b_reward_full_ultrafeedback_copy.py
+# slurm cluster
+srun ${SRUN_ARGS} xtuner train ./internlm2_chat_1_8b_reward_full_ultrafeedback_copy.py --launcher slurm
+```
+
+A correct training log looks like this (running on a single A800 GPU):
+
+```
+06/06 16:12:11 - mmengine - INFO - Iter(train) [  10/15230]  lr: 3.9580e-07  eta: 2:59:41  time: 0.7084  data_time: 0.0044  memory: 18021  loss: 0.6270  acc: 0.0000  chosen_score_mean: 0.0000  rejected_score_mean: 0.0000  num_samples: 4.0000  num_tokens: 969.0000
+06/06 16:12:17 - mmengine - INFO - Iter(train) [  20/15230]  lr: 8.3536e-07  eta: 2:45:25  time: 0.5968  data_time: 0.0034  memory: 42180  loss: 0.6270  acc: 0.5000  chosen_score_mean: 0.0013  rejected_score_mean: 0.0010  num_samples: 4.0000  num_tokens: 1405.0000
+06/06 16:12:22 - mmengine - INFO - Iter(train) [  30/15230]  lr: 1.2749e-06  eta: 2:37:18  time: 0.5578  data_time: 0.0024  memory: 32121  loss: 0.6270  acc: 0.7500  chosen_score_mean: 0.0016  rejected_score_mean: 0.0011  num_samples: 4.0000  num_tokens: 932.0000
+06/06 16:12:28 - mmengine - INFO - Iter(train) [  40/15230]  lr: 1.7145e-06  eta: 2:36:05  time: 0.6033  data_time: 0.0025  memory: 42186  loss: 0.6270  acc: 0.7500  chosen_score_mean: 0.0027  rejected_score_mean: 0.0016  num_samples: 4.0000  num_tokens: 994.0000
+06/06 16:12:35 - mmengine - INFO - Iter(train) [  50/15230]  lr: 2.1540e-06  eta: 2:41:03  time: 0.7166  data_time: 0.0027  memory: 42186  loss: 0.6278  acc: 0.5000  chosen_score_mean: 0.0031  rejected_score_mean: 0.0032  num_samples: 4.0000  num_tokens: 2049.0000
+06/06 16:12:40 - mmengine - INFO - Iter(train) [  60/15230]  lr: 2.5936e-06  eta: 2:33:37  time: 0.4627  data_time: 0.0023  memory: 30238  loss: 0.6262  acc: 1.0000  chosen_score_mean: 0.0057  rejected_score_mean: 0.0030  num_samples: 4.0000  num_tokens: 992.0000
+06/06 16:12:46 - mmengine - INFO - Iter(train) [  70/15230]  lr: 3.0331e-06  eta: 2:33:18  time: 0.6018  data_time: 0.0025  memory: 42186  loss: 0.6247  acc: 0.7500  chosen_score_mean: 0.0117  rejected_score_mean: 0.0055  num_samples: 4.0000  num_tokens: 815.0000
+```
+
+### Model Conversion
+
+XTuner provides an integrated tool for converting models to the HuggingFace format. We only need to run
+
+```bash
+# create the directory that stores the hf-format parameters
+mkdir work_dirs/internlm2_chat_1_8b_reward_full_ultrafeedback_copy/iter_15230_hf
+
+# convert the format
+xtuner convert pth_to_hf internlm2_chat_1_8b_reward_full_ultrafeedback_copy.py \
+                         work_dirs/internlm2_chat_1_8b_reward_full_ultrafeedback_copy/iter_15230.pth \
+                         work_dirs/internlm2_chat_1_8b_reward_full_ultrafeedback_copy/iter_15230_hf
+```
+
+to convert the XTuner checkpoint into a HuggingFace-format model.
+
+Note that since the Reward Model type is not integrated into the official transformers library, currently only Reward Models trained from InternLM2 are converted to the InternLM2ForRewardModel type, while other models are converted to the SequenceClassification type by default (for example, LLaMa3 is converted to LlamaForSequenceClassification). This does not affect their use in XTuner's PPO training.
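+Once converted, the model can be loaded for scoring, for example to rank candidates for BoN sampling. A minimal sketch, assuming an InternLM2-based reward model whose remote code exposes a `get_score` helper (as in the InternLM2 reward modeling code; other model types go through their SequenceClassification head instead):
+
+```python
+import torch
+from transformers import AutoModel, AutoTokenizer
+
+path = 'work_dirs/internlm2_chat_1_8b_reward_full_ultrafeedback_copy/iter_15230_hf'
+tokenizer = AutoTokenizer.from_pretrained(path, trust_remote_code=True)
+model = AutoModel.from_pretrained(
+    path, torch_dtype=torch.float16, trust_remote_code=True)
+
+chat = [
+    {'role': 'user', 'content': 'Who won the world series in 2020?'},
+    {'role': 'assistant',
+     'content': 'The Los Angeles Dodgers won the World Series in 2020.'},
+]
+# Higher scores indicate responses that better match the learned preferences.
+print(model.get_score(tokenizer, chat))
+```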
From 44749c2adf214add084f01514ec2ce463c506e8a Mon Sep 17 00:00:00 2001
From: whcao <41630003+HIT-cwh@users.noreply.github.com>
Date: Wed, 3 Jul 2024 20:05:13 +0800
Subject: [PATCH 06/29] Support internlm2.5 (#803)

* add internlm 2.5 configs

* update readme
---
 README.md                                     | 31 +--
 README_zh-CN.md                               | 29 +--
 ...chat_7b_full_finetune_custom_dataset_e1.py | 226 ++++++++++++++++++
 .../internlm2_5_chat_7b_qlora_alpaca_e3.py    | 219 +++++++++++++++++
 .../internlm2_5_chat_7b_qlora_oasst1_e3.py    | 219 +++++++++++++++++
 5 files changed, 688 insertions(+), 36 deletions(-)
 create mode 100644 xtuner/configs/internlm/internlm2_5_chat_7b/internlm2_5_chat_7b_full_finetune_custom_dataset_e1.py
 create mode 100644 xtuner/configs/internlm/internlm2_5_chat_7b/internlm2_5_chat_7b_qlora_alpaca_e3.py
 create mode 100644 xtuner/configs/internlm/internlm2_5_chat_7b/internlm2_5_chat_7b_qlora_oasst1_e3.py

diff --git a/README.md b/README.md
index e0be695ef..4e729226c 100644
--- a/README.md
+++ b/README.md
@@ -39,6 +39,8 @@ English | [简体中文](README_zh-CN.md)
 
 ## 🎉 News
 
+- **\[2024/07\]** Support [InternLM 2.5](xtuner/configs/internlm/internlm2_5_chat_7b/) models!
+- **\[2024/06\]** Support [DeepSeek V2](xtuner/configs/deepseek/deepseek_v2_chat/) models! **2x faster!**
 - **\[2024/04\]** [LLaVA-Phi-3-mini](https://huggingface.co/xtuner/llava-phi-3-mini-hf) is released! Click [here](xtuner/configs/llava/phi3_mini_4k_instruct_clip_vit_large_p14_336) for details!
 - **\[2024/04\]** [LLaVA-Llama-3-8B](https://huggingface.co/xtuner/llava-llama-3-8b) and [LLaVA-Llama-3-8B-v1.1](https://huggingface.co/xtuner/llava-llama-3-8b-v1_1) are released! Click [here](xtuner/configs/llava/llama3_8b_instruct_clip_vit_large_p14_336) for details!
 - **\[2024/04\]** Support [Llama 3](xtuner/configs/llama) models!
@@ -100,16 +102,15 @@ XTuner is an efficient, flexible and full-featured toolkit for fine-tuning large
@@ -203,14 +204,14 @@ XTuner supports the efficient fine-tune (*e.g.*, QLoRA) for LLMs. Dataset prepar
   xtuner train ${CONFIG_NAME_OR_PATH}
   ```
 
-  For example, we can start the QLoRA fine-tuning of InternLM2-Chat-7B with oasst1 dataset by
+  For example, we can start the QLoRA fine-tuning of InternLM2.5-Chat-7B with oasst1 dataset by
 
   ```shell
   # On a single GPU
-  xtuner train internlm2_chat_7b_qlora_oasst1_e3 --deepspeed deepspeed_zero2
+  xtuner train internlm2_5_chat_7b_qlora_oasst1_e3 --deepspeed deepspeed_zero2
   # On multiple GPUs
-  (DIST) NPROC_PER_NODE=${GPU_NUM} xtuner train internlm2_chat_7b_qlora_oasst1_e3 --deepspeed deepspeed_zero2
-  (SLURM) srun ${SRUN_ARGS} xtuner train internlm2_chat_7b_qlora_oasst1_e3 --launcher slurm --deepspeed deepspeed_zero2
+  (DIST) NPROC_PER_NODE=${GPU_NUM} xtuner train internlm2_5_chat_7b_qlora_oasst1_e3 --deepspeed deepspeed_zero2
+  (SLURM) srun ${SRUN_ARGS} xtuner train internlm2_5_chat_7b_qlora_oasst1_e3 --launcher slurm --deepspeed deepspeed_zero2
  ```
 
- `--deepspeed` means using [DeepSpeed](https://github.com/microsoft/DeepSpeed) 🚀 to optimize the training. XTuner comes with several integrated strategies including ZeRO-1, ZeRO-2, and ZeRO-3. If you wish to disable this feature, simply remove this argument.
@@ -231,18 +232,10 @@ XTuner provides tools to chat with pretrained / fine-tuned LLMs.
 xtuner chat ${NAME_OR_PATH_TO_LLM} --adapter {NAME_OR_PATH_TO_ADAPTER} [optional arguments]
 ```
 
-For example, we can start the chat with
-
-InternLM2-Chat-7B with adapter trained from oasst1 dataset:
-
-```shell
-xtuner chat internlm/internlm2-chat-7b --adapter xtuner/internlm2-chat-7b-qlora-oasst1 --prompt-template internlm2_chat
-```
-
-LLaVA-InternLM2-7B:
+For example, we can start the chat with InternLM2.5-Chat-7B:
 
 ```shell
-xtuner chat internlm/internlm2-chat-7b --visual-encoder openai/clip-vit-large-patch14-336 --llava xtuner/llava-internlm2-7b --prompt-template internlm2_chat --image $IMAGE_PATH
+xtuner chat internlm/internlm2_5-7b-chat --prompt-template internlm2_chat
 ```
 
 For more examples, please see [chat.md](./docs/en/user_guides/chat.md).
diff --git a/README_zh-CN.md b/README_zh-CN.md
index c5037d28c..16c1a2af2 100644
--- a/README_zh-CN.md
+++ b/README_zh-CN.md
@@ -39,6 +39,8 @@
 
 ## 🎉 News
 
+- **\[2024/07\]** Support [InternLM 2.5](xtuner/configs/internlm/internlm2_5_chat_7b/) models!
+- **\[2024/06\]** Support [DeepSeek V2](xtuner/configs/deepseek/deepseek_v2_chat/) models! **2x faster training!**
 - **\[2024/04\]** The multimodal model [LLaVA-Phi-3-mini](https://huggingface.co/xtuner/llava-phi-3-mini-hf) is released! See this [document](xtuner/configs/llava/phi3_mini_4k_instruct_clip_vit_large_p14_336) to get started!
 - **\[2024/04\]** The multimodal models [LLaVA-Llama-3-8B](https://huggingface.co/xtuner/llava-llama-3-8b) and [LLaVA-Llama-3-8B-v1.1](https://huggingface.co/xtuner/llava-llama-3-8b-v1_1) are released! See this [document](xtuner/configs/llava/llama3_8b_instruct_clip_vit_large_p14_336) to get started!
 - **\[2024/04\]** Support [Llama 3](xtuner/configs/llama) models!
@@ -100,16 +102,15 @@ XTuner is an efficient, flexible and full-featured toolkit for fine-tuning large models.
@@ -203,14 +204,14 @@ XTuner supports fine-tuning large language models. For dataset preprocessing, see the documentation.
   xtuner train ${CONFIG_NAME_OR_PATH}
   ```
 
-  For example, we can fine-tune InternLM2-Chat-7B on the oasst1 dataset with the QLoRA algorithm:
+  For example, we can fine-tune InternLM2.5-Chat-7B on the oasst1 dataset with the QLoRA algorithm:
 
   ```shell
   # On a single GPU
-  xtuner train internlm2_chat_7b_qlora_oasst1_e3 --deepspeed deepspeed_zero2
+  xtuner train internlm2_5_chat_7b_qlora_oasst1_e3 --deepspeed deepspeed_zero2
   # On multiple GPUs
-  (DIST) NPROC_PER_NODE=${GPU_NUM} xtuner train internlm2_chat_7b_qlora_oasst1_e3 --deepspeed deepspeed_zero2
-  (SLURM) srun ${SRUN_ARGS} xtuner train internlm2_chat_7b_qlora_oasst1_e3 --launcher slurm --deepspeed deepspeed_zero2
+  (DIST) NPROC_PER_NODE=${GPU_NUM} xtuner train internlm2_5_chat_7b_qlora_oasst1_e3 --deepspeed deepspeed_zero2
+  (SLURM) srun ${SRUN_ARGS} xtuner train internlm2_5_chat_7b_qlora_oasst1_e3 --launcher slurm --deepspeed deepspeed_zero2
  ```
 
- `--deepspeed` means using [DeepSpeed](https://github.com/microsoft/DeepSpeed) 🚀 to optimize the training. XTuner has several built-in strategies, including ZeRO-1, ZeRO-2 and ZeRO-3. To disable this feature, simply remove this argument.
@@ -233,16 +234,10 @@ xtuner chat ${NAME_OR_PATH_TO_LLM} --adapter {NAME_OR_PATH_TO_ADAPTER} [optional
 
 For example:
 
-Chat with InternLM2-Chat-7B with the adapter trained from the oasst1 dataset:
+Chat with InternLM2.5-Chat-7B:
 
 ```shell
-xtuner chat internlm/internlm2-chat-7b --adapter xtuner/internlm2-chat-7b-qlora-oasst1 --prompt-template internlm2_chat
-```
-
-Chat with LLaVA-InternLM2-7B:
-
-```shell
-xtuner chat internlm/internlm2-chat-7b --visual-encoder openai/clip-vit-large-patch14-336 --llava xtuner/llava-internlm2-7b --prompt-template internlm2_chat --image $IMAGE_PATH
+xtuner chat internlm/internlm2_5-7b-chat --prompt-template internlm2_chat
 ```
 
 For more examples, please refer to the [document](./docs/zh_cn/user_guides/chat.md).
diff --git a/xtuner/configs/internlm/internlm2_5_chat_7b/internlm2_5_chat_7b_full_finetune_custom_dataset_e1.py b/xtuner/configs/internlm/internlm2_5_chat_7b/internlm2_5_chat_7b_full_finetune_custom_dataset_e1.py
new file mode 100644
index 000000000..bc8a2816a
--- /dev/null
+++ b/xtuner/configs/internlm/internlm2_5_chat_7b/internlm2_5_chat_7b_full_finetune_custom_dataset_e1.py
@@ -0,0 +1,226 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+"""Data format:
+[
+    {
+        "conversation": [
+            {
+                "system": "",
+                "input": "xxx",
+                "output": "xxx"
+            },
+            {
+                "input": "xxx",
+                "output": "xxx"
+            }
+        ]
+    },
+...
+]
+Please refer to https://github.com/InternLM/xtuner/blob/main/docs/en/user_guides/dataset_format.md for details.
+""" # noqa: E501 +from datasets import load_dataset +from mmengine.hooks import (CheckpointHook, DistSamplerSeedHook, IterTimerHook, + LoggerHook, ParamSchedulerHook) +from mmengine.optim import AmpOptimWrapper, CosineAnnealingLR +from torch.optim import AdamW +from torch.utils.data import BatchSampler +from transformers import AutoModelForCausalLM, AutoTokenizer + +from xtuner.dataset import process_hf_dataset +from xtuner.dataset.collate_fns import default_collate_fn +from xtuner.dataset.map_fns import template_map_fn_factory +from xtuner.dataset.samplers import InternRepoSampler +from xtuner.engine import (DatasetInfoHook, EvaluateChatHook, ThroughputHook, + VarlenAttnArgsToMessageHubHook) +from xtuner.engine.runner import TrainLoop +from xtuner.model import SupervisedFinetune +from xtuner.utils import PROMPT_TEMPLATE + +####################################################################### +# PART 1 Settings # +####################################################################### +# Model +pretrained_model_name_or_path = 'internlm/internlm2_5-7b-chat' +use_varlen_attn = True + +# Data +data_files = ['/path/to/json/file.json'] +prompt_template = PROMPT_TEMPLATE.internlm2_chat +max_length = 32768 +pack_to_max_length = True + +# parallel +sequence_parallel_size = 1 + +# Scheduler & Optimizer +# batch size per device, set to 1 if `use_varlen_attn` = True +# To clarify, enlarging the batch size essentially enlarges the `max_length`. +# For example, doubling the max length is tantamount to doubling the batch size +batch_size = 1 +accumulative_counts = 1 # 1bs * 1acc * 64gpu = 64 batchsize +accumulative_counts *= sequence_parallel_size +dataloader_num_workers = 4 +max_epochs = 1 +optim_type = AdamW +lr = 4e-5 +betas = (0.9, 0.95) +weight_decay = 0.01 +max_norm = 1 # grad clip +warm_up_ratio = 0.025 + +# Save +save_steps = 500 +save_total_limit = 2 # Maximum checkpoints to keep (-1 means unlimited) + +# Evaluate the generation performance during the training +evaluation_freq = 500 +SYSTEM = '' +evaluation_inputs = [ + '请给我介绍五个上海的景点', 'Please tell me five scenic spots in Shanghai' +] + +####################################################################### +# PART 2 Model & Tokenizer # +####################################################################### +tokenizer = dict( + type=AutoTokenizer.from_pretrained, + pretrained_model_name_or_path=pretrained_model_name_or_path, + trust_remote_code=True, + padding_side='right') + +model = dict( + type=SupervisedFinetune, + use_varlen_attn=use_varlen_attn, + llm=dict( + type=AutoModelForCausalLM.from_pretrained, + pretrained_model_name_or_path=pretrained_model_name_or_path, + trust_remote_code=True)) + +####################################################################### +# PART 3 Dataset & Dataloader # +####################################################################### +train_dataset = dict( + type=process_hf_dataset, + use_varlen_attn=use_varlen_attn, + dataset=dict(type=load_dataset, path='json', data_files=data_files), + tokenizer=tokenizer, + max_length=max_length, + dataset_map_fn=None, + template_map_fn=dict( + type=template_map_fn_factory, template=prompt_template), + remove_unused_columns=True, + shuffle_before_pack=True, + pack_to_max_length=pack_to_max_length) + +train_dataloader = dict( + batch_size=batch_size, + num_workers=dataloader_num_workers, + dataset=train_dataset, + sampler=dict(type=InternRepoSampler, shuffle=True, seed=1024), + batch_sampler=dict( + type=BatchSampler, drop_last=True, batch_size=batch_size), + 
collate_fn=dict(type=default_collate_fn, use_varlen_attn=use_varlen_attn))
+
+#######################################################################
+#                    PART 4  Scheduler & Optimizer                    #
+#######################################################################
+# optimizer
+optim_wrapper = dict(
+    type=AmpOptimWrapper,
+    optimizer=dict(
+        type=optim_type, lr=lr, betas=betas, weight_decay=weight_decay),
+    clip_grad=dict(max_norm=max_norm, error_if_nonfinite=False),
+    accumulative_counts=accumulative_counts,
+    loss_scale='dynamic',
+)
+
+# learning policy
+# More information: https://github.com/open-mmlab/mmengine/blob/main/docs/en/tutorials/param_scheduler.md  # noqa: E501
+param_scheduler = [
+    dict(
+        type='LinearLR',
+        start_factor=1 / 40,
+        by_epoch=True,
+        begin=0,
+        end=warm_up_ratio * max_epochs,
+        convert_to_iter_based=True),
+    dict(
+        type=CosineAnnealingLR,
+        eta_min=lr * 0.15,
+        by_epoch=True,
+        begin=warm_up_ratio * max_epochs,
+        end=max_epochs,
+        convert_to_iter_based=True)
+]
+
+# train, val, test setting
+train_cfg = dict(type=TrainLoop, max_epochs=max_epochs)
+
+#######################################################################
+#                           PART 5  Runtime                           #
+#######################################################################
+# Log the dialogue periodically during the training process, optional
+custom_hooks = [
+    dict(
+        type=DatasetInfoHook, tokenizer=tokenizer,
+        is_intern_repo_dataset=True),
+    dict(
+        type=EvaluateChatHook,
+        tokenizer=tokenizer,
+        every_n_iters=evaluation_freq,
+        evaluation_inputs=evaluation_inputs,
+        system=SYSTEM,
+        prompt_template=prompt_template),
+    dict(type=ThroughputHook)
+]
+
+if use_varlen_attn:
+    custom_hooks += [dict(type=VarlenAttnArgsToMessageHubHook)]
+
+# configure default hooks
+default_hooks = dict(
+    # record the time of every iteration.
+    timer=dict(type=IterTimerHook),
+    # print log every iteration.
+    logger=dict(type=LoggerHook, log_metric_by_epoch=False, interval=1),
+    # enable the parameter scheduler.
+    param_scheduler=dict(type=ParamSchedulerHook),
+    # save checkpoint per `save_steps`.
+    checkpoint=dict(
+        type=CheckpointHook,
+        by_epoch=False,
+        interval=save_steps,
+        max_keep_ckpts=save_total_limit),
+    # set sampler seed in distributed environment.
+    sampler_seed=dict(type=DistSamplerSeedHook),
+)
+
+# configure environment
+env_cfg = dict(
+    # whether to enable cudnn benchmark
+    cudnn_benchmark=False,
+    # set multi process parameters
+    mp_cfg=dict(mp_start_method='fork', opencv_num_threads=0),
+    # set distributed parameters
+    dist_cfg=dict(backend='nccl'),
+)
+
+# set visualizer
+visualizer = None
+
+# set log level
+log_level = 'INFO'
+
+# load from which checkpoint
+load_from = None
+
+# whether to resume training from the loaded checkpoint
+resume = False
+
+# Defaults to use random seed and disable `deterministic`
+randomness = dict(seed=None, deterministic=False)
+
+log_processor = dict(
+    by_epoch=False,
+    window_size=1,
+    mean_pattern=r'.*(loss|time|data_time|grad_norm|tflops).*')
diff --git a/xtuner/configs/internlm/internlm2_5_chat_7b/internlm2_5_chat_7b_qlora_alpaca_e3.py b/xtuner/configs/internlm/internlm2_5_chat_7b/internlm2_5_chat_7b_qlora_alpaca_e3.py
new file mode 100644
index 000000000..7dfc92617
--- /dev/null
+++ b/xtuner/configs/internlm/internlm2_5_chat_7b/internlm2_5_chat_7b_qlora_alpaca_e3.py
@@ -0,0 +1,219 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+import torch +from datasets import load_dataset +from mmengine.dataset import DefaultSampler +from mmengine.hooks import (CheckpointHook, DistSamplerSeedHook, IterTimerHook, + LoggerHook, ParamSchedulerHook) +from mmengine.optim import AmpOptimWrapper, CosineAnnealingLR, LinearLR +from peft import LoraConfig +from torch.optim import AdamW +from transformers import (AutoModelForCausalLM, AutoTokenizer, + BitsAndBytesConfig) + +from xtuner.dataset import process_hf_dataset +from xtuner.dataset.collate_fns import default_collate_fn +from xtuner.dataset.map_fns import alpaca_map_fn, template_map_fn_factory +from xtuner.engine.hooks import (DatasetInfoHook, EvaluateChatHook, + VarlenAttnArgsToMessageHubHook) +from xtuner.engine.runner import TrainLoop +from xtuner.model import SupervisedFinetune +from xtuner.parallel.sequence import SequenceParallelSampler +from xtuner.utils import PROMPT_TEMPLATE, SYSTEM_TEMPLATE + +####################################################################### +# PART 1 Settings # +####################################################################### +# Model +pretrained_model_name_or_path = 'internlm/internlm2_5-7b-chat' +use_varlen_attn = False + +# Data +alpaca_en_path = 'tatsu-lab/alpaca' +prompt_template = PROMPT_TEMPLATE.internlm2_chat +max_length = 2048 +pack_to_max_length = True + +# parallel +sequence_parallel_size = 1 + +# Scheduler & Optimizer +batch_size = 1 # per_device +accumulative_counts = 1 +accumulative_counts *= sequence_parallel_size +dataloader_num_workers = 0 +max_epochs = 3 +optim_type = AdamW +lr = 2e-4 +betas = (0.9, 0.999) +weight_decay = 0 +max_norm = 1 # grad clip +warmup_ratio = 0.03 + +# Save +save_steps = 500 +save_total_limit = 2 # Maximum checkpoints to keep (-1 means unlimited) + +# Evaluate the generation performance during the training +evaluation_freq = 500 +SYSTEM = SYSTEM_TEMPLATE.alpaca +evaluation_inputs = [ + '请给我介绍五个上海的景点', 'Please tell me five scenic spots in Shanghai' +] + +####################################################################### +# PART 2 Model & Tokenizer # +####################################################################### +tokenizer = dict( + type=AutoTokenizer.from_pretrained, + pretrained_model_name_or_path=pretrained_model_name_or_path, + trust_remote_code=True, + padding_side='right') + +model = dict( + type=SupervisedFinetune, + use_varlen_attn=use_varlen_attn, + llm=dict( + type=AutoModelForCausalLM.from_pretrained, + pretrained_model_name_or_path=pretrained_model_name_or_path, + trust_remote_code=True, + torch_dtype=torch.float16, + quantization_config=dict( + type=BitsAndBytesConfig, + load_in_4bit=True, + load_in_8bit=False, + llm_int8_threshold=6.0, + llm_int8_has_fp16_weight=False, + bnb_4bit_compute_dtype=torch.float16, + bnb_4bit_use_double_quant=True, + bnb_4bit_quant_type='nf4')), + lora=dict( + type=LoraConfig, + r=64, + lora_alpha=16, + lora_dropout=0.1, + bias='none', + task_type='CAUSAL_LM')) + +####################################################################### +# PART 3 Dataset & Dataloader # +####################################################################### +alpaca_en = dict( + type=process_hf_dataset, + dataset=dict(type=load_dataset, path=alpaca_en_path), + tokenizer=tokenizer, + max_length=max_length, + dataset_map_fn=alpaca_map_fn, + template_map_fn=dict( + type=template_map_fn_factory, template=prompt_template), + remove_unused_columns=True, + shuffle_before_pack=True, + pack_to_max_length=pack_to_max_length, + use_varlen_attn=use_varlen_attn) + +sampler = 
SequenceParallelSampler \
+    if sequence_parallel_size > 1 else DefaultSampler
+train_dataloader = dict(
+    batch_size=batch_size,
+    num_workers=dataloader_num_workers,
+    dataset=alpaca_en,
+    sampler=dict(type=sampler, shuffle=True),
+    collate_fn=dict(type=default_collate_fn, use_varlen_attn=use_varlen_attn))
+
+#######################################################################
+#                    PART 4  Scheduler & Optimizer                    #
+#######################################################################
+# optimizer
+optim_wrapper = dict(
+    type=AmpOptimWrapper,
+    optimizer=dict(
+        type=optim_type, lr=lr, betas=betas, weight_decay=weight_decay),
+    clip_grad=dict(max_norm=max_norm, error_if_nonfinite=False),
+    accumulative_counts=accumulative_counts,
+    loss_scale='dynamic',
+    dtype='float16')
+
+# learning policy
+# More information: https://github.com/open-mmlab/mmengine/blob/main/docs/en/tutorials/param_scheduler.md  # noqa: E501
+param_scheduler = [
+    dict(
+        type=LinearLR,
+        start_factor=1e-5,
+        by_epoch=True,
+        begin=0,
+        end=warmup_ratio * max_epochs,
+        convert_to_iter_based=True),
+    dict(
+        type=CosineAnnealingLR,
+        eta_min=0.0,
+        by_epoch=True,
+        begin=warmup_ratio * max_epochs,
+        end=max_epochs,
+        convert_to_iter_based=True)
+]
+
+# train, val, test setting
+train_cfg = dict(type=TrainLoop, max_epochs=max_epochs)
+
+#######################################################################
+#                           PART 5  Runtime                           #
+#######################################################################
+# Log the dialogue periodically during the training process, optional
+custom_hooks = [
+    dict(type=DatasetInfoHook, tokenizer=tokenizer),
+    dict(
+        type=EvaluateChatHook,
+        tokenizer=tokenizer,
+        every_n_iters=evaluation_freq,
+        evaluation_inputs=evaluation_inputs,
+        system=SYSTEM,
+        prompt_template=prompt_template)
+]
+
+if use_varlen_attn:
+    custom_hooks += [dict(type=VarlenAttnArgsToMessageHubHook)]
+
+# configure default hooks
+default_hooks = dict(
+    # record the time of every iteration.
+    timer=dict(type=IterTimerHook),
+    # print log every 10 iterations.
+    logger=dict(type=LoggerHook, log_metric_by_epoch=False, interval=10),
+    # enable the parameter scheduler.
+    param_scheduler=dict(type=ParamSchedulerHook),
+    # save checkpoint per `save_steps`.
+    checkpoint=dict(
+        type=CheckpointHook,
+        by_epoch=False,
+        interval=save_steps,
+        max_keep_ckpts=save_total_limit),
+    # set sampler seed in distributed environment.
+    sampler_seed=dict(type=DistSamplerSeedHook),
+)
+
+# configure environment
+env_cfg = dict(
+    # whether to enable cudnn benchmark
+    cudnn_benchmark=False,
+    # set multi process parameters
+    mp_cfg=dict(mp_start_method='fork', opencv_num_threads=0),
+    # set distributed parameters
+    dist_cfg=dict(backend='nccl'),
+)
+
+# set visualizer
+visualizer = None
+
+# set log level
+log_level = 'INFO'
+
+# load from which checkpoint
+load_from = None
+
+# whether to resume training from the loaded checkpoint
+resume = False
+
+# Defaults to use random seed and disable `deterministic`
+randomness = dict(seed=None, deterministic=False)
+
+# set log processor
+log_processor = dict(by_epoch=False)
diff --git a/xtuner/configs/internlm/internlm2_5_chat_7b/internlm2_5_chat_7b_qlora_oasst1_e3.py b/xtuner/configs/internlm/internlm2_5_chat_7b/internlm2_5_chat_7b_qlora_oasst1_e3.py
new file mode 100644
index 000000000..98b097efb
--- /dev/null
+++ b/xtuner/configs/internlm/internlm2_5_chat_7b/internlm2_5_chat_7b_qlora_oasst1_e3.py
@@ -0,0 +1,219 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+import torch +from datasets import load_dataset +from mmengine.dataset import DefaultSampler +from mmengine.hooks import (CheckpointHook, DistSamplerSeedHook, IterTimerHook, + LoggerHook, ParamSchedulerHook) +from mmengine.optim import AmpOptimWrapper, CosineAnnealingLR, LinearLR +from peft import LoraConfig +from torch.optim import AdamW +from transformers import (AutoModelForCausalLM, AutoTokenizer, + BitsAndBytesConfig) + +from xtuner.dataset import process_hf_dataset +from xtuner.dataset.collate_fns import default_collate_fn +from xtuner.dataset.map_fns import oasst1_map_fn, template_map_fn_factory +from xtuner.engine.hooks import (DatasetInfoHook, EvaluateChatHook, + VarlenAttnArgsToMessageHubHook) +from xtuner.engine.runner import TrainLoop +from xtuner.model import SupervisedFinetune +from xtuner.parallel.sequence import SequenceParallelSampler +from xtuner.utils import PROMPT_TEMPLATE + +####################################################################### +# PART 1 Settings # +####################################################################### +# Model +pretrained_model_name_or_path = 'internlm/internlm2_5-7b-chat' +use_varlen_attn = False + +# Data +data_path = 'timdettmers/openassistant-guanaco' +prompt_template = PROMPT_TEMPLATE.internlm2_chat +max_length = 2048 +pack_to_max_length = True + +# parallel +sequence_parallel_size = 1 + +# Scheduler & Optimizer +batch_size = 1 # per_device +accumulative_counts = 16 +accumulative_counts *= sequence_parallel_size +dataloader_num_workers = 0 +max_epochs = 3 +optim_type = AdamW +lr = 2e-4 +betas = (0.9, 0.999) +weight_decay = 0 +max_norm = 1 # grad clip +warmup_ratio = 0.03 + +# Save +save_steps = 500 +save_total_limit = 2 # Maximum checkpoints to keep (-1 means unlimited) + +# Evaluate the generation performance during the training +evaluation_freq = 500 +SYSTEM = '' +evaluation_inputs = [ + '请给我介绍五个上海的景点', 'Please tell me five scenic spots in Shanghai' +] + +####################################################################### +# PART 2 Model & Tokenizer # +####################################################################### +tokenizer = dict( + type=AutoTokenizer.from_pretrained, + pretrained_model_name_or_path=pretrained_model_name_or_path, + trust_remote_code=True, + padding_side='right') + +model = dict( + type=SupervisedFinetune, + use_varlen_attn=use_varlen_attn, + llm=dict( + type=AutoModelForCausalLM.from_pretrained, + pretrained_model_name_or_path=pretrained_model_name_or_path, + trust_remote_code=True, + torch_dtype=torch.float16, + quantization_config=dict( + type=BitsAndBytesConfig, + load_in_4bit=True, + load_in_8bit=False, + llm_int8_threshold=6.0, + llm_int8_has_fp16_weight=False, + bnb_4bit_compute_dtype=torch.float16, + bnb_4bit_use_double_quant=True, + bnb_4bit_quant_type='nf4')), + lora=dict( + type=LoraConfig, + r=64, + lora_alpha=16, + lora_dropout=0.1, + bias='none', + task_type='CAUSAL_LM')) + +####################################################################### +# PART 3 Dataset & Dataloader # +####################################################################### +train_dataset = dict( + type=process_hf_dataset, + dataset=dict(type=load_dataset, path=data_path), + tokenizer=tokenizer, + max_length=max_length, + dataset_map_fn=oasst1_map_fn, + template_map_fn=dict( + type=template_map_fn_factory, template=prompt_template), + remove_unused_columns=True, + shuffle_before_pack=True, + pack_to_max_length=pack_to_max_length, + use_varlen_attn=use_varlen_attn) + +sampler = SequenceParallelSampler \ + 
if sequence_parallel_size > 1 else DefaultSampler
+train_dataloader = dict(
+    batch_size=batch_size,
+    num_workers=dataloader_num_workers,
+    dataset=train_dataset,
+    sampler=dict(type=sampler, shuffle=True),
+    collate_fn=dict(type=default_collate_fn, use_varlen_attn=use_varlen_attn))
+
+#######################################################################
+#                    PART 4  Scheduler & Optimizer                    #
+#######################################################################
+# optimizer
+optim_wrapper = dict(
+    type=AmpOptimWrapper,
+    optimizer=dict(
+        type=optim_type, lr=lr, betas=betas, weight_decay=weight_decay),
+    clip_grad=dict(max_norm=max_norm, error_if_nonfinite=False),
+    accumulative_counts=accumulative_counts,
+    loss_scale='dynamic',
+    dtype='float16')
+
+# learning policy
+# More information: https://github.com/open-mmlab/mmengine/blob/main/docs/en/tutorials/param_scheduler.md  # noqa: E501
+param_scheduler = [
+    dict(
+        type=LinearLR,
+        start_factor=1e-5,
+        by_epoch=True,
+        begin=0,
+        end=warmup_ratio * max_epochs,
+        convert_to_iter_based=True),
+    dict(
+        type=CosineAnnealingLR,
+        eta_min=0.0,
+        by_epoch=True,
+        begin=warmup_ratio * max_epochs,
+        end=max_epochs,
+        convert_to_iter_based=True)
+]
+
+# train, val, test setting
+train_cfg = dict(type=TrainLoop, max_epochs=max_epochs)
+
+#######################################################################
+#                           PART 5  Runtime                           #
+#######################################################################
+# Log the dialogue periodically during the training process, optional
+custom_hooks = [
+    dict(type=DatasetInfoHook, tokenizer=tokenizer),
+    dict(
+        type=EvaluateChatHook,
+        tokenizer=tokenizer,
+        every_n_iters=evaluation_freq,
+        evaluation_inputs=evaluation_inputs,
+        system=SYSTEM,
+        prompt_template=prompt_template)
+]
+
+if use_varlen_attn:
+    custom_hooks += [dict(type=VarlenAttnArgsToMessageHubHook)]
+
+# configure default hooks
+default_hooks = dict(
+    # record the time of every iteration.
+    timer=dict(type=IterTimerHook),
+    # print log every 10 iterations.
+    logger=dict(type=LoggerHook, log_metric_by_epoch=False, interval=10),
+    # enable the parameter scheduler.
+    param_scheduler=dict(type=ParamSchedulerHook),
+    # save checkpoint per `save_steps`.
+    checkpoint=dict(
+        type=CheckpointHook,
+        by_epoch=False,
+        interval=save_steps,
+        max_keep_ckpts=save_total_limit),
+    # set sampler seed in distributed environment.
+ sampler_seed=dict(type=DistSamplerSeedHook), +) + +# configure environment +env_cfg = dict( + # whether to enable cudnn benchmark + cudnn_benchmark=False, + # set multi process parameters + mp_cfg=dict(mp_start_method='fork', opencv_num_threads=0), + # set distributed parameters + dist_cfg=dict(backend='nccl'), +) + +# set visualizer +visualizer = None + +# set log level +log_level = 'INFO' + +# load from which checkpoint +load_from = None + +# whether to resume training from the loaded checkpoint +resume = False + +# Defaults to use random seed and disable `deterministic` +randomness = dict(seed=None, deterministic=False) + +# set log processor +log_processor = dict(by_epoch=False) From 9c28b40790f139e29625ad273e0878d3649f542e Mon Sep 17 00:00:00 2001 From: whcao <41630003+HIT-cwh@users.noreply.github.com> Date: Tue, 9 Jul 2024 08:41:43 +0800 Subject: [PATCH 07/29] [Bugs] fix dispatch bugs when model not in LOWEST_TRANSFORMERS_VERSION (#802) * fix dispatch bugs when model not in LOWEST_TRANSFORMERS_VERSION * move rope_theta --- xtuner/model/modules/dispatch/__init__.py | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) diff --git a/xtuner/model/modules/dispatch/__init__.py b/xtuner/model/modules/dispatch/__init__.py index 7cb159515..e81ec7a3a 100644 --- a/xtuner/model/modules/dispatch/__init__.py +++ b/xtuner/model/modules/dispatch/__init__.py @@ -228,14 +228,14 @@ def replace_rote(model): from mmengine import print_log print_log = log_once(print_log) - assert hasattr(model.config, 'rope_theta'), \ - '`rope_theta` should be in the model config.' - rope_theta = model.config.rope_theta - def traverse(module): for name, child in module.named_children(): cls_name = type(child).__name__ if cls_name in ROTE_DISPATCH_MAPPING: + assert hasattr(model.config, 'rope_theta'), \ + '`rope_theta` should be in the model config.' 
+                rope_theta = model.config.rope_theta
+
                 rote = ROTE_DISPATCH_MAPPING[cls_name]
                 rote = rote.build()
                 print_log(f'replace {cls_name}', 'current')
@@ -258,10 +258,11 @@ def check(model_name):
             # a workaround for reward model
             model_name = model_name[:-5] + 'ForCausalLM'
         msg = '{} requires transformers version at least {}, but got {}'
-        assert TRANSFORMERS_VERSION >= LOWEST_TRANSFORMERS_VERSION[
-            model_name], msg.format(model_name,
-                                    LOWEST_TRANSFORMERS_VERSION[model_name],
-                                    TRANSFORMERS_VERSION)
+        if model_name in LOWEST_TRANSFORMERS_VERSION:
+            assert TRANSFORMERS_VERSION >= LOWEST_TRANSFORMERS_VERSION[
+                model_name], msg.format(
+                    model_name, LOWEST_TRANSFORMERS_VERSION[model_name],
+                    TRANSFORMERS_VERSION)
 
     check(type(model).__name__)
     if use_varlen_attn:

From 7575f2c9137cf6ea24f7cb63ae22c1bad38bbe79 Mon Sep 17 00:00:00 2001
From: whcao <41630003+HIT-cwh@users.noreply.github.com>
Date: Tue, 9 Jul 2024 08:42:09 +0800
Subject: [PATCH 08/29] [Docs] fix benchmark table (#801)

fix benchmark table
---
 docs/zh_cn/acceleration/benchmark.rst | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/docs/zh_cn/acceleration/benchmark.rst b/docs/zh_cn/acceleration/benchmark.rst
index 6434b6d15..5a1c80804 100644
--- a/docs/zh_cn/acceleration/benchmark.rst
+++ b/docs/zh_cn/acceleration/benchmark.rst
@@ -79,7 +79,7 @@
      - 176.9
      - `llama2_7b_full_alpaca_enzh_256k_sp8.py `_
    * - Llama2-7B
-     - 8
+     - 32
      - 1M
      - 133.6
      - 153.9

From 8adc8d4a5d6f5a33474f85093d8bb43c24c2b32e Mon Sep 17 00:00:00 2001
From: whcao <41630003+HIT-cwh@users.noreply.github.com>
Date: Tue, 9 Jul 2024 08:42:51 +0800
Subject: [PATCH 09/29] [Feature] support output without loss in openai_map_fn
 (#816)

support output without loss in openai_map_fn
---
 xtuner/dataset/map_fns/dataset_map_fns/openai_map_fn.py | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/xtuner/dataset/map_fns/dataset_map_fns/openai_map_fn.py b/xtuner/dataset/map_fns/dataset_map_fns/openai_map_fn.py
index 64ed642f6..c1798dc45 100644
--- a/xtuner/dataset/map_fns/dataset_map_fns/openai_map_fn.py
+++ b/xtuner/dataset/map_fns/dataset_map_fns/openai_map_fn.py
@@ -32,10 +32,13 @@ def openai_map_fn(example):
         elif msg['role'] == 'user':
             input += msg['content']
         elif msg['role'] == 'assistant':
+            output_with_loss = msg.get('loss', True)
+            output_with_loss = str(output_with_loss).lower() == 'true'
             conversation.append({
                 'system': system,
                 'input': input,
-                'output': msg['content']
+                'output': msg['content'],
+                'output_with_loss': output_with_loss
             })
             system = ''
             input = ''

From 48df4c8bd137abb215e09cd4ec03dee564f51d18 Mon Sep 17 00:00:00 2001
From: whcao <41630003+HIT-cwh@users.noreply.github.com>
Date: Wed, 10 Jul 2024 14:13:01 +0800
Subject: [PATCH 10/29] [Docs] fix typos in sp docs (#821)

fix typo
---
 docs/zh_cn/acceleration/train_extreme_long_sequence.rst | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/docs/zh_cn/acceleration/train_extreme_long_sequence.rst b/docs/zh_cn/acceleration/train_extreme_long_sequence.rst
index b59a5a83f..65b364ad8 100644
--- a/docs/zh_cn/acceleration/train_extreme_long_sequence.rst
+++ b/docs/zh_cn/acceleration/train_extreme_long_sequence.rst
@@ -56,7 +56,7 @@
      - yi-34B
      - ZeRO-3
      - 16
-     - OOM
+     - 227
 
 To address the memory problem in long-sequence training, the Megatron-LM team and the DeepSpeed team each proposed a sequence parallel algorithm that reduces the sequence length computed on a single GPU by splitting long sequences. The sequence parallel design in XTuner draws on DeepSpeed's work `DeepSpeed Ulysses `_ and optimizes it, **so that sequence parallelism can be enabled with a single switch**. The three are compared as follows:

From b92481fb40c6df2941bd18dafb512e91514abedc Mon Sep 17 00:00:00 2001
From: Xu Song
Date: Thu, 11 Jul 2024 17:21:07 +0800
Subject: [PATCH 11/29]
[Feature] Support the DatasetInfoHook of DPO training (#787) * [Feature] Support the DatasetInfoHook of DPO training * fix yapf check --- .../internlm/internlm2_chat_1_8b_dpo_full.py | 4 +-- ...internlm2_chat_1_8b_dpo_full_varlenattn.py | 4 +-- ..._1_8b_dpo_full_varlenattn_jsonl_dataset.py | 4 +-- .../internlm2_chat_7b_dpo_qlora_varlenattn.py | 4 +-- ...llama3_8b_instruct_dpo_qlora_varlenattn.py | 4 +-- xtuner/engine/hooks/dataset_info_hook.py | 29 ++++++++++++------- 6 files changed, 28 insertions(+), 21 deletions(-) diff --git a/xtuner/configs/dpo/internlm/internlm2_chat_1_8b_dpo_full.py b/xtuner/configs/dpo/internlm/internlm2_chat_1_8b_dpo_full.py index dd3909f72..908683fe6 100644 --- a/xtuner/configs/dpo/internlm/internlm2_chat_1_8b_dpo_full.py +++ b/xtuner/configs/dpo/internlm/internlm2_chat_1_8b_dpo_full.py @@ -11,7 +11,7 @@ preference_collate_fn from xtuner.dataset.preference_dataset import (build_preference_dataset, orpo_dpo_mix_40k_map_fn) -from xtuner.engine.hooks import (EvaluateChatHook, +from xtuner.engine.hooks import (DatasetInfoHook, EvaluateChatHook, VarlenAttnArgsToMessageHubHook) from xtuner.engine.runner import TrainLoop from xtuner.model.dpo import DPO @@ -141,7 +141,7 @@ ####################################################################### # Log the dialogue periodically during the training process, optional custom_hooks = [ - # dict(type=DatasetInfoHook, tokenizer=tokenizer), + dict(type=DatasetInfoHook, tokenizer=tokenizer), dict( type=EvaluateChatHook, tokenizer=tokenizer, diff --git a/xtuner/configs/dpo/internlm/internlm2_chat_1_8b_dpo_full_varlenattn.py b/xtuner/configs/dpo/internlm/internlm2_chat_1_8b_dpo_full_varlenattn.py index 3e5cdc35a..787ad68bb 100644 --- a/xtuner/configs/dpo/internlm/internlm2_chat_1_8b_dpo_full_varlenattn.py +++ b/xtuner/configs/dpo/internlm/internlm2_chat_1_8b_dpo_full_varlenattn.py @@ -11,7 +11,7 @@ preference_collate_fn from xtuner.dataset.preference_dataset import (build_preference_dataset, orpo_dpo_mix_40k_map_fn) -from xtuner.engine.hooks import (EvaluateChatHook, +from xtuner.engine.hooks import (DatasetInfoHook, EvaluateChatHook, VarlenAttnArgsToMessageHubHook) from xtuner.engine.runner import TrainLoop from xtuner.model.dpo import DPO @@ -151,7 +151,7 @@ ####################################################################### # Log the dialogue periodically during the training process, optional custom_hooks = [ - # dict(type=DatasetInfoHook, tokenizer=tokenizer), + dict(type=DatasetInfoHook, tokenizer=tokenizer), dict( type=EvaluateChatHook, tokenizer=tokenizer, diff --git a/xtuner/configs/dpo/internlm/internlm2_chat_1_8b_dpo_full_varlenattn_jsonl_dataset.py b/xtuner/configs/dpo/internlm/internlm2_chat_1_8b_dpo_full_varlenattn_jsonl_dataset.py index 55bb270a4..ae1a3cdca 100644 --- a/xtuner/configs/dpo/internlm/internlm2_chat_1_8b_dpo_full_varlenattn_jsonl_dataset.py +++ b/xtuner/configs/dpo/internlm/internlm2_chat_1_8b_dpo_full_varlenattn_jsonl_dataset.py @@ -10,7 +10,7 @@ preference_collate_fn from xtuner.dataset.preference_dataset import (build_preference_dataset, load_jsonl_dataset) -from xtuner.engine.hooks import (EvaluateChatHook, +from xtuner.engine.hooks import (DatasetInfoHook, EvaluateChatHook, VarlenAttnArgsToMessageHubHook) from xtuner.engine.runner import TrainLoop from xtuner.model.dpo import DPO @@ -155,7 +155,7 @@ ####################################################################### # Log the dialogue periodically during the training process, optional custom_hooks = [ - # dict(type=DatasetInfoHook, 
tokenizer=tokenizer), + dict(type=DatasetInfoHook, tokenizer=tokenizer), dict( type=EvaluateChatHook, tokenizer=tokenizer, diff --git a/xtuner/configs/dpo/internlm/internlm2_chat_7b_dpo_qlora_varlenattn.py b/xtuner/configs/dpo/internlm/internlm2_chat_7b_dpo_qlora_varlenattn.py index b051ea2a1..659d029b3 100644 --- a/xtuner/configs/dpo/internlm/internlm2_chat_7b_dpo_qlora_varlenattn.py +++ b/xtuner/configs/dpo/internlm/internlm2_chat_7b_dpo_qlora_varlenattn.py @@ -14,7 +14,7 @@ preference_collate_fn from xtuner.dataset.preference_dataset import (build_preference_dataset, orpo_dpo_mix_40k_map_fn) -from xtuner.engine.hooks import (EvaluateChatHook, +from xtuner.engine.hooks import (DatasetInfoHook, EvaluateChatHook, VarlenAttnArgsToMessageHubHook) from xtuner.engine.runner import TrainLoop from xtuner.model.dpo import DPO @@ -170,7 +170,7 @@ ####################################################################### # Log the dialogue periodically during the training process, optional custom_hooks = [ - # dict(type=DatasetInfoHook, tokenizer=tokenizer), + dict(type=DatasetInfoHook, tokenizer=tokenizer), dict( type=EvaluateChatHook, tokenizer=tokenizer, diff --git a/xtuner/configs/dpo/llama/llama3_8b_instruct_dpo_qlora_varlenattn.py b/xtuner/configs/dpo/llama/llama3_8b_instruct_dpo_qlora_varlenattn.py index 0ca90f51c..e94b88fd0 100644 --- a/xtuner/configs/dpo/llama/llama3_8b_instruct_dpo_qlora_varlenattn.py +++ b/xtuner/configs/dpo/llama/llama3_8b_instruct_dpo_qlora_varlenattn.py @@ -14,7 +14,7 @@ preference_collate_fn from xtuner.dataset.preference_dataset import (build_preference_dataset, orpo_dpo_mix_40k_map_fn) -from xtuner.engine.hooks import (EvaluateChatHook, +from xtuner.engine.hooks import (DatasetInfoHook, EvaluateChatHook, VarlenAttnArgsToMessageHubHook) from xtuner.engine.runner import TrainLoop from xtuner.model.dpo import DPO @@ -170,7 +170,7 @@ ####################################################################### # Log the dialogue periodically during the training process, optional custom_hooks = [ - # dict(type=DatasetInfoHook, tokenizer=tokenizer), + dict(type=DatasetInfoHook, tokenizer=tokenizer), dict( type=EvaluateChatHook, tokenizer=tokenizer, diff --git a/xtuner/engine/hooks/dataset_info_hook.py b/xtuner/engine/hooks/dataset_info_hook.py index d997373ec..84dc9498a 100644 --- a/xtuner/engine/hooks/dataset_info_hook.py +++ b/xtuner/engine/hooks/dataset_info_hook.py @@ -25,19 +25,26 @@ def __init__(self, tokenizer, is_intern_repo_dataset=False): self.is_intern_repo_dataset = is_intern_repo_dataset def log(self, runner, dataset, mode='train'): + + def _log(input_ids, log_prefix=''): + if self.is_intern_repo_dataset: + input_ids = [abs(x) for x in input_ids] + # Try to split list to be compatible with IMAGE token + input_ids = split_list(input_ids, IMAGE_TOKEN_INDEX) + text = log_prefix + for idx, ids in enumerate(input_ids): + text += self.tokenizer.decode(ids) + if idx != len(input_ids) - 1: + text += DEFAULT_IMAGE_TOKEN + runner.logger.info(text) + runner.logger.info(f'Num {mode} samples {len(dataset)}') runner.logger.info(f'{mode} example:') - input_ids = dataset[0]['input_ids'] - if self.is_intern_repo_dataset: - input_ids = [abs(x) for x in input_ids] - # Try to split list to be compatible with IMAGE token - input_ids = split_list(input_ids, IMAGE_TOKEN_INDEX) - text = '' - for idx, ids in enumerate(input_ids): - text += self.tokenizer.decode(ids) - if idx != len(input_ids) - 1: - text += DEFAULT_IMAGE_TOKEN - runner.logger.info(text) + if 'chosen_ids' in dataset[0]: + 
_log(dataset[0]['chosen_ids'], log_prefix='chosen: ')
+            _log(dataset[0]['rejected_ids'], log_prefix='rejected: ')
+        else:
+            _log(dataset[0]['input_ids'])
 
     def before_train(self, runner) -> None:
         do_train = runner.train_loop is not None

From ff226e18f2d686dffe5cde842b78d7f9fe996693 Mon Sep 17 00:00:00 2001
From: RangiLyu
Date: Fri, 19 Jul 2024 10:03:40 +0800
Subject: [PATCH 12/29] [Enhance]: Fix sequence parallel memory bottleneck in
 DPO & ORPO (#830)

* [WIP]: Fix sequence parallel memory bottleneck in DPO

* loss mask before split

* refactor orpo
---
 xtuner/model/dpo.py  | 115 ++++++++++++++++++------------------
 xtuner/model/orpo.py | 135 ++++++++++++++++++++++---------------------
 2 files changed, 128 insertions(+), 122 deletions(-)

diff --git a/xtuner/model/dpo.py b/xtuner/model/dpo.py
index b46ea1c50..9a7b97a19 100644
--- a/xtuner/model/dpo.py
+++ b/xtuner/model/dpo.py
@@ -62,77 +62,66 @@ def _gather_masked_logits(self, logits, labels, mask):
 
     def get_logps(
             self,
-            all_logits,  # bs, seqlen,vocab_size
-            all_ref_logits,  # bs, seqlen,vocab_size
-            labels,  # bs, seqlen
+            policy_logps,  # bs, seqlen
+            ref_logps,  # bs, seqlen
+            loss_mask,  # bs, seqlen
     ):
-        labels = labels[:, 1:].clone()
-        all_logits = all_logits[:, :-1, :]
-        all_ref_logits = all_ref_logits[:, :-1, :]
-
-        labels[labels == -100] = 0
-        loss_mask = labels != 0
-        all_logps = self._gather_masked_logits(all_logits, labels,
-                                               loss_mask).sum(-1)
-        all_ref_logps = self._gather_masked_logits(all_ref_logits, labels,
-                                                   loss_mask).sum(-1)
+        policy_logps = policy_logps[:, :-1].sum(-1)
+        ref_logps = ref_logps[:, :-1].sum(-1)
+        loss_mask = loss_mask[:, :-1]
 
         if self.loss_type == 'ipo':  # average_log_prob
-            all_logps = all_logps / loss_mask.sum(-1)
-            all_ref_logps = all_ref_logps / loss_mask.sum(-1)
+            policy_logps = policy_logps / loss_mask.sum(-1)
+            ref_logps = ref_logps / loss_mask.sum(-1)
 
-        policy_chosen_logps = all_logps[::2]
-        policy_rejected_logps = all_logps[1::2]
-        reference_chosen_logps = all_ref_logps[::2]
-        reference_rejected_logps = all_ref_logps[1::2]
+        policy_chosen_logps = policy_logps[::2]
+        policy_rejected_logps = policy_logps[1::2]
+        reference_chosen_logps = ref_logps[::2]
+        reference_rejected_logps = ref_logps[1::2]
 
         return (policy_chosen_logps, policy_rejected_logps,
                 reference_chosen_logps, reference_rejected_logps)
 
-    def get_var_len_atten_logps(self, all_logits, all_ref_logits, labels,
+    def get_var_len_atten_logps(self, policy_logps, ref_logps, loss_mask,
                                 cu_seqlens, attention_mask):
         seqlens = (cu_seqlens[1:] - cu_seqlens[:-1]).tolist()
         # unpack sequence
-        unpacked_logits = torch.split(all_logits, seqlens, dim=1)
-        unpacked_ref_logits = torch.split(all_ref_logits, seqlens, dim=1)
-        unpacked_labels = torch.split(labels, seqlens, dim=1)
+        unpacked_policy_logps = torch.split(policy_logps, seqlens, dim=1)
+        unpacked_ref_logps = torch.split(ref_logps, seqlens, dim=1)
+        unpacked_loss_mask = torch.split(loss_mask, seqlens, dim=1)
         if attention_mask is not None:
             # It indicates that we pad the original sequence, labels,
             # position_ids and cumulative_len for sequence parallel if the
            # attention_mask is not None.
             # We then need to remove the padded segments.
assert False in attention_mask - unpacked_logits = unpacked_logits[:-1] - unpacked_ref_logits = unpacked_ref_logits[:-1] - unpacked_labels = unpacked_labels[:-1] - assert len(unpacked_logits) % 2 == 0 + unpacked_policy_logps = unpacked_policy_logps[:-1] + unpacked_ref_logps = unpacked_ref_logps[:-1] + unpacked_loss_mask = unpacked_loss_mask[:-1] + assert len(unpacked_policy_logps) % 2 == 0 - def compute_logps(_logits, _labels): - _labels = _labels[:, 1:].clone() - _logits = _logits[:, :-1, :] - _labels[_labels == -100] = 0 - loss_mask = _labels != 0 - logps = self._gather_masked_logits(_logits, _labels, loss_mask) - logps = logps.sum(-1) + def compute_logps(_logps, _mask): + _logps = _logps[:, :-1].sum(-1) + _mask = _mask[:, :-1] if self.loss_type == 'ipo': - logps /= loss_mask.sum(-1) - return logps + _logps /= _mask.sum(-1) + return _logps (policy_chosen_logps, policy_rejected_logps, reference_chosen_logps, reference_rejected_logps) = [], [], [], [] - for i in range(len(unpacked_logits) // 2): - chosen = unpacked_logits[2 * i] - rejected = unpacked_logits[2 * i + 1] - chosen_ref = unpacked_ref_logits[2 * i] - rejected_ref = unpacked_ref_logits[2 * i + 1] - chosen_label = unpacked_labels[2 * i] - rejected_label = unpacked_labels[2 * i + 1] - policy_chosen_logps.append(compute_logps(chosen, chosen_label)) + for i in range(len(unpacked_policy_logps) // 2): + chosen = unpacked_policy_logps[2 * i] + rejected = unpacked_policy_logps[2 * i + 1] + chosen_ref = unpacked_ref_logps[2 * i] + rejected_ref = unpacked_ref_logps[2 * i + 1] + chosen_mask = unpacked_loss_mask[2 * i] + rejected_mask = unpacked_loss_mask[2 * i + 1] + policy_chosen_logps.append(compute_logps(chosen, chosen_mask)) policy_rejected_logps.append( - compute_logps(rejected, rejected_label)) + compute_logps(rejected, rejected_mask)) reference_chosen_logps.append( - compute_logps(chosen_ref, chosen_label)) + compute_logps(chosen_ref, chosen_mask)) reference_rejected_logps.append( - compute_logps(rejected_ref, rejected_label)) + compute_logps(rejected_ref, rejected_mask)) return (torch.stack(policy_chosen_logps), torch.stack(policy_rejected_logps), @@ -142,7 +131,7 @@ def compute_logps(_logits, _labels): @staticmethod def _split_for_sequence_parallel(data): # attention mask should not be split - ARGS_NEED_TO_SPLIT = ('input_ids', 'position_ids') + ARGS_NEED_TO_SPLIT = ('input_ids', 'position_ids', 'labels') sp_group = get_sequence_parallel_group() for key in ARGS_NEED_TO_SPLIT: val = data.get(key, None) @@ -154,8 +143,14 @@ def _split_for_sequence_parallel(data): def compute_loss(self, data, data_samples=None): # modified from https://github.com/huggingface/trl/blob/main/trl/trainer/dpo_trainer.py # noqa - - labels = data.pop('labels') + # shift labels first and add a dummy label at the end, to support sequence parallel # noqa + data['labels'] = torch.cat( + (data['labels'][:, 1:], torch.zeros_like(data['labels'][:, :1])), + dim=1) + tmp_label = data['labels'].clone() + tmp_label[tmp_label == 0] = -100 + all_loss_mask = data[ + 'labels'] != -100 # loss mask of all tokens in all sp ranks # noqa if get_sequence_parallel_world_size() > 1: data = self._split_for_sequence_parallel(data) @@ -168,14 +163,22 @@ def compute_loss(self, data, data_samples=None): else: all_ref_logits = self.ref_llm(**data).logits + labels = data['labels'] + labels[labels == -100] = 0 + loss_mask = labels != 0 # loss mask in a single sp rank + policy_logps = self._gather_masked_logits(all_logits, labels, + loss_mask) + ref_logps = 
self._gather_masked_logits(all_ref_logits, labels, + loss_mask) + if get_sequence_parallel_world_size() > 1: - all_logits = gather_forward_split_backward( - all_logits, + policy_logps = gather_forward_split_backward( + policy_logps, dim=1, sp_group=get_sequence_parallel_group(), grad_scale='up') - all_ref_logits = gather_forward_split_backward( - all_ref_logits, + ref_logps = gather_forward_split_backward( + ref_logps, dim=1, sp_group=get_sequence_parallel_group(), grad_scale='up') @@ -184,7 +187,7 @@ def compute_loss(self, data, data_samples=None): (policy_chosen_logps, policy_rejected_logps, reference_chosen_logps, reference_rejected_logps) = self.get_logps( - all_logits, all_ref_logits, labels) + policy_logps, ref_logps, all_loss_mask) else: message_hub = MessageHub.get_instance('varlen_attn_args') rank = dist.get_rank() @@ -192,7 +195,7 @@ def compute_loss(self, data, data_samples=None): (policy_chosen_logps, policy_rejected_logps, reference_chosen_logps, reference_rejected_logps) = self.get_var_len_atten_logps( - all_logits, all_ref_logits, labels, cu_seqlens, + policy_logps, ref_logps, all_loss_mask, cu_seqlens, data['attention_mask']) pi_logratios = policy_chosen_logps - policy_rejected_logps diff --git a/xtuner/model/orpo.py b/xtuner/model/orpo.py index 5fb4b7d27..37264088a 100644 --- a/xtuner/model/orpo.py +++ b/xtuner/model/orpo.py @@ -34,17 +34,12 @@ def _gather_masked_logits(self, logits, labels, mask): def get_logps( self, - all_logits, # bs, seqlen,vocab_size - average_log_prob, # bs, seqlen,vocab_size - labels, # bs, seqlen + all_logps, # bs, seqlen + average_log_prob, + loss_mask, # bs, seqlen ): - labels = labels[:, 1:].clone() - all_logits = all_logits[:, :-1, :] - - labels[labels == -100] = 0 - loss_mask = labels != 0 - all_logps = self._gather_masked_logits(all_logits, labels, - loss_mask).sum(-1) + all_logps = all_logps[:, :-1].sum(-1) + loss_mask = loss_mask[:, :-1] if average_log_prob: # average_log_prob all_logps = all_logps / loss_mask.sum(-1) @@ -53,47 +48,44 @@ def get_logps( rejected_logps = all_logps[1::2] return chosen_logps, rejected_logps - def get_var_len_atten_logps(self, all_logits, average_log_prob, labels, + def get_var_len_atten_logps(self, all_logps, average_log_prob, loss_mask, cu_seqlens, attention_mask): seqlens = (cu_seqlens[1:] - cu_seqlens[:-1]).tolist() # unpack sequence - unpacked_logits = torch.split(all_logits, seqlens, dim=1) - unpacked_labels = torch.split(labels, seqlens, dim=1) + unpacked_logps = torch.split(all_logps, seqlens, dim=1) + unpacked_loss_mask = torch.split(loss_mask, seqlens, dim=1) if attention_mask is not None: # It indicate that we pad the original sequence, labels, # position_ids and cumulative_len for sequence parallel if the # attention_mask is not None. # We then need to remove the padded segments. 
assert False in attention_mask
-            unpacked_logits = unpacked_logits[:-1]
-            unpacked_labels = unpacked_labels[:-1]
-        assert len(unpacked_logits) % 2 == 0
-
-        def compute_logps(_logits, _labels):
-            _labels = _labels[:, 1:].clone()
-            _logits = _logits[:, :-1, :]
-            _labels[_labels == -100] = 0
-            loss_mask = _labels != 0
-            logps = self._gather_masked_logits(_logits, _labels, loss_mask)
-            logps = logps.sum(-1)
+            unpacked_logps = unpacked_logps[:-1]
+            unpacked_loss_mask = unpacked_loss_mask[:-1]
+        assert len(unpacked_logps) % 2 == 0
+
+        def compute_logps(_logps, _mask):
+            _logps = _logps[:, :-1].sum(-1)
+            _mask = _mask[:, :-1]
             if average_log_prob:
-                logps /= loss_mask.sum(-1)
-            return logps
+                _logps /= _mask.sum(-1)
+            return _logps

         chosen_logps, rejected_logps = [], []
-        for i in range(len(unpacked_logits) // 2):
-            chosen = unpacked_logits[2 * i]
-            rejected = unpacked_logits[2 * i + 1]
-            chosen_label = unpacked_labels[2 * i]
-            rejected_label = unpacked_labels[2 * i + 1]
-            chosen_logps.append(compute_logps(chosen, chosen_label))
-            rejected_logps.append(compute_logps(rejected, rejected_label))
+        for i in range(len(unpacked_logps) // 2):
+            chosen = unpacked_logps[2 * i]
+            rejected = unpacked_logps[2 * i + 1]
+            chosen_mask = unpacked_loss_mask[2 * i]
+            rejected_mask = unpacked_loss_mask[2 * i + 1]
+            chosen_logps.append(compute_logps(chosen, chosen_mask))
+            rejected_logps.append(compute_logps(rejected, rejected_mask))

         return (torch.stack(chosen_logps), torch.stack(rejected_logps))

     def cross_entropy_loss(self, logits, labels):
         logits = logits[..., :-1, :].contiguous()
-        labels = labels[..., 1:].contiguous()
+        # labels are already shifted; now we need to remove the last dummy label  # noqa
+        labels = labels[..., :-1].contiguous()
         # Flatten the tokens
         loss_fct = nn.CrossEntropyLoss()
         logits = logits.view(-1, logits.shape[-1])
@@ -126,7 +118,8 @@ def odds_ratio_loss(
     @staticmethod
     def _split_for_sequence_parallel(data):
         # attention mask should not be split
-        ARGS_NEED_TO_SPLIT = ('input_ids', 'position_ids')
+        ARGS_NEED_TO_SPLIT = ('input_ids', 'position_ids', 'labels',
+                              'chosen_rejected_tag')
         sp_group = get_sequence_parallel_group()
         for key in ARGS_NEED_TO_SPLIT:
             val = data.get(key, None)
@@ -137,53 +130,61 @@ def _split_for_sequence_parallel(data):
         return data

     def compute_loss(self, data, data_samples=None):
-        labels_ori = data.pop('labels')
+        # shift labels first and add a dummy label at the end, to support sequence parallel  # noqa
+        data['labels'] = torch.cat(
+            (data['labels'][:, 1:], torch.zeros_like(data['labels'][:, :1])),
+            dim=1)
+        tmp_label = data['labels'].clone()
+        tmp_label[tmp_label == 0] = -100
+        # loss mask of all tokens in all sp ranks
+        all_loss_mask = data['labels'] != -100
+
+        if self.use_varlen_attn:
+            # create a chosen rejected tag for varlen_attn ce loss
+            message_hub = MessageHub.get_instance('varlen_attn_args')
+            rank = dist.get_rank()
+            cu_seqlens = message_hub.get_info(f'cumulative_len_rank_{rank}')
+            seqlens = (cu_seqlens[1:] - cu_seqlens[:-1]).tolist()
+
+            chosen_rejected_tag = torch.ones_like(data['labels'])
+            unpacked_tag = list(
+                torch.split(chosen_rejected_tag, seqlens, dim=1))
+            for i in range(len(unpacked_tag) // 2):
+                unpacked_tag[2 * i + 1] *= 0
+            chosen_rejected_tag = torch.cat(unpacked_tag, dim=1)
+            data['chosen_rejected_tag'] = chosen_rejected_tag

         if get_sequence_parallel_world_size() > 1:
             data = self._split_for_sequence_parallel(data)
-
+        chosen_rejected_tag = data.pop('chosen_rejected_tag', None)
all_logits = self.llm(**data).logits + + labels = data['labels'].clone() + labels[labels == -100] = 0 + loss_mask = labels != 0 # loss mask in a single sp rank + all_logps = self._gather_masked_logits(all_logits, labels, loss_mask) if get_sequence_parallel_world_size() > 1: - all_logits = gather_forward_split_backward( - all_logits, + all_logps = gather_forward_split_backward( + all_logps, dim=1, sp_group=get_sequence_parallel_group(), grad_scale='up') if not self.use_varlen_attn: chosen_nll_loss = self.cross_entropy_loss(all_logits[::2], - labels_ori.clone()[::2]) + data['labels'][::2]) chosen_logps, rejected_logps = self.get_logps( - all_logits, True, labels_ori) + all_logps, True, all_loss_mask) else: - message_hub = MessageHub.get_instance('varlen_attn_args') - rank = dist.get_rank() - cu_seqlens = message_hub.get_info(f'cumulative_len_rank_{rank}') - seqlens = (cu_seqlens[1:] - cu_seqlens[:-1]).tolist() - - attention_mask = data['attention_mask'] - if attention_mask is not None: - # It indicate that we pad the original sequence, labels, - # position_ids and cumulative_len for sequence parallel if the - # attention_mask is not None. - # We then need to remove the padded segments. - logits = torch.split(all_logits, seqlens, dim=1)[:-1] - assert len(logits) % 2 == 0 - chosen_logits = logits[::2] - labels = torch.split(labels_ori.clone(), seqlens, dim=1)[:-1] - assert len(labels) % 2 == 0 - chosen_labels = labels[::2] - else: - chosen_logits = torch.split(all_logits, seqlens, dim=1)[::2] - chosen_labels = torch.split( - labels_ori.clone(), seqlens, dim=1)[::2] - - chosen_logits = torch.cat(chosen_logits, dim=1) - chosen_labels = torch.cat(chosen_labels, dim=1) + chosen_idxs = chosen_rejected_tag == 1 + chosen_logits = all_logits[chosen_idxs] + chosen_labels = data['labels'][chosen_idxs] chosen_nll_loss = self.cross_entropy_loss(chosen_logits, chosen_labels) + chosen_logps, rejected_logps = self.get_var_len_atten_logps( - all_logits, True, labels_ori, cu_seqlens, attention_mask) + all_logps, True, all_loss_mask, cu_seqlens, + data['attention_mask']) (losses, chosen_rewards, rejected_rewards, log_odds_ratio, log_odds_chosen) = self.odds_ratio_loss(chosen_logps, rejected_logps) losses = losses.mean() From 381d1c866d106505a164011a421df5a742aa33d7 Mon Sep 17 00:00:00 2001 From: bychen7 <55865490+bychen7@users.noreply.github.com> Date: Fri, 19 Jul 2024 10:08:15 +0800 Subject: [PATCH 13/29] [Fix] Fix typo (#795) Update quick_start.md Fix typo --- docs/zh_cn/reward_model/quick_start.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/zh_cn/reward_model/quick_start.md b/docs/zh_cn/reward_model/quick_start.md index 3762a4e8c..736624cef 100644 --- a/docs/zh_cn/reward_model/quick_start.md +++ b/docs/zh_cn/reward_model/quick_start.md @@ -76,7 +76,7 @@ XTuner 已经集成好了将模型转换为 HuggingFace 格式的工具,我们 mkdir work_dirs/internlm2_chat_1_8b_reward_full_ultrafeedback_copy/iter_15230_hf # 转换格式 -xtuner convert pth_to_hf internlm2_chat_1_8b_reward_full_ultrafeedback_copy.py.py \ +xtuner convert pth_to_hf internlm2_chat_1_8b_reward_full_ultrafeedback_copy.py \ work_dirs/internlm2_chat_1_8b_reward_full_ultrafeedback_copy.py/iter_15230.pth \ work_dirs/internlm2_chat_1_8b_reward_full_ultrafeedback_copy.py/iter_15230_hf ``` From ba7afc79b1f338b8485d6f51763c078f877116da Mon Sep 17 00:00:00 2001 From: Xu Song Date: Fri, 19 Jul 2024 13:32:02 +0800 Subject: [PATCH 14/29] [Fix] fix initialization of ref_llm for full param dpo training with zero-3 (#778) * Fix initialization of ref_llm * Update dpo.py * 
Update dpo.py * Update dpo.py * Update sft.py * Update dpo.py * Update dpo.py * Update dpo.py --- xtuner/model/dpo.py | 45 ++++++++++++++++++++++++--------------------- xtuner/model/sft.py | 21 +++++++++++++++------ 2 files changed, 39 insertions(+), 27 deletions(-) diff --git a/xtuner/model/dpo.py b/xtuner/model/dpo.py index 9a7b97a19..9384ddb34 100644 --- a/xtuner/model/dpo.py +++ b/xtuner/model/dpo.py @@ -16,6 +16,26 @@ from .sft import SupervisedFinetune +def disable_grad(model): + # freeze parameters + parameter_names = [n for n, _ in model.named_parameters()] + for param_name in parameter_names: + param = model.get_parameter(param_name) + param.requires_grad = False + return model.eval() + + +def create_reference_model(model): + if is_deepspeed_zero3_enabled(): + raise ValueError('DeepSpeed ZeRO-3 is enabled and is not compatible ' + 'with `create_reference_model()`. Please instantiate ' + 'your reference model directly with ' + '`AutoCausalLM.from_pretrained()`.') + ref_model = deepcopy(model) + ref_model = disable_grad(ref_model) + return ref_model + + class DPO(SupervisedFinetune): """A general class of DPO and its variants.""" @@ -27,32 +47,15 @@ def __init__(self, label_smoothing=0.0, **kwargs): super().__init__(llm, **kwargs) - self.ref_llm = ref_llm self.loss_type = loss_type self.label_smoothing = label_smoothing self.beta = beta - if not self.use_lora: - self.ref_llm = self.create_reference_model(ref_llm, **kwargs) - - def create_reference_model(self, ref_llm=None, **kwargs): - ref_model = None - if ref_llm is None: - if is_deepspeed_zero3_enabled(): - raise ValueError( - 'DeepSpeed ZeRO-3 is enabled and is not compatible ' - 'with `deepcopy(self.llm)`. Please instantiate ' - 'your reference model by modifying key `model.ref_llm` ' - 'in your config with `AutoCausalLM.from_pretrained()`.') - ref_model = deepcopy(self.llm) + if ref_llm is not None: + ref_llm = self._build_llm_from_cfg(ref_llm, kwargs.get("use_varlen_attn"), kwargs.get("max_position_embeddings")) + self.ref_llm = disable_grad(ref_llm) else: - ref_model = SupervisedFinetune(ref_llm, **kwargs).llm - # freeze parameters - parameter_names = [n for n, _ in ref_model.named_parameters()] - for param_name in parameter_names: - param = ref_model.get_parameter(param_name) - param.requires_grad = False - return ref_model.eval() + self.ref_llm = None if self.use_lora else create_reference_model(self.llm) def _gather_masked_logits(self, logits, labels, mask): logits = torch.gather( diff --git a/xtuner/model/sft.py b/xtuner/model/sft.py index d030c6c20..9c3fa38c9 100644 --- a/xtuner/model/sft.py +++ b/xtuner/model/sft.py @@ -79,10 +79,8 @@ def __init__(self, tokenizer=None, max_position_embeddings=None): super().__init__() - with LoadWoInit(): - if isinstance(llm, dict): - llm = self._dispatch_lm_model_cfg(llm, max_position_embeddings) - self.llm = self._build_from_cfg_or_module(llm) + + self.llm = self._build_llm_from_cfg(llm, use_varlen_attn, max_position_embeddings) if tokenizer is not None: if isinstance(tokenizer, dict): @@ -90,8 +88,6 @@ def __init__(self, smart_tokenizer_and_embedding_resize(tokenizer, self.llm) self.llm.config.use_cache = False - dispatch_modules(self.llm, use_varlen_attn=use_varlen_attn) - if use_activation_checkpointing: # For backward compatibility if hasattr(self.llm, 'enable_input_require_grads'): @@ -119,6 +115,19 @@ def __init__(self, # the sequence. 
self.use_varlen_attn = use_varlen_attn + + def _build_llm_from_cfg(self, llm_cfg, use_varlen_attn, max_position_embeddings): + # For forward + with LoadWoInit(): + if isinstance(llm_cfg, dict): + llm = self._dispatch_lm_model_cfg(llm_cfg, max_position_embeddings) + llm = self._build_from_cfg_or_module(llm) + + llm.config.use_cache = False + dispatch_modules(llm, use_varlen_attn=use_varlen_attn) + return llm + + def gradient_checkpointing_enable(self): self.activation_checkpointing_enable() From 30133d57fc205488c2a7e1ea110ee46e93cb6801 Mon Sep 17 00:00:00 2001 From: whcao <41630003+HIT-cwh@users.noreply.github.com> Date: Fri, 19 Jul 2024 17:09:17 +0800 Subject: [PATCH 15/29] [Bugs] Fix attn mask (#852) * [WIP]: Fix sequence parallel memory bottleneck in DPO * loss mask before split * refactor orpo * fix attention_mask in preference_collate_fn --------- Co-authored-by: RangiLyu --- xtuner/dataset/collate_fns/preference_collate_fn.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/xtuner/dataset/collate_fns/preference_collate_fn.py b/xtuner/dataset/collate_fns/preference_collate_fn.py index 8a6060410..ca21613bb 100644 --- a/xtuner/dataset/collate_fns/preference_collate_fn.py +++ b/xtuner/dataset/collate_fns/preference_collate_fn.py @@ -58,7 +58,7 @@ def preference_collate_fn(instances: Sequence[Dict], labels = torch.stack(labels) if use_varlen_attn: - attention_mask = None + attention_mask = torch.ones_like(input_ids).bool() position_ids = torch.stack(position_ids, dim=0) else: # Some tokenizers have the same eos token and pad token, so input_ids @@ -74,8 +74,10 @@ def preference_collate_fn(instances: Sequence[Dict], input_ids = pad_for_sequence_parallel(input_ids, pad_index) labels = pad_for_sequence_parallel(labels, IGNORE_INDEX) position_ids = pad_for_sequence_parallel(position_ids, 0) - if attention_mask is not None: - attention_mask = pad_for_sequence_parallel(attention_mask, 0) + # We use attention_mask to distinguish `input_ids` from + # (sequence parallel) pad tokens in `get_var_len_atten_logps` method of + # class `DPO` and `ORPO` + attention_mask = pad_for_sequence_parallel(attention_mask, 0) if use_varlen_attn: (cumulative_len, attention_mask ) = pad_cumulative_len_for_sequence_parallel(cumulative_len) From d58c1dd48fd5405fcfee0a409a854e215ec5f022 Mon Sep 17 00:00:00 2001 From: whcao <41630003+HIT-cwh@users.noreply.github.com> Date: Fri, 19 Jul 2024 17:09:46 +0800 Subject: [PATCH 16/29] fix lint (#854) --- xtuner/model/dpo.py | 7 +++++-- xtuner/model/sft.py | 11 ++++++----- 2 files changed, 11 insertions(+), 7 deletions(-) diff --git a/xtuner/model/dpo.py b/xtuner/model/dpo.py index 9384ddb34..faaa43402 100644 --- a/xtuner/model/dpo.py +++ b/xtuner/model/dpo.py @@ -52,10 +52,13 @@ def __init__(self, self.beta = beta if ref_llm is not None: - ref_llm = self._build_llm_from_cfg(ref_llm, kwargs.get("use_varlen_attn"), kwargs.get("max_position_embeddings")) + ref_llm = self.build_llm_from_cfg( + ref_llm, kwargs.get('use_varlen_attn', False), + kwargs.get('max_position_embeddings', None)) self.ref_llm = disable_grad(ref_llm) else: - self.ref_llm = None if self.use_lora else create_reference_model(self.llm) + self.ref_llm = None if self.use_lora else create_reference_model( + self.llm) def _gather_masked_logits(self, logits, labels, mask): logits = torch.gather( diff --git a/xtuner/model/sft.py b/xtuner/model/sft.py index 9c3fa38c9..522950489 100644 --- a/xtuner/model/sft.py +++ b/xtuner/model/sft.py @@ -80,7 +80,8 @@ def __init__(self, 
max_position_embeddings=None): super().__init__() - self.llm = self._build_llm_from_cfg(llm, use_varlen_attn, max_position_embeddings) + self.llm = self.build_llm_from_cfg(llm, use_varlen_attn, + max_position_embeddings) if tokenizer is not None: if isinstance(tokenizer, dict): @@ -115,19 +116,19 @@ def __init__(self, # the sequence. self.use_varlen_attn = use_varlen_attn - - def _build_llm_from_cfg(self, llm_cfg, use_varlen_attn, max_position_embeddings): + def build_llm_from_cfg(self, llm_cfg, use_varlen_attn, + max_position_embeddings): # For forward with LoadWoInit(): if isinstance(llm_cfg, dict): - llm = self._dispatch_lm_model_cfg(llm_cfg, max_position_embeddings) + llm = self._dispatch_lm_model_cfg(llm_cfg, + max_position_embeddings) llm = self._build_from_cfg_or_module(llm) llm.config.use_cache = False dispatch_modules(llm, use_varlen_attn=use_varlen_attn) return llm - def gradient_checkpointing_enable(self): self.activation_checkpointing_enable() From 16e2f8f1abc272f2b31ec900f6c94731e20a49d6 Mon Sep 17 00:00:00 2001 From: whcao <41630003+HIT-cwh@users.noreply.github.com> Date: Fri, 19 Jul 2024 17:10:37 +0800 Subject: [PATCH 17/29] [Bugs] Fix dispatch attn bug (#829) * fix collate bug * fix dispatch attn bugs * fix rotary_seq_len bug --- xtuner/dataset/collate_fns/default_collate_fn.py | 4 ++-- xtuner/model/modules/dispatch/cohere.py | 7 ++++++- xtuner/model/modules/dispatch/deepseek_v2.py | 5 +++++ xtuner/model/modules/dispatch/internlm2.py | 5 +++++ xtuner/model/modules/dispatch/mistral.py | 8 +++++++- xtuner/model/modules/dispatch/phi3.py | 8 +++++++- xtuner/model/modules/dispatch/qwen2.py | 8 +++++++- 7 files changed, 39 insertions(+), 6 deletions(-) diff --git a/xtuner/dataset/collate_fns/default_collate_fn.py b/xtuner/dataset/collate_fns/default_collate_fn.py index f4d5f9197..0ca9264f0 100644 --- a/xtuner/dataset/collate_fns/default_collate_fn.py +++ b/xtuner/dataset/collate_fns/default_collate_fn.py @@ -56,8 +56,8 @@ def default_collate_fn(instances: Sequence[Dict], # Some tokenizers have the same eos token and pad token, so input_ids # cannot be masked directly based on the pad token id. 
attention_mask = torch.zeros_like(input_ids).bool() - for i in ori_length: - attention_mask[:i] = True + for i, length in enumerate(ori_length): + attention_mask[i, :length] = True bs, seq_len = input_ids.shape position_ids = torch.arange(seq_len).unsqueeze(0).long().repeat(bs, 1) diff --git a/xtuner/model/modules/dispatch/cohere.py b/xtuner/model/modules/dispatch/cohere.py index edeb771e3..d3529f570 100644 --- a/xtuner/model/modules/dispatch/cohere.py +++ b/xtuner/model/modules/dispatch/cohere.py @@ -105,17 +105,22 @@ def cohere_attn_forward( query_states, key_states, value_states = \ pre_process_for_sequence_parallel_attn( query_states, key_states, value_states) + # self.num_heads is used in self._upad_input method + # num_heads has been changed because of sequence parallel + ori_num_head = self.num_heads + self.num_heads = query_states.shape[-2] attn_output = self._flash_attention_forward( query_states, key_states, value_states, attention_mask, - q_len, + query_states.shape[1], dropout=dropout_rate) if enable_sequence_parallel: attn_output = post_process_for_sequence_parallel_attn(attn_output) + self.num_heads = ori_num_head attn_output = attn_output.reshape(bsz, q_len, self.hidden_size) attn_output = self.o_proj(attn_output) diff --git a/xtuner/model/modules/dispatch/deepseek_v2.py b/xtuner/model/modules/dispatch/deepseek_v2.py index dcdb677a3..667d2227c 100644 --- a/xtuner/model/modules/dispatch/deepseek_v2.py +++ b/xtuner/model/modules/dispatch/deepseek_v2.py @@ -128,6 +128,10 @@ def deepseek_attn_forward( query_states, key_states, value_states = \ pre_process_for_sequence_parallel_attn( query_states, key_states, value_states) + # self.num_heads is used in self._upad_input method + # num_heads has been changed because of sequence parallel + ori_num_head = self.num_heads + self.num_heads = query_states.shape[-2] attn_output = self._flash_attention_forward( query_states, @@ -141,6 +145,7 @@ def deepseek_attn_forward( if enable_sequence_parallel: attn_output = post_process_for_sequence_parallel_attn(attn_output) + self.num_heads = ori_num_head if self.q_head_dim != self.v_head_dim: attn_output = attn_output[:, :, :, :self.v_head_dim] diff --git a/xtuner/model/modules/dispatch/internlm2.py b/xtuner/model/modules/dispatch/internlm2.py index 5b855d4ab..7c601f0dc 100644 --- a/xtuner/model/modules/dispatch/internlm2.py +++ b/xtuner/model/modules/dispatch/internlm2.py @@ -149,6 +149,10 @@ def internlm2_attn_forward( query_states, key_states, value_states = \ pre_process_for_sequence_parallel_attn( query_states, key_states, value_states) + # self.num_heads is used in self._upad_input method + # num_heads has been changed because of sequence parallel + ori_num_head = self.num_heads + self.num_heads = query_states.shape[-2] dropout_rate = 0.0 attn_output = self._flash_attention_forward( @@ -161,6 +165,7 @@ def internlm2_attn_forward( if enable_sequence_parallel: attn_output = post_process_for_sequence_parallel_attn(attn_output) + self.num_heads = ori_num_head attn_output = attn_output.reshape(bsz, q_len, self.hidden_size) attn_output = self.wo(attn_output) diff --git a/xtuner/model/modules/dispatch/mistral.py b/xtuner/model/modules/dispatch/mistral.py index 49dfdc108..d08b0f00e 100644 --- a/xtuner/model/modules/dispatch/mistral.py +++ b/xtuner/model/modules/dispatch/mistral.py @@ -214,6 +214,11 @@ def mistral_attn_forward( query_states, key_states, value_states = \ pre_process_for_sequence_parallel_attn( query_states, key_states, value_states) + # num_heads has been changed because of sequence 
parallel
+            # `self.num_heads` is not used in self._flash_attention_forward
+            # in mistral/mixtral, we are doing this to avoid some unnecessary risk
+            ori_num_head = self.num_heads
+            self.num_heads = query_states.shape[-2]

     attn_output = self._flash_attention_forward(
         query_states,
@@ -227,6 +232,7 @@ def mistral_attn_forward(

     if enable_sequence_parallel:
         attn_output = post_process_for_sequence_parallel_attn(attn_output)
+        self.num_heads = ori_num_head

     attn_output = attn_output.reshape(bsz, q_len,
                                       self.hidden_size).contiguous()
@@ -311,7 +317,7 @@ def mistral_varlen_attn_forward(
     value_states = value_states.transpose(1, 2)
     # Because the input can be padded, the absolute sequence length
     # depends on the max position id.
-    rotary_seq_len = max(kv_seq_len, position_ids[:, -1].max().item() + 1)
+    rotary_seq_len = max(kv_seq_len, position_ids.max().item() + 1)
     cos, sin = self.rotary_emb(value_states, seq_len=rotary_seq_len)
     query_states, key_states = apply_rotary_pos_emb(
         query_states, key_states, cos, sin, position_ids)
diff --git a/xtuner/model/modules/dispatch/phi3.py b/xtuner/model/modules/dispatch/phi3.py
index 4003c9d62..97ebc8d33 100644
--- a/xtuner/model/modules/dispatch/phi3.py
+++ b/xtuner/model/modules/dispatch/phi3.py
@@ -233,6 +233,11 @@ def phi3_attn_forward(
             pre_process_for_sequence_parallel_attn(
                 query_states, key_states, value_states,
                 scatter_dim=2, gather_dim=1)
+        # num_heads has been changed because of sequence parallel
+        # `self.num_heads` is not used in self._flash_attention_forward
+        # in mistral/mixtral, we are doing this to avoid some unnecessary risk
+        ori_num_head = self.num_heads
+        self.num_heads = query_states.shape[-2]

     attn_output = self._flash_attention_forward(
         query_states,
@@ -248,6 +253,7 @@ def phi3_attn_forward(
         # (b, s, nd // sp_world_size, dim) -> (b, s // sp_world_size, nd, dim)
         attn_output = post_process_for_sequence_parallel_attn(
             attn_output, scatter_dim=1, gather_dim=2)
+        self.num_heads = ori_num_head

     attn_output = attn_output.reshape(bsz, q_len, self.hidden_size)
     attn_output = self.o_proj(attn_output)
@@ -333,7 +339,7 @@ def phi3_varlen_attn_forward(
                                                 self.layer_idx)

     assert position_ids is not None
-    rotary_seq_len = max(kv_seq_len, position_ids[:, -1].max().item()) + 1
+    rotary_seq_len = max(kv_seq_len, position_ids.max().item() + 1)
     cos, sin = self.rotary_emb(
         value_states, position_ids, seq_len=rotary_seq_len)
diff --git a/xtuner/model/modules/dispatch/qwen2.py b/xtuner/model/modules/dispatch/qwen2.py
index d89dbf947..1c8c5a8d0 100644
--- a/xtuner/model/modules/dispatch/qwen2.py
+++ b/xtuner/model/modules/dispatch/qwen2.py
@@ -151,6 +151,11 @@ def qwen2_attn_forward(
         query_states, key_states, value_states = \
             pre_process_for_sequence_parallel_attn(
                 query_states, key_states, value_states)
+        # num_heads has been changed because of sequence parallel
+        # `self.num_heads` is not used in self._flash_attention_forward
+        # in mistral/mixtral, we are doing this to avoid some unnecessary risk
+        ori_num_head = self.num_heads
+        self.num_heads = query_states.shape[-2]

     attn_output = self._flash_attention_forward(
         query_states,
@@ -164,6 +169,7 @@ def qwen2_attn_forward(

     if enable_sequence_parallel:
         attn_output = post_process_for_sequence_parallel_attn(attn_output)
+        self.num_heads = ori_num_head

     attn_output = attn_output.reshape(bsz, q_len, self.hidden_size)
     attn_output = self.o_proj(attn_output)
@@ -227,7 +233,7 @@ def qwen2_varlen_attn_forward(
                                                 self.layer_idx)

     assert position_ids is not None
-    rotary_seq_len = max(kv_seq_len, position_ids[:, -1].max().item() +
1) + rotary_seq_len = max(kv_seq_len, position_ids.max().item() + 1) cos, sin = self.rotary_emb(value_states, seq_len=rotary_seq_len) query_states, key_states = apply_rotary_pos_emb(query_states, key_states, From 3617c9868fb3a853c06488ef99029ed3243fdd24 Mon Sep 17 00:00:00 2001 From: RangiLyu Date: Fri, 19 Jul 2024 17:14:35 +0800 Subject: [PATCH 18/29] [Docs]: update readme and DPO en docs (#853) * [Docs]: update readme and DPO en docs * update link --- README.md | 4 + README_zh-CN.md | 4 + docs/en/dpo/modify_settings.md | 83 +++++++++++++ docs/en/dpo/overview.md | 27 +++++ docs/en/dpo/quick_start.md | 71 +++++++++++ docs/en/index.rst | 17 +++ docs/en/reward_model/modify_settings.md | 100 ++++++++++++++++ docs/en/reward_model/overview.md | 43 +++++++ docs/en/reward_model/preference_data.md | 110 ++++++++++++++++++ docs/en/reward_model/quick_start.md | 85 ++++++++++++++ docs/zh_cn/dpo/modify_settings.md | 2 +- docs/zh_cn/dpo/overview.md | 2 + .../reward_model/images/sequence_parallel.png | Bin 0 -> 38382 bytes docs/zh_cn/reward_model/overview.md | 14 +++ 14 files changed, 561 insertions(+), 1 deletion(-) create mode 100644 docs/en/dpo/modify_settings.md create mode 100644 docs/en/dpo/overview.md create mode 100644 docs/en/dpo/quick_start.md create mode 100644 docs/en/reward_model/modify_settings.md create mode 100644 docs/en/reward_model/overview.md create mode 100644 docs/en/reward_model/preference_data.md create mode 100644 docs/en/reward_model/quick_start.md create mode 100644 docs/zh_cn/reward_model/images/sequence_parallel.png diff --git a/README.md b/README.md index 4e729226c..2a8eb2879 100644 --- a/README.md +++ b/README.md @@ -39,6 +39,7 @@ English | [简体中文](README_zh-CN.md) ## 🎉 News +- **\[2024/07\]** Support [DPO](https://github.com/InternLM/xtuner/tree/main/xtuner/configs/dpo), [ORPO](https://github.com/InternLM/xtuner/tree/main/xtuner/configs/orpo) and [Reward Model](https://github.com/InternLM/xtuner/tree/main/xtuner/configs/reward_model) training with packed data and sequence parallel! See [documents](https://xtuner.readthedocs.io/en/latest/dpo/overview.html) for more details. - **\[2024/07\]** Support [InternLM 2.5](xtuner/configs/internlm/internlm2_5_chat_7b/) models! - **\[2024/06\]** Support [DeepSeek V2](xtuner/configs/deepseek/deepseek_v2_chat/) models! **2x faster!** - **\[2024/04\]** [LLaVA-Phi-3-mini](https://huggingface.co/xtuner/llava-phi-3-mini-hf) is released! Click [here](xtuner/configs/llava/phi3_mini_4k_instruct_clip_vit_large_p14_336) for details! @@ -144,6 +145,9 @@ XTuner is an efficient, flexible and full-featured toolkit for fine-tuning large
  • QLoRA
  • LoRA
  • Full parameter fine-tune
  • +
  • DPO
  • +
  • ORPO
  • +
  • Reward Model
  • diff --git a/README_zh-CN.md b/README_zh-CN.md index 16c1a2af2..58076210f 100644 --- a/README_zh-CN.md +++ b/README_zh-CN.md @@ -39,6 +39,7 @@ ## 🎉 更新 +- **\[2024/07\]** 支持训练 [DPO](https://github.com/InternLM/xtuner/tree/main/xtuner/configs/dpo), [ORPO](https://github.com/InternLM/xtuner/tree/main/xtuner/configs/orpo) 还有 [Reward Model](https://github.com/InternLM/xtuner/tree/main/xtuner/configs/reward_model) ! 并且能够支持打包数据以及序列并行功能! 请参考 [文档](https://xtuner.readthedocs.io/zh-cn/latest/dpo/overview.html) 了解更多信息。 - **\[2024/07\]** 支持 [InternLM 2.5](xtuner/configs/internlm/internlm2_5_chat_7b/) 模型! - **\[2024/06\]** 支持 [DeepSeek V2](xtuner/configs/deepseek/deepseek_v2_chat/) models! **训练速度提升一倍!** - **\[2024/04\]** 多模态大模型 [LLaVA-Phi-3-mini](https://huggingface.co/xtuner/llava-phi-3-mini-hf) 发布!快速开始请查阅此[文档](xtuner/configs/llava/phi3_mini_4k_instruct_clip_vit_large_p14_336)! @@ -144,6 +145,9 @@ XTuner 是一个高效、灵活、全能的轻量化大模型微调工具库。
  • QLoRA
  • LoRA
  • 全量参数微调
  • +
  • DPO
  • +
  • ORPO
  • +
  • Reward Model
  • diff --git a/docs/en/dpo/modify_settings.md b/docs/en/dpo/modify_settings.md new file mode 100644 index 000000000..d78cc40e6 --- /dev/null +++ b/docs/en/dpo/modify_settings.md @@ -0,0 +1,83 @@ +## Modify DPO Training Configuration + +This section introduces config parameters related to DPO (Direct Preference Optimization) training. For more details on XTuner config files, please refer to [Modifying Training Configuration](https://xtuner.readthedocs.io/zh-cn/latest/training/modify_settings.html). + +### Loss Function + +In DPO training, you can choose different types of loss functions according to your needs. XTuner provides various loss function options, such as `sigmoid`, `hinge`, `ipo`, etc. You can select the desired loss function type by setting the `dpo_loss_type` parameter. + +Additionally, you can control the temperature coefficient in the loss function by adjusting the `loss_beta` parameter. The `label_smoothing` parameter can be used for smoothing labels. + +```python +####################################################################### +# PART 1 Settings # +####################################################################### +# Model +dpo_loss_type = 'sigmoid' # One of ['sigmoid', 'hinge', 'ipo', 'kto_pair', 'sppo_hard', 'nca_pair', 'robust'] +loss_beta = 0.1 +label_smoothing = 0.0 +``` + +### Modifying the Model + +Users can modify `pretrained_model_name_or_path` to change the pretrained model. + +```python +####################################################################### +# PART 1 Settings # +####################################################################### +# Model +pretrained_model_name_or_path = 'internlm/internlm2-chat-1_8b-sft' +``` + +### Training Data + +In DPO training, you can specify the maximum number of tokens for a single sample sequence using the `max_length` parameter. XTuner will automatically truncate or pad the data. + +```python +# Data +max_length = 2048 +``` + +In the configuration file, we use the `train_dataset` field to specify the training dataset. You can specify the dataset loading method using the `dataset` field and the dataset mapping function using the `dataset_map_fn` field. + +```python +####################################################################### +# PART 3 Dataset & Dataloader # +####################################################################### +sampler = SequenceParallelSampler \ + if sequence_parallel_size > 1 else DefaultSampler + +train_dataset = dict( + type=build_preference_dataset, + dataset=dict(type=load_dataset, path='mlabonne/orpo-dpo-mix-40k'), + tokenizer=tokenizer, + max_length=max_length, + dataset_map_fn=orpo_dpo_mix_40k_map_fn, + is_dpo=True, + is_reward=False, + reward_token_id=-1, + num_proc=32, + use_varlen_attn=use_varlen_attn, + max_packed_length=max_packed_length, + shuffle_before_pack=True, +) + +train_dataloader = dict( + batch_size=batch_size, + num_workers=dataloader_num_workers, + dataset=train_dataset, + sampler=dict(type=sampler, shuffle=True), + collate_fn=dict( + type=preference_collate_fn, use_varlen_attn=use_varlen_attn)) +``` + +In the above configuration, we use `load_dataset` to load the `mlabonne/orpo-dpo-mix-40k` dataset from Hugging Face and use `orpo_dpo_mix_40k_map_fn` as the dataset mapping function. + +For more information on handling datasets and writing dataset mapping functions, please refer to the [Preference Dataset Section](../reward_model/preference_data.md). 
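+
+If your preference data lives in local JSONL files instead of on Hugging Face, the same `dataset` field accepts XTuner's `load_jsonl_dataset` loader, described in the section referenced above. A minimal sketch (the file paths are placeholders to replace with your own):
+
+```python
+train_dataset = dict(
+    type=build_preference_dataset,
+    dataset=dict(
+        type=load_jsonl_dataset,
+        data_files=['/your/jsonl/path/here.jsonl']),
+    tokenizer=tokenizer,
+    max_length=max_length,
+    is_dpo=True,
+    is_reward=False,
+)
+```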
+ +### Accelerating Training + +When training with preference data, we recommend enabling the [Variable-Length Attention Mechanism](https://xtuner.readthedocs.io/zh-cn/latest/acceleration/varlen_flash_attn.html) to avoid memory waste caused by length differences between chosen and rejected samples within a single preference. You can enable the variable-length attention mechanism by setting `use_varlen_attn=True`. + +XTuner also supports many training acceleration methods. For details on how to use them, please refer to the [Acceleration Strategies Section](https://xtuner.readthedocs.io/zh-cn/latest/acceleration/hyper_parameters.html). diff --git a/docs/en/dpo/overview.md b/docs/en/dpo/overview.md new file mode 100644 index 000000000..0c20946e3 --- /dev/null +++ b/docs/en/dpo/overview.md @@ -0,0 +1,27 @@ +## Introduction to DPO + +### Overview + +DPO (Direct Preference Optimization) is a method used in large language model training for directly optimizing human preferences. Unlike traditional reinforcement learning methods, DPO directly uses human preference data to optimize the model, thereby improving the quality of generated content to better align with human preferences. DPO also eliminates the need to train a Reward Model and a Critic Model, avoiding the complexity of reinforcement learning algorithms, reducing training overhead, and enhancing training efficiency. + +Many algorithms have made certain improvements to DPO's loss function. In XTuner, besides DPO, we have also implemented loss functions from papers such as [Identity Preference Optimization (IPO)](https://huggingface.co/papers/2310.12036). To use these algorithms, please refer to the [Modify DPO Settings](./modify_settings.md) section. We also provide some [example configurations](https://github.com/InternLM/xtuner/tree/main/xtuner/configs/dpo) for reference. + +In addition to DPO, there are alignment algorithms like [ORPO](https://arxiv.org/abs/2403.07691) that do not require a reference model. ORPO uses the concept of odds ratio to optimize the model by penalizing rejected samples during the training process, thereby adapting more effectively to the chosen samples. ORPO eliminates the dependence on a reference model, making the training process more simplified and efficient. The training method for ORPO in XTuner is very similar to DPO, and we provide some [example configurations](https://github.com/InternLM/xtuner/tree/main/xtuner/configs/orpo). Users can refer to the DPO tutorial to modify the configuration. + +### Features of DPO Training in XTuner + +DPO training in XTuner offers the following significant advantages: + +1. **Latest Algorithms**: In addition to supporting standard DPO, XTuner also supports improved DPO algorithms or memory efficient algorithms like ORPO that do not rely on reference models. + +2. **Reducing Memory Waste**: Due to the length differences in chosen and rejected data in preference datasets, padding tokens during data concatenation can cause memory waste. In XTuner, by utilizing the variable-length attention feature from Flash Attention2, preference pairs are packed into the same sequence during training, significantly reducing memory waste caused by padding tokens. This not only improves memory efficiency but also allows for training larger models or handling more data under the same hardware conditions. + + ![img](../../zh_cn/reward_model/images/var_len_atten.png) + +3. 
**Efficient Training**: Leveraging XTuner's QLoRA training capabilities, the policy model with the LoRA adapter removed can serve as the reference model, eliminating the memory overhead of the reference model weights and significantly reducing DPO training costs.
+
+4. **Long Text Training**: With XTuner's sequence parallel functionality, long text data can be trained efficiently.
+
+### Getting Started
+
+Refer to the [Quick Start Guide](./quick_start.md) to understand the basic concepts. For more information on configuring training parameters, please see the [Modify DPO Settings](./modify_settings.md) section.
diff --git a/docs/en/dpo/quick_start.md b/docs/en/dpo/quick_start.md
new file mode 100644
index 000000000..19fffbf8b
--- /dev/null
+++ b/docs/en/dpo/quick_start.md
@@ -0,0 +1,71 @@
+## Quick Start with DPO
+
+In this section, we will introduce how to use XTuner to train a 1.8B DPO (Direct Preference Optimization) model to help you get started quickly.
+
+### Preparing Pretrained Model Weights
+
+We use [InternLM2-chat-1.8b-sft](https://huggingface.co/internlm/internlm2-chat-1_8b-sft) as the initial model for DPO training to align it with human preferences.
+
+Set `pretrained_model_name_or_path = 'internlm/internlm2-chat-1_8b-sft'` in the training configuration file, and the model files will be automatically downloaded when training starts. If you need to download the model weights manually, please refer to the section [Preparing Pretrained Model Weights](https://xtuner.readthedocs.io/zh-cn/latest/preparation/pretrained_model.html), which provides detailed instructions on how to download model weights from Huggingface or Modelscope. Here are the links to the models on HuggingFace and ModelScope:
+
+- HuggingFace link: https://huggingface.co/internlm/internlm2-chat-1_8b-sft
+- ModelScope link: https://modelscope.cn/models/Shanghai_AI_Laboratory/internlm2-chat-1_8b-sft/summary
+
+### Preparing Training Data
+
+In this tutorial, we use the [mlabonne/orpo-dpo-mix-40k](https://huggingface.co/datasets/mlabonne/orpo-dpo-mix-40k) dataset from Huggingface as an example.
+
+```python
+train_dataset = dict(
+    type=build_preference_dataset,
+    dataset=dict(
+        type=load_dataset,
+        path='mlabonne/orpo-dpo-mix-40k'),
+    dataset_map_fn=orpo_dpo_mix_40k_map_fn,
+    is_dpo=True,
+    is_reward=False,
+)
+```
+
+Using the above configuration in the configuration file will automatically download and process this dataset. If you want to use other open-source datasets from Huggingface or custom datasets, please refer to the [Preference Dataset](../reward_model/preference_data.md) section.
+
+### Preparing Configuration File
+
+XTuner provides several ready-to-use configuration files, which can be viewed using `xtuner list-cfg`. Execute the following command to copy a configuration file to the current directory.
+
+```bash
+xtuner copy-cfg internlm2_chat_1_8b_dpo_full .
+```
+
+Open the copied configuration file. If you choose to download the model and dataset automatically, no modifications are needed. If you want to specify paths to your pre-downloaded model and dataset, modify the `pretrained_model_name_or_path` and the `path` parameter in `dataset` under `train_dataset`.
+
+For more training parameter configurations, please refer to the [Modifying DPO Training Configuration](./modify_settings.md) section.
+
+### Starting the Training
+
+After completing the above steps, you can start the training task using the following commands.
+
+```bash
+# Single machine, single GPU
+xtuner train ./internlm2_chat_1_8b_dpo_full_copy.py
+# Single machine, multiple GPUs
+NPROC_PER_NODE=${GPU_NUM} xtuner train ./internlm2_chat_1_8b_dpo_full_copy.py
+# Slurm cluster
+srun ${SRUN_ARGS} xtuner train ./internlm2_chat_1_8b_dpo_full_copy.py --launcher slurm
+```
+
+### Model Conversion
+
+XTuner provides integrated tools to convert models to HuggingFace format. Simply execute the following commands:
+
+```bash
+# Create a directory for HuggingFace format parameters
+mkdir work_dirs/internlm2_chat_1_8b_dpo_full_copy/iter_15230_hf
+
+# Convert format
+xtuner convert pth_to_hf internlm2_chat_1_8b_dpo_full_copy.py \
+    work_dirs/internlm2_chat_1_8b_dpo_full_copy/iter_15230.pth \
+    work_dirs/internlm2_chat_1_8b_dpo_full_copy/iter_15230_hf
+```
+
+This will convert XTuner's checkpoint to the HuggingFace format.
diff --git a/docs/en/index.rst b/docs/en/index.rst
index c702e0a04..c4c18d31a 100644
--- a/docs/en/index.rst
+++ b/docs/en/index.rst
@@ -56,6 +56,23 @@ Documentation
    training/open_source_dataset.rst
    training/visualization.rst

+.. toctree::
+   :maxdepth: 2
+   :caption: DPO
+
+   dpo/overview.md
+   dpo/quick_start.md
+   dpo/modify_settings.md
+
+.. toctree::
+   :maxdepth: 2
+   :caption: Reward Model
+
+   reward_model/overview.md
+   reward_model/quick_start.md
+   reward_model/modify_settings.md
+   reward_model/preference_data.md
+
 .. toctree::
    :maxdepth: 2
    :caption: Acceleration
diff --git a/docs/en/reward_model/modify_settings.md b/docs/en/reward_model/modify_settings.md
new file mode 100644
index 000000000..4f41ca300
--- /dev/null
+++ b/docs/en/reward_model/modify_settings.md
@@ -0,0 +1,100 @@
+## Modify Reward Model Training Configuration
+
+This section introduces the config parameters related to Reward Model training. For more details on XTuner config files, please refer to [Modify Settings](https://xtuner.readthedocs.io/zh-cn/latest/training/modify_settings.html).
+
+### Loss Function
+
+XTuner uses the [Bradley–Terry Model](https://en.wikipedia.org/wiki/Bradley%E2%80%93Terry_model) for preference modeling in the Reward Model. You can specify `loss_type="ranking"` to use ranking loss. XTuner also implements the focal loss function proposed in InternLM2, which adjusts the weights of difficult and easy samples to avoid overfitting. You can set `loss_type="focal"` to use this loss function. For a detailed explanation of this loss function, please refer to the [InternLM2 Technical Report](https://arxiv.org/abs/2403.17297).
+
+Additionally, to maintain stable reward model output scores, we have added a constraint term to the loss. You can specify `penalty_type='log_barrier'` or `penalty_type='L2'` to enable log barrier or L2 constraints, respectively.
+
+```python
+#######################################################################
+#                          PART 1  Settings                           #
+#######################################################################
+# Model
+loss_type = 'focal'  # 'ranking' or 'focal'
+penalty_type = 'log_barrier'  # 'log_barrier' or 'L2'
+```
+
+### Modifying the Model
+
+Users can modify `pretrained_model_name_or_path` to change the pretrained model.
+
+Note that XTuner calculates reward scores by appending a special token at the end of the data. Therefore, when switching models with different vocabularies, the ID of this special token also needs to be modified accordingly. We usually use an unused token at the end of the vocabulary as the reward token.
+ +For example, in InternLM2, we use `[UNUSED_TOKEN_130]` as the reward token: + +```python +####################################################################### +# PART 1 Settings # +####################################################################### +# Model +pretrained_model_name_or_path = 'internlm/internlm2-chat-1_8b-sft' +reward_token_id = 92527 # use [UNUSED_TOKEN_130] as reward token +``` + +If the user switches to the llama3 model, we can use `<|reserved_special_token_0|>` as the reward token: + +```python +####################################################################### +# PART 1 Settings # +####################################################################### +# Model +pretrained_model_name_or_path = 'meta-llama/Meta-Llama-3-8B-Instruct' +reward_token_id = 128002 # use <|reserved_special_token_0|> as reward token +``` + +### Training Data + +In Reward Model training, you can specify the maximum number of tokens for a single sample sequence using `max_length`. XTuner will automatically truncate or pad the data. + +```python +# Data +max_length = 2048 +``` + +In the configuration file, we use the `train_dataset` field to specify the training dataset. You can specify the dataset loading method using the `dataset` field and the dataset mapping function using the `dataset_map_fn` field. + +```python +####################################################################### +# PART 3 Dataset & Dataloader # +####################################################################### +sampler = SequenceParallelSampler \ + if sequence_parallel_size > 1 else DefaultSampler + +train_dataset = dict( + type=build_preference_dataset, + dataset=dict( + type=load_dataset, + path='argilla/ultrafeedback-binarized-preferences-cleaned'), + tokenizer=tokenizer, + max_length=max_length, + dataset_map_fn=orpo_dpo_mix_40k_map_fn, + is_dpo=False, + is_reward=True, + reward_token_id=reward_token_id, + num_proc=32, + use_varlen_attn=use_varlen_attn, + max_packed_length=max_packed_length, + shuffle_before_pack=True, +) + +train_dataloader = dict( + batch_size=batch_size, + num_workers=dataloader_num_workers, + dataset=train_dataset, + sampler=dict(type=sampler, shuffle=True), + collate_fn=dict( + type=preference_collate_fn, use_varlen_attn=use_varlen_attn)) +``` + +In the above configuration, we use `load_dataset` to load the `argilla/ultrafeedback-binarized-preferences-cleaned` dataset from Hugging Face, using `orpo_dpo_mix_40k_map_fn` as the dataset mapping function (this is because `orpo_dpo_mix_40k` and `ultrafeedback-binarized-preferences-cleaned` have the same format, so the same mapping function is used). + +For more information on handling datasets and writing dataset mapping functions, please refer to the [Preference Data Section](./preference_data.md). + +### Accelerating Training + +When training with preference data, we recommend enabling the [Variable-Length Attention Mechanism](https://xtuner.readthedocs.io/zh-cn/latest/acceleration/varlen_flash_attn.html) to avoid memory waste caused by length differences between chosen and rejected samples within a single preference. You can enable the variable-length attention mechanism by setting `use_varlen_attn=True`. + +XTuner also supports many training acceleration methods. For details on how to use them, please refer to the [Acceleration Strategies Section](https://xtuner.readthedocs.io/zh-cn/latest/acceleration/hyper_parameters.html). 
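+
+As a minimal sketch, both switches live in the settings part of the config; the field names follow the config snippets shown above, and the value of `sequence_parallel_size` is assumed to evenly divide the total number of GPUs:
+
+```python
+#######################################################################
+#                          PART 1  Settings                           #
+#######################################################################
+use_varlen_attn = True  # pack each chosen/rejected pair into one sequence
+sequence_parallel_size = 2  # split every sequence across 2 GPUs
+```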
diff --git a/docs/en/reward_model/overview.md b/docs/en/reward_model/overview.md
new file mode 100644
index 000000000..eb210140c
--- /dev/null
+++ b/docs/en/reward_model/overview.md
@@ -0,0 +1,43 @@
+## Introduction to Reward Model
+
+### Overview
+
+The Reward Model is a crucial component in the reinforcement learning process. Its primary task is to predict reward values based on given inputs, guiding the direction of the learning algorithm. In RLHF (Reinforcement Learning from Human Feedback), the Reward Model acts as a proxy for human preferences, helping the reinforcement learning algorithm optimize policies more effectively.
+
+In large language model training, the Reward Model typically refers to the Preference Model. By being shown good and bad (chosen & rejected) responses to the same prompts during training, it fits human preferences, and during inference it predicts a reward value that guides the optimization of the Actor model in the RLHF process.
+
+Applications of the Reward Model include but are not limited to:
+
+- **RLHF Training**: During RLHF training with algorithms such as Proximal Policy Optimization (PPO), the Reward Model provides reward signals, improving the quality of generated content and aligning it more closely with human preferences.
+- **BoN Sampling**: In the Best-of-N (BoN) sampling process, users can use the Reward Model to score multiple responses to the same prompt and select the highest-scoring generated result, thereby enhancing the model's output.
+- **Data Construction**: The Reward Model can be used to evaluate and filter training data, or to replace manual annotation when constructing DPO training data.
+
+### Features of Reward Model Training in XTuner
+
+Reward Model training in XTuner offers the following significant advantages:
+
+1. **Latest Training Techniques**: XTuner integrates the Reward Model training loss function from InternLM2, which stabilizes the numerical range of reward scores and reduces overfitting on simple samples (see the [InternLM2 Technical Report](https://arxiv.org/abs/2403.17297) for details).
+
+2. **Reducing Memory Waste**: Due to the length differences between chosen and rejected data in preference datasets, padding tokens during data concatenation can cause memory waste. In XTuner, by utilizing the variable-length attention feature from Flash Attention2, preference pairs are packed into the same sequence during training, significantly reducing memory waste caused by padding tokens. This not only improves memory efficiency but also allows for training larger models or handling more data under the same hardware conditions.
+
+![img](../../zh_cn/reward_model/images/var_len_atten.png)
+
+3. **Efficient Training**: Leveraging XTuner's QLoRA training capabilities, we can perform full parameter training only on the Reward Model's Value Head, while using QLoRA fine-tuning on the language model itself, substantially reducing the memory overhead of model training.
+
+4. **Long Text Training**: With XTuner's sequence parallel functionality, long text data can be trained efficiently.
+
+![img](../../zh_cn/reward_model/images/sequence_parallel.png)
+
+### Getting Started
+
+Refer to the [Quick Start Guide](./quick_start.md) to understand the basic concepts. For more information on configuring training parameters, please see the [Modifying Reward Model Settings](./modify_settings.md) section.
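+
+As an illustration of the BoN sampling use case above, the selection step reduces to a few lines. In this sketch, `generate` and `score` are hypothetical stand-ins for your generation pipeline and reward-model scoring call, not XTuner APIs:
+
+```python
+def best_of_n(prompt, generate, score, n=8):
+    """Best-of-N sampling: draw n candidate responses and keep the one
+    that the reward model scores highest."""
+    candidates = [generate(prompt) for _ in range(n)]
+    rewards = [score(prompt, response) for response in candidates]
+    return candidates[rewards.index(max(rewards))]
+```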
+
+### Open-source Models
+
+We use XTuner to train the InternLM2 Reward Models from the InternLM2 Technical Report; you are welcome to download and use them:
+
+| Model                     | Transformers(HF)                                                                 | ModelScope(HF)                                                                                             | OpenXLab(HF)                                                                                                                                                | RewardBench Score |
+| ------------------------- | -------------------------------------------------------------------------------- | ---------------------------------------------------------------------------------------------------------- | ----------------------------------------------------------------------------------------------------------------------------------------------------------- | ----------------- |
+| **InternLM2-1.8B-Reward** | [🤗internlm2-1_8b-reward](https://huggingface.co/internlm/internlm2-1_8b-reward) | [internlm2-1_8b-reward](https://modelscope.cn/models/Shanghai_AI_Laboratory/internlm2-1_8b-reward/summary) | [![Open in OpenXLab](https://cdn-static.openxlab.org.cn/header/openxlab_models.svg)](https://openxlab.org.cn/models/detail/OpenLMLab/internlm2-1_8b-reward) | 80.6 |
+| **InternLM2-7B-Reward**   | [🤗internlm2-7b-reward](https://huggingface.co/internlm/internlm2-7b-reward)     | [internlm2-7b-reward](https://modelscope.cn/models/Shanghai_AI_Laboratory/internlm2-7b-reward/summary)     | [![Open in OpenXLab](https://cdn-static.openxlab.org.cn/header/openxlab_models.svg)](https://openxlab.org.cn/models/detail/OpenLMLab/internlm2-7b-reward)   | 86.6 |
+| **InternLM2-20B-Reward**  | [🤗internlm2-20b-reward](https://huggingface.co/internlm/internlm2-20b-reward)   | [internlm2-20b-reward](https://modelscope.cn/models/Shanghai_AI_Laboratory/internlm2-20b-reward/summary)   | [![Open in OpenXLab](https://cdn-static.openxlab.org.cn/header/openxlab_models.svg)](https://openxlab.org.cn/models/detail/OpenLMLab/internlm2-20b-reward)  | 89.5 |
diff --git a/docs/en/reward_model/preference_data.md b/docs/en/reward_model/preference_data.md
new file mode 100644
index 000000000..2f304e627
--- /dev/null
+++ b/docs/en/reward_model/preference_data.md
@@ -0,0 +1,110 @@
+## Preference Dataset
+
+### Overview
+
+XTuner's Reward Model, along with DPO, ORPO, and other algorithms that train on preference data, adopts the same data format. Each training sample in the preference dataset needs to contain the following three fields: `prompt`, `chosen`, and `rejected`. The values for each field follow the [OpenAI chat message](https://platform.openai.com/docs/api-reference/chat/create) format. A specific example is as follows:
+
+```json
+{
+  "prompt": [
+    {
+      "role": "system",
+      "content": "You are a helpful assistant."
+    },
+    {
+      "role": "user",
+      "content": "Who won the world series in 2020?"
+    },
+    {
+      "role": "assistant",
+      "content": "The Los Angeles Dodgers won the World Series in 2020."
+    },
+    {
+      "role": "user",
+      "content": "Where was it played?"
+    }
+  ],
+  "chosen": [
+    {
+      "role": "assistant",
+      "content": "The 2020 World Series was played at Globe Life Field in Arlington, Texas."
+    }
+  ],
+  "rejected": [
+    {
+      "role": "assistant",
+      "content": "I don't know."
+    }
+  ]
+}
+```
+
+When conducting Reward Model training or DPO training, XTuner processes the preference dataset into different training labels based on the type of training task.
+
+![img](../../zh_cn/reward_model/images/preference_data.png)
+
+As shown in the above image, for Reward Model training, we follow the ChatGPT training method by adding a special `<|reward|>` token at the end of the conversation data and calculating the loss only on the logits output by this token.
For DPO series algorithm training, we mask the tokens in the prompt part and calculate the loss only on the chosen and rejected responses. In the configuration file, we control the dataset type through the `is_reward` and `is_dpo` fields in the dataset. + +### Training with Custom Data + +After understanding the data format of a single sample, you only need to save the converted data in [JSON Lines](https://jsonlines.org/) format. Each line in the file is a sample. + +```json +{"prompt": [{"content": "How can I learn to better regulate my emotions as a grown adult?", "role": "user"}], "chosen": [{"content": "You could do emotional regulation exercises with me, and try to notice and practice ways that you could better control your emotions in real life.", "role": "assistant"}], "rejected": [{"content": "Do you mean how can you learn to be more emotionally mature?", "role": "assistant"}]} +{"prompt": [{"content": "What are some interesting things I can add to a grilled cheese sandwich?", "role": "user"}, {"content": "A tomato, a slice of pickle, some chopped pickles, some chopped green onions. Also, some mayonnaise if you like.", "role": "assistant"}, {"content": "Sounds great. Anything else?", "role": "user"}], "chosen": [{"content": "If you’d like something a little different, I recommend grating some horseradish on top of the sandwich.", "role": "assistant"}], "rejected": [{"content": "Maybe some nice fresh basil?", "role": "assistant"}]} +{"prompt": [{"content": "How do I bob for apples?", "role": "user"}], "chosen": [{"content": "Ah! Do you mean the game of “applesauce,” or do you mean the idea of “to bob for apples”?", "role": "assistant"}], "rejected": [{"content": "Sorry, I don’t know that term.", "role": "assistant"}]} +...... +``` + +After preparing the custom dataset, you need to fill in the path to your saved data in the `data_files` field in the configuration file. You can load multiple JSONL files simultaneously for training. + +```python +####################################################################### +# PART 3 Dataset & Dataloader # +####################################################################### +train_dataset = dict( + type=build_preference_dataset, + dataset=dict( + type=load_jsonl_dataset, + data_files=[ + '/your/jsonl/path/here.jsonl', + '/your/another/jsonl/path/here.jsonl' + ]), +) +``` + +### Training with Open Source Datasets + +Similar to configuring SFT data in XTuner, when using open-source datasets from Hugging Face, you only need to define a mapping function `map_fn` to process the dataset format into XTuner's data format. + +Taking `Intel/orca_dpo_pairs` as an example, this dataset has `system`, `question`, `chosen`, and `rejected` fields, with each field's value in text format instead of the [OpenAI chat message](https://platform.openai.com/docs/api-reference/chat/create) format. 
+Therefore, we need to define a mapping function for this dataset:
+
+```python
+def intel_orca_dpo_map_fn(example):
+    prompt = [{
+        'role': 'system',
+        'content': example['system']
+    }, {
+        'role': 'user',
+        'content': example['question']
+    }]
+    chosen = [{'role': 'assistant', 'content': example['chosen']}]
+    rejected = [{'role': 'assistant', 'content': example['rejected']}]
+    return {'prompt': prompt, 'chosen': chosen, 'rejected': rejected}
+```
+
+As shown in the code, `intel_orca_dpo_map_fn` converts the four fields of the original data into the `prompt`, `chosen`, and `rejected` fields, and ensures that each field follows the [OpenAI chat message](https://platform.openai.com/docs/api-reference/chat/create) format, so that the subsequent data processing flow stays uniform.
+
+After defining the mapping function, import it in the configuration file and set it in the `dataset_map_fn` field.
+
+```python
+train_dataset = dict(
+    type=build_preference_dataset,
+    dataset=dict(
+        type=load_dataset,
+        path='Intel/orca_dpo_pairs'),
+    tokenizer=tokenizer,
+    max_length=max_length,
+    dataset_map_fn=intel_orca_dpo_map_fn,
+)
+```
diff --git a/docs/en/reward_model/quick_start.md b/docs/en/reward_model/quick_start.md
new file mode 100644
index 000000000..5c802be2f
--- /dev/null
+++ b/docs/en/reward_model/quick_start.md
@@ -0,0 +1,85 @@
+## Quick Start Guide for Reward Model
+
+In this section, we introduce how to use XTuner to train a 1.8B Reward Model, helping you get started quickly.
+
+### Preparing Pretrained Model Weights
+
+Following the paper [Training language models to follow instructions with human feedback](https://arxiv.org/abs/2203.02155), we use a language model fine-tuned with SFT as the initialization model for the Reward Model. Here, we use [InternLM2-chat-1.8b-sft](https://huggingface.co/internlm/internlm2-chat-1_8b-sft) as the initialization model.
+
+Set `pretrained_model_name_or_path = 'internlm/internlm2-chat-1_8b-sft'` in the training configuration file, and the model files will be downloaded automatically when training starts. If you need to download the model weights manually, please refer to the section [Preparing Pretrained Model Weights](https://xtuner.readthedocs.io/zh-cn/latest/preparation/pretrained_model.html), which explains in detail how to download model weights from HuggingFace or ModelScope. Here are the links to the model on HuggingFace and ModelScope:
+
+- HuggingFace link: https://huggingface.co/internlm/internlm2-chat-1_8b-sft
+- ModelScope link: https://modelscope.cn/models/Shanghai_AI_Laboratory/internlm2-chat-1_8b-sft/summary
+
+### Preparing Training Data
+
+In this tutorial, we use the [UltraFeedback](https://arxiv.org/abs/2310.01377) dataset as an example. For convenience, we use the preprocessed [argilla/ultrafeedback-binarized-preferences-cleaned](https://huggingface.co/datasets/argilla/ultrafeedback-binarized-preferences-cleaned) dataset from HuggingFace.
+
+```python
+train_dataset = dict(
+    type=build_preference_dataset,
+    dataset=dict(
+        type=load_dataset,
+        path='argilla/ultrafeedback-binarized-preferences-cleaned'),
+    dataset_map_fn=orpo_dpo_mix_40k_map_fn,
+    is_dpo=False,
+    is_reward=True,
+)
+```
+
+With the above configuration in place, the dataset will be downloaded and processed automatically. If you want to use other open-source datasets from HuggingFace or custom datasets, please refer to the [Preference Dataset](./preference_data.md) section.
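+
+If you want to inspect the data before training, a quick sanity check with the `datasets` library (assuming network access to the HuggingFace Hub) looks like this:
+
+```python
+from datasets import load_dataset
+
+# Download (or read from the local cache) the preprocessed preference
+# dataset and print one sample to confirm the chosen/rejected structure.
+ds = load_dataset('argilla/ultrafeedback-binarized-preferences-cleaned',
+                  split='train')
+print(ds[0].keys())  # expect fields such as 'prompt', 'chosen' and 'rejected'
+print(ds[0]['chosen'])
+```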
+ +### Preparing Configuration Files + +XTuner provides several ready-to-use configuration files, which can be viewed using `xtuner list-cfg`. Execute the following command to copy a configuration file to the current directory. + +```bash +xtuner copy-cfg internlm2_chat_1_8b_reward_full_ultrafeedback . +``` + +Open the copied configuration file. If you choose to download the model and dataset automatically, no modifications are needed. If you want to specify paths to your pre-downloaded model and dataset, modify the `pretrained_model_name_or_path` and the `path` parameter in `dataset` under `train_dataset`. + +For more training parameter configurations, please refer to the section [Modifying Reward Training Configuration](./modify_settings.md). + +### Starting the Training + +After completing the above steps, you can start the training task using the following commands. + +```bash +# Single node single GPU +xtuner train ./internlm2_chat_1_8b_reward_full_ultrafeedback_copy.py +# Single node multiple GPUs +NPROC_PER_NODE=${GPU_NUM} xtuner train ./internlm2_chat_1_8b_reward_full_ultrafeedback_copy.py +# Slurm cluster +srun ${SRUN_ARGS} xtuner train ./internlm2_chat_1_8b_reward_full_ultrafeedback_copy.py --launcher slurm +``` + +The correct training log should look like the following (running on a single A800 GPU): + +``` +06/06 16:12:11 - mmengine - INFO - Iter(train) [ 10/15230] lr: 3.9580e-07 eta: 2:59:41 time: 0.7084 data_time: 0.0044 memory: 18021 loss: 0.6270 acc: 0.0000 chosen_score_mean: 0.0000 rejected_score_mean: 0.0000 num_samples: 4.0000 num_tokens: 969.0000 +06/06 16:12:17 - mmengine - INFO - Iter(train) [ 20/15230] lr: 8.3536e-07 eta: 2:45:25 time: 0.5968 data_time: 0.0034 memory: 42180 loss: 0.6270 acc: 0.5000 chosen_score_mean: 0.0013 rejected_score_mean: 0.0010 num_samples: 4.0000 num_tokens: 1405.0000 +06/06 16:12:22 - mmengine - INFO - Iter(train) [ 30/15230] lr: 1.2749e-06 eta: 2:37:18 time: 0.5578 data_time: 0.0024 memory: 32121 loss: 0.6270 acc: 0.7500 chosen_score_mean: 0.0016 rejected_score_mean: 0.0011 num_samples: 4.0000 num_tokens: 932.0000 +06/06 16:12:28 - mmengine - INFO - Iter(train) [ 40/15230] lr: 1.7145e-06 eta: 2:36:05 time: 0.6033 data_time: 0.0025 memory: 42186 loss: 0.6270 acc: 0.7500 chosen_score_mean: 0.0027 rejected_score_mean: 0.0016 num_samples: 4.0000 num_tokens: 994.0000 +06/06 16:12:35 - mmengine - INFO - Iter(train) [ 50/15230] lr: 2.1540e-06 eta: 2:41:03 time: 0.7166 data_time: 0.0027 memory: 42186 loss: 0.6278 acc: 0.5000 chosen_score_mean: 0.0031 rejected_score_mean: 0.0032 num_samples: 4.0000 num_tokens: 2049.0000 +06/06 16:12:40 - mmengine - INFO - Iter(train) [ 60/15230] lr: 2.5936e-06 eta: 2:33:37 time: 0.4627 data_time: 0.0023 memory: 30238 loss: 0.6262 acc: 1.0000 chosen_score_mean: 0.0057 rejected_score_mean: 0.0030 num_samples: 4.0000 num_tokens: 992.0000 +06/06 16:12:46 - mmengine - INFO - Iter(train) [ 70/15230] lr: 3.0331e-06 eta: 2:33:18 time: 0.6018 data_time: 0.0025 memory: 42186 loss: 0.6247 acc: 0.7500 chosen_score_mean: 0.0117 rejected_score_mean: 0.0055 num_samples: 4.0000 num_tokens: 815.0000 +``` + +### Model Conversion + +XTuner provides integrated tools to convert models to HuggingFace format. 
Simply execute the following commands:
+
+```bash
+# Create a directory to store the HF-format parameters
+mkdir work_dirs/internlm2_chat_1_8b_reward_full_ultrafeedback_copy/iter_15230_hf
+
+# Convert the format
+xtuner convert pth_to_hf internlm2_chat_1_8b_reward_full_ultrafeedback_copy.py \
+                         work_dirs/internlm2_chat_1_8b_reward_full_ultrafeedback_copy/iter_15230.pth \
+                         work_dirs/internlm2_chat_1_8b_reward_full_ultrafeedback_copy/iter_15230_hf
+```
+
+This converts the XTuner checkpoint to the HuggingFace format.
+
+Note: Since the Reward Model type is not integrated into the official transformers library, only Reward Models trained with InternLM2 will be converted to the `InternLM2ForRewardModel` type. Other models default to the `SequenceClassification` type (for example, LLaMA3 will be converted to the `LlamaForSequenceClassification` type).
diff --git a/docs/zh_cn/dpo/modify_settings.md b/docs/zh_cn/dpo/modify_settings.md
index 7b4672792..2365be25c 100644
--- a/docs/zh_cn/dpo/modify_settings.md
+++ b/docs/zh_cn/dpo/modify_settings.md
@@ -32,7 +32,7 @@ pretrained_model_name_or_path = 'internlm/internlm2-chat-1_8b-sft'
 
 ### Training Data
 
-In Reward Model training, you can use `max_length` to specify the maximum number of tokens in a single sample sequence; XTuner will automatically truncate or pad the data.
+In DPO training, you can use `max_length` to specify the maximum number of tokens in a single sample sequence; XTuner will automatically truncate or pad the data.
 
 ```python
 # Data
diff --git a/docs/zh_cn/dpo/overview.md b/docs/zh_cn/dpo/overview.md
index d1bfc4379..d3c3a7aad 100644
--- a/docs/zh_cn/dpo/overview.md
+++ b/docs/zh_cn/dpo/overview.md
@@ -20,6 +20,8 @@ XTuner's DPO training offers the following significant advantages:
 
 3. **Efficient Training**: With XTuner's QLoRA training capability, the reference model can be realized as the language model with the LoRA adapters removed, eliminating the memory footprint of the reference model weights and greatly reducing the training overhead of DPO.
 
+4. **Long-Text Training**: With XTuner's sequence parallelism, training on long text data is supported.
+
 ### Getting Started
 
 Please refer to [Quick Start](./quick_start.md) for the basic concepts; for more on configuring training parameters, see the [Modifying DPO Settings](./modify_settings.md) section.
diff --git a/docs/zh_cn/reward_model/images/sequence_parallel.png b/docs/zh_cn/reward_model/images/sequence_parallel.png
new file mode 100644
index 0000000000000000000000000000000000000000..53f86c81aa66d94d5cb523f6fa20e2c27e2f5bc1
Binary files /dev/null and b/docs/zh_cn/reward_model/images/sequence_parallel.png differ
diff --git a/docs/zh_cn/reward_model/overview.md b/docs/zh_cn/reward_model/overview.md
index 84b5ab14b..6c7c976ac 100644
--- a/docs/zh_cn/reward_model/overview.md
+++ b/docs/zh_cn/reward_model/overview.md
@@ -24,6 +24,20 @@ XTuner's Reward Model training offers the following significant advantages:
 
 3. **Efficient Training**: With XTuner's QLoRA training capability, we can perform full-parameter training on only the Reward Model's Value Head while fine-tuning the language model itself with QLoRA, greatly reducing the memory overhead of model training.
 
+
+4. **Long-Text Training**: With XTuner's sequence parallelism, training on long text data is supported.
+
+![img](./images/sequence_parallel.png)
+
 ### Getting Started
 
 Please refer to [Quick Start](./quick_start.md) for the basic concepts; for more on configuring training parameters, see the [Modifying Reward Model Settings](./modify_settings.md) section.
+
+### Open-source Models
+
+We use XTuner to train the InternLM2 Reward Models from the InternLM2 Technical Report; you are welcome to download and use them:
+
+| Model                     | Transformers(HF)                                                                  | ModelScope(HF)                                                                                             | OpenXLab(HF)                                                                                                                                                | RewardBench Score |
+| ------------------------- | --------------------------------------------------------------------------------- | ----------------------------------------------------------------------------------------------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------ | ----------------- |
+| **InternLM2-1.8B-Reward** | [🤗internlm2-1_8b-reward](https://huggingface.co/internlm/internlm2-1_8b-reward) | [internlm2-1_8b-reward](https://modelscope.cn/models/Shanghai_AI_Laboratory/internlm2-1_8b-reward/summary) | [![Open in OpenXLab](https://cdn-static.openxlab.org.cn/header/openxlab_models.svg)](https://openxlab.org.cn/models/detail/OpenLMLab/internlm2-1_8b-reward) | 80.6 |
+| **InternLM2-7B-Reward**   | [🤗internlm2-7b-reward](https://huggingface.co/internlm/internlm2-7b-reward)     | [internlm2-7b-reward](https://modelscope.cn/models/Shanghai_AI_Laboratory/internlm2-7b-reward/summary)     | [![Open in OpenXLab](https://cdn-static.openxlab.org.cn/header/openxlab_models.svg)](https://openxlab.org.cn/models/detail/OpenLMLab/internlm2-7b-reward)   | 86.6 |
+| **InternLM2-20B-Reward**  | [🤗internlm2-20b-reward](https://huggingface.co/internlm/internlm2-20b-reward)   | [internlm2-20b-reward](https://modelscope.cn/models/Shanghai_AI_Laboratory/internlm2-20b-reward/summary)   | [![Open in OpenXLab](https://cdn-static.openxlab.org.cn/header/openxlab_models.svg)](https://openxlab.org.cn/models/detail/OpenLMLab/internlm2-20b-reward)  | 89.5 |

From 27cf856135ef9db63ec96257c1cb8caf4b87d329 Mon Sep 17 00:00:00 2001
From: LDLINGLINGLING <47373076+LDLINGLINGLING@users.noreply.github.com>
Date: Fri, 19 Jul 2024 17:14:55 +0800
Subject: [PATCH 19/29] Added minicpm config files to support sft, qlora, lora
 and dpo (#847)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

* Added the template for minicpm
* Added a demo script for custom pretraining datasets for minicpm
* Added a minicpm demo for training on a custom sft dataset
* Added the dpo training config for minicpm_1b
* Added the Chinese lora training config for minicpm_1b
* Added the full-parameter Chinese training config for minicpm_1b
* Added the bilingual (Chinese and English) qlora training config for minicpm_1b
* Added the config for training dpo with qlora on minicpm_2b
* Added the full-parameter Chinese training config for minicpm_2b
* Added the Chinese lora training config for minicpm_2b
* Added the bilingual (Chinese and English) qlora training config for minicpm_2b
---
 .../minicpm_1b_full_custom_pretrain_e1.py     | 200 +++++++++++++++
 .../minicpm_2b_full_custom_pretrain_e1.py     | 200 +++++++++++++++
 .../minicpm_1b_full_custom_pretrain_e1.py     | 200 +++++++++++++++
 .../minicpm_2b_full_custom_pretrain_e1.py     | 200 +++++++++++++++
 .../minicpm/1_2b/minicpm_1b_dpo_qlora.py      | 219 ++++++++++++++++
 .../1_2b/minicpm_1b_full_alpaca_zh_e3.py      | 201 +++++++++++++++
 .../1_2b/minicpm_1b_lora_alpaca_zh_e3.py      | 213 ++++++++++++++++
 .../1_2b/minicpm_1b_qlora_alpaca_enzh_e3.py   | 238 ++++++++++++++++++
 .../1_2b/minicpm_1b_qlora_alpaca_zh_e3.py     | 221 ++++++++++++++++
 .../minicpm/2b/minicpm_2b_dpo_qlora.py        | 219 ++++++++++++++++
 .../2b/minicpm_2b_full_alpaca_zh_e3.py        | 201 +++++++++++++++
 .../2b/minicpm_2b_lora_alpaca_zh_e3.py        | 213 ++++++++++++++++
 .../2b/minicpm_2b_qlora_alpaca_enzh_e3.py     | 238 ++++++++++++++++++
 .../2b/minicpm_2b_qlora_alpaca_zh_e3.py       | 221 ++++++++++++++++
 xtuner/utils/templates.py                    |   3 +
 15 files changed, 2987 insertions(+)
 create mode 100644 xtuner/configs/custom_dataset/pretrain/minicpm/minicpm_1b_full_custom_pretrain_e1.py
 create mode 100644 xtuner/configs/custom_dataset/pretrain/minicpm/minicpm_2b_full_custom_pretrain_e1.py
 create mode 100644 xtuner/configs/custom_dataset/sft/minicpm/minicpm_1b_full_custom_pretrain_e1.py
 create mode 100644 xtuner/configs/custom_dataset/sft/minicpm/minicpm_2b_full_custom_pretrain_e1.py
 create mode 100644 xtuner/configs/minicpm/1_2b/minicpm_1b_dpo_qlora.py
 create mode 100644 xtuner/configs/minicpm/1_2b/minicpm_1b_full_alpaca_zh_e3.py
 create mode 100644 xtuner/configs/minicpm/1_2b/minicpm_1b_lora_alpaca_zh_e3.py
 create mode 100644 xtuner/configs/minicpm/1_2b/minicpm_1b_qlora_alpaca_enzh_e3.py
 create mode 100644 xtuner/configs/minicpm/1_2b/minicpm_1b_qlora_alpaca_zh_e3.py
 create mode 100644 xtuner/configs/minicpm/2b/minicpm_2b_dpo_qlora.py
 create mode 100644 xtuner/configs/minicpm/2b/minicpm_2b_full_alpaca_zh_e3.py
 create mode 100644 xtuner/configs/minicpm/2b/minicpm_2b_lora_alpaca_zh_e3.py
 create mode 100644 xtuner/configs/minicpm/2b/minicpm_2b_qlora_alpaca_enzh_e3.py
 create mode 100644 xtuner/configs/minicpm/2b/minicpm_2b_qlora_alpaca_zh_e3.py

diff --git a/xtuner/configs/custom_dataset/pretrain/minicpm/minicpm_1b_full_custom_pretrain_e1.py b/xtuner/configs/custom_dataset/pretrain/minicpm/minicpm_1b_full_custom_pretrain_e1.py
new file mode 100644
index 000000000..bafe7f42c
--- /dev/null
+++ b/xtuner/configs/custom_dataset/pretrain/minicpm/minicpm_1b_full_custom_pretrain_e1.py
@@ -0,0 +1,200 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+"""Data format:
+
+[
+    {
+        "text": "xxx"
+    },
+    {
+        "text": "xxx"
+    },
+    ...
+]
+"""  # noqa: E501
+
+from datasets import load_dataset
+from mmengine.dataset import DefaultSampler
+from mmengine.hooks import (CheckpointHook, DistSamplerSeedHook, IterTimerHook,
+                            LoggerHook, ParamSchedulerHook)
+from mmengine.optim import AmpOptimWrapper, CosineAnnealingLR, LinearLR
+from torch.optim import AdamW
+from transformers import AutoModelForCausalLM, AutoTokenizer
+
+from xtuner.dataset import process_hf_dataset
+from xtuner.dataset.collate_fns import default_collate_fn
+from xtuner.dataset.map_fns import pretrain_map_fn
+from xtuner.engine.hooks import (DatasetInfoHook, EvaluateChatHook,
+                                 VarlenAttnArgsToMessageHubHook)
+from xtuner.engine.runner import TrainLoop
+from xtuner.model import SupervisedFinetune
+
+#######################################################################
+#                          PART 1  Settings                           #
+#######################################################################
+# Model
+pretrained_model_name_or_path = 'openbmb/MiniCPM-1B-sft-bf16'
+use_varlen_attn = False
+
+# Data
+data_files = ['/path/to/json/file.json']
+max_length = 2048
+pack_to_max_length = True
+
+# Scheduler & Optimizer
+batch_size = 1  # per_device
+accumulative_counts = 1  # bs = 1 GPU * 1 batch_size_per_device * 1 acc
+dataloader_num_workers = 0
+max_epochs = 1
+optim_type = AdamW
+lr = 2e-5
+betas = (0.9, 0.999)
+weight_decay = 0
+max_norm = 1  # grad clip
+warmup_ratio = 0.03
+
+# Save
+save_steps = 500
+save_total_limit = 2  # Maximum checkpoints to keep (-1 means unlimited)
+
+# Evaluate the generation performance during the training
+evaluation_freq = 500
+SYSTEM = ''
+evaluation_inputs = ['上海是', 'Shanghai is']
+
+#######################################################################
+#                      PART 2  Model & Tokenizer                      #
+#######################################################################
+tokenizer = dict(
+    type=AutoTokenizer.from_pretrained,
+    pretrained_model_name_or_path=pretrained_model_name_or_path,
+    trust_remote_code=True,
+    padding_side='right',
+    eos_token='</s>')
+
+model = dict(
+    type=SupervisedFinetune,
+    use_varlen_attn=use_varlen_attn,
+    llm=dict(
+        type=AutoModelForCausalLM.from_pretrained,
+        pretrained_model_name_or_path=pretrained_model_name_or_path,
+        trust_remote_code=True))
+
+#######################################################################
+#                      PART 3  Dataset & Dataloader                   #
+#######################################################################
+train_dataset = dict(
+    type=process_hf_dataset,
+    dataset=dict(type=load_dataset, path='json', data_files=data_files),
+    tokenizer=tokenizer,
+    max_length=max_length,
+    dataset_map_fn=pretrain_map_fn,
+    template_map_fn=None,
+    remove_unused_columns=True,
+    shuffle_before_pack=False,
+    pack_to_max_length=pack_to_max_length,
+    use_varlen_attn=use_varlen_attn)
+
+train_dataloader = dict(
+    batch_size=batch_size,
+    num_workers=dataloader_num_workers,
+    dataset=train_dataset,
+    sampler=dict(type=DefaultSampler, shuffle=True),
+    collate_fn=dict(type=default_collate_fn, use_varlen_attn=use_varlen_attn))
+
+#######################################################################
+#                    PART 4  Scheduler & Optimizer                    #
+#######################################################################
+# optimizer
+optim_wrapper = dict(
+    type=AmpOptimWrapper,
+    optimizer=dict(
+        type=optim_type, lr=lr, betas=betas, weight_decay=weight_decay),
+    clip_grad=dict(max_norm=max_norm, error_if_nonfinite=False),
+    accumulative_counts=accumulative_counts,
+    loss_scale='dynamic',
+    dtype='float16')
+
+# learning policy
+# More information: https://github.com/open-mmlab/mmengine/blob/main/docs/en/tutorials/param_scheduler.md  # noqa: E501
+param_scheduler = [
+    dict(
+        type=LinearLR,
+        start_factor=1e-5,
+        by_epoch=True,
+        begin=0,
+        end=warmup_ratio * max_epochs,
+        convert_to_iter_based=True),
+    dict(
+        type=CosineAnnealingLR,
+        eta_min=0.0,
+        by_epoch=True,
+        begin=warmup_ratio * max_epochs,
+        end=max_epochs,
+        convert_to_iter_based=True)
+]
+
+# train, val, test setting
+train_cfg = dict(type=TrainLoop, max_epochs=max_epochs)
+
+#######################################################################
+#                           PART 5  Runtime                           #
+#######################################################################
+# Log the dialogue periodically during the training process, optional
+custom_hooks = [
+    dict(type=DatasetInfoHook, tokenizer=tokenizer),
+    dict(
+        type=EvaluateChatHook,
+        tokenizer=tokenizer,
+        every_n_iters=evaluation_freq,
+        evaluation_inputs=evaluation_inputs,
+        system=SYSTEM)
+]
+
+if use_varlen_attn:
+    custom_hooks += [dict(type=VarlenAttnArgsToMessageHubHook)]
+
+# configure default hooks
+default_hooks = dict(
+    # record the time of every iteration.
+    timer=dict(type=IterTimerHook),
+    # print log every 10 iterations.
+    logger=dict(type=LoggerHook, log_metric_by_epoch=False, interval=10),
+    # enable the parameter scheduler.
+    param_scheduler=dict(type=ParamSchedulerHook),
+    # save checkpoint per `save_steps`.
+    checkpoint=dict(
+        type=CheckpointHook,
+        by_epoch=False,
+        interval=save_steps,
+        max_keep_ckpts=save_total_limit),
+    # set sampler seed in distributed environment.
+    sampler_seed=dict(type=DistSamplerSeedHook),
+)
+
+# configure environment
+env_cfg = dict(
+    # whether to enable cudnn benchmark
+    cudnn_benchmark=False,
+    # set multi process parameters
+    mp_cfg=dict(mp_start_method='fork', opencv_num_threads=0),
+    # set distributed parameters
+    dist_cfg=dict(backend='nccl'),
+)
+
+# set visualizer
+visualizer = None
+
+# set log level
+log_level = 'INFO'
+
+# load from which checkpoint
+load_from = None
+
+# whether to resume training from the loaded checkpoint
+resume = False
+
+# Defaults to use random seed and disable `deterministic`
+randomness = dict(seed=None, deterministic=False)
+
+# set log processor
+log_processor = dict(by_epoch=False)
diff --git a/xtuner/configs/custom_dataset/pretrain/minicpm/minicpm_2b_full_custom_pretrain_e1.py b/xtuner/configs/custom_dataset/pretrain/minicpm/minicpm_2b_full_custom_pretrain_e1.py
new file mode 100644
index 000000000..160495a86
--- /dev/null
+++ b/xtuner/configs/custom_dataset/pretrain/minicpm/minicpm_2b_full_custom_pretrain_e1.py
@@ -0,0 +1,200 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+"""Data format:
+
+[
+    {
+        "text": "xxx"
+    },
+    {
+        "text": "xxx"
+    },
+    ...
+]
+"""  # noqa: E501
+
+from datasets import load_dataset
+from mmengine.dataset import DefaultSampler
+from mmengine.hooks import (CheckpointHook, DistSamplerSeedHook, IterTimerHook,
+                            LoggerHook, ParamSchedulerHook)
+from mmengine.optim import AmpOptimWrapper, CosineAnnealingLR, LinearLR
+from torch.optim import AdamW
+from transformers import AutoModelForCausalLM, AutoTokenizer
+
+from xtuner.dataset import process_hf_dataset
+from xtuner.dataset.collate_fns import default_collate_fn
+from xtuner.dataset.map_fns import pretrain_map_fn
+from xtuner.engine.hooks import (DatasetInfoHook, EvaluateChatHook,
+                                 VarlenAttnArgsToMessageHubHook)
+from xtuner.engine.runner import TrainLoop
+from xtuner.model import SupervisedFinetune
+
+#######################################################################
+#                          PART 1  Settings                           #
+#######################################################################
+# Model
+pretrained_model_name_or_path = 'openbmb/MiniCPM-2B-sft-bf16'
+use_varlen_attn = False
+
+# Data
+data_files = ['/path/to/json/file.json']
+max_length = 2048
+pack_to_max_length = True
+
+# Scheduler & Optimizer
+batch_size = 1  # per_device
+accumulative_counts = 16  # bs = 1 GPU * 1 batch_size_per_device * 16 acc
+dataloader_num_workers = 0
+max_epochs = 1
+optim_type = AdamW
+lr = 2e-5
+betas = (0.9, 0.999)
+weight_decay = 0
+max_norm = 1  # grad clip
+warmup_ratio = 0.03
+
+# Save
+save_steps = 500
+save_total_limit = 2  # Maximum checkpoints to keep (-1 means unlimited)
+
+# Evaluate the generation performance during the training
+evaluation_freq = 500
+SYSTEM = ''
+evaluation_inputs = ['上海是', 'Shanghai is']
+
+#######################################################################
+#                      PART 2  Model & Tokenizer                      #
+#######################################################################
+tokenizer = dict(
+    type=AutoTokenizer.from_pretrained,
+    pretrained_model_name_or_path=pretrained_model_name_or_path,
+    trust_remote_code=True,
+    padding_side='right',
+    eos_token='</s>')
+
+model = dict(
+    type=SupervisedFinetune,
+    use_varlen_attn=use_varlen_attn,
+    llm=dict(
+        type=AutoModelForCausalLM.from_pretrained,
+        pretrained_model_name_or_path=pretrained_model_name_or_path,
+        trust_remote_code=True))
+
+#######################################################################
+#                      PART 3  Dataset & Dataloader                   #
+#######################################################################
+train_dataset = dict(
+    type=process_hf_dataset,
+    dataset=dict(type=load_dataset, path='json', data_files=data_files),
+    tokenizer=tokenizer,
+    max_length=max_length,
+    dataset_map_fn=pretrain_map_fn,
+    template_map_fn=None,
+    remove_unused_columns=True,
+    shuffle_before_pack=False,
+    pack_to_max_length=pack_to_max_length,
+    use_varlen_attn=use_varlen_attn)
+
+train_dataloader = dict(
+    batch_size=batch_size,
+    num_workers=dataloader_num_workers,
+    dataset=train_dataset,
+    sampler=dict(type=DefaultSampler, shuffle=True),
+    collate_fn=dict(type=default_collate_fn, use_varlen_attn=use_varlen_attn))
+
+#######################################################################
+#                    PART 4  Scheduler & Optimizer                    #
+#######################################################################
+# optimizer
+optim_wrapper = dict(
+    type=AmpOptimWrapper,
+    optimizer=dict(
+        type=optim_type, lr=lr, betas=betas, weight_decay=weight_decay),
+    clip_grad=dict(max_norm=max_norm, error_if_nonfinite=False),
+    accumulative_counts=accumulative_counts,
+    loss_scale='dynamic',
+    dtype='float16')
+
+# learning policy
+# More information: https://github.com/open-mmlab/mmengine/blob/main/docs/en/tutorials/param_scheduler.md  # noqa: E501
+param_scheduler = [
+    dict(
+        type=LinearLR,
+        start_factor=1e-5,
+        by_epoch=True,
+        begin=0,
+        end=warmup_ratio * max_epochs,
+        convert_to_iter_based=True),
+    dict(
+        type=CosineAnnealingLR,
+        eta_min=0.0,
+        by_epoch=True,
+        begin=warmup_ratio * max_epochs,
+        end=max_epochs,
+        convert_to_iter_based=True)
+]
+
+# train, val, test setting
+train_cfg = dict(type=TrainLoop, max_epochs=max_epochs)
+
+#######################################################################
+#                           PART 5  Runtime                           #
+#######################################################################
+# Log the dialogue periodically during the training process, optional
+custom_hooks = [
+    dict(type=DatasetInfoHook, tokenizer=tokenizer),
+    dict(
+        type=EvaluateChatHook,
+        tokenizer=tokenizer,
+        every_n_iters=evaluation_freq,
+        evaluation_inputs=evaluation_inputs,
+        system=SYSTEM)
+]
+
+if use_varlen_attn:
+    custom_hooks += [dict(type=VarlenAttnArgsToMessageHubHook)]
+
+# configure default hooks
+default_hooks = dict(
+    # record the time of every iteration.
+    timer=dict(type=IterTimerHook),
+    # print log every 10 iterations.
+    logger=dict(type=LoggerHook, log_metric_by_epoch=False, interval=10),
+    # enable the parameter scheduler.
+    param_scheduler=dict(type=ParamSchedulerHook),
+    # save checkpoint per `save_steps`.
+    checkpoint=dict(
+        type=CheckpointHook,
+        by_epoch=False,
+        interval=save_steps,
+        max_keep_ckpts=save_total_limit),
+    # set sampler seed in distributed environment.
+    sampler_seed=dict(type=DistSamplerSeedHook),
+)
+
+# configure environment
+env_cfg = dict(
+    # whether to enable cudnn benchmark
+    cudnn_benchmark=False,
+    # set multi process parameters
+    mp_cfg=dict(mp_start_method='fork', opencv_num_threads=0),
+    # set distributed parameters
+    dist_cfg=dict(backend='nccl'),
+)
+
+# set visualizer
+visualizer = None
+
+# set log level
+log_level = 'INFO'
+
+# load from which checkpoint
+load_from = None
+
+# whether to resume training from the loaded checkpoint
+resume = False
+
+# Defaults to use random seed and disable `deterministic`
+randomness = dict(seed=None, deterministic=False)
+
+# set log processor
+log_processor = dict(by_epoch=False)
diff --git a/xtuner/configs/custom_dataset/sft/minicpm/minicpm_1b_full_custom_pretrain_e1.py b/xtuner/configs/custom_dataset/sft/minicpm/minicpm_1b_full_custom_pretrain_e1.py
new file mode 100644
index 000000000..bafe7f42c
--- /dev/null
+++ b/xtuner/configs/custom_dataset/sft/minicpm/minicpm_1b_full_custom_pretrain_e1.py
@@ -0,0 +1,200 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+"""Data format:
+
+[
+    {
+        "text": "xxx"
+    },
+    {
+        "text": "xxx"
+    },
+    ...
+]
+"""  # noqa: E501
+
+from datasets import load_dataset
+from mmengine.dataset import DefaultSampler
+from mmengine.hooks import (CheckpointHook, DistSamplerSeedHook, IterTimerHook,
+                            LoggerHook, ParamSchedulerHook)
+from mmengine.optim import AmpOptimWrapper, CosineAnnealingLR, LinearLR
+from torch.optim import AdamW
+from transformers import AutoModelForCausalLM, AutoTokenizer
+
+from xtuner.dataset import process_hf_dataset
+from xtuner.dataset.collate_fns import default_collate_fn
+from xtuner.dataset.map_fns import pretrain_map_fn
+from xtuner.engine.hooks import (DatasetInfoHook, EvaluateChatHook,
+                                 VarlenAttnArgsToMessageHubHook)
+from xtuner.engine.runner import TrainLoop
+from xtuner.model import SupervisedFinetune
+
+#######################################################################
+#                          PART 1  Settings                           #
+#######################################################################
+# Model
+pretrained_model_name_or_path = 'openbmb/MiniCPM-1B-sft-bf16'
+use_varlen_attn = False
+
+# Data
+data_files = ['/path/to/json/file.json']
+max_length = 2048
+pack_to_max_length = True
+
+# Scheduler & Optimizer
+batch_size = 1  # per_device
+accumulative_counts = 1  # bs = 1 GPU * 1 batch_size_per_device * 1 acc
+dataloader_num_workers = 0
+max_epochs = 1
+optim_type = AdamW
+lr = 2e-5
+betas = (0.9, 0.999)
+weight_decay = 0
+max_norm = 1  # grad clip
+warmup_ratio = 0.03
+
+# Save
+save_steps = 500
+save_total_limit = 2  # Maximum checkpoints to keep (-1 means unlimited)
+
+# Evaluate the generation performance during the training
+evaluation_freq = 500
+SYSTEM = ''
+evaluation_inputs = ['上海是', 'Shanghai is']
+
+#######################################################################
+#                      PART 2  Model & Tokenizer                      #
+#######################################################################
+tokenizer = dict(
+    type=AutoTokenizer.from_pretrained,
+    pretrained_model_name_or_path=pretrained_model_name_or_path,
+    trust_remote_code=True,
+    padding_side='right',
+    eos_token='</s>')
+
+model = dict(
+    type=SupervisedFinetune,
+    use_varlen_attn=use_varlen_attn,
+    llm=dict(
+        type=AutoModelForCausalLM.from_pretrained,
+        pretrained_model_name_or_path=pretrained_model_name_or_path,
+        trust_remote_code=True))
+
+#######################################################################
+#                      PART 3  Dataset & Dataloader                   #
+#######################################################################
+train_dataset = dict(
+    type=process_hf_dataset,
+    dataset=dict(type=load_dataset, path='json', data_files=data_files),
+    tokenizer=tokenizer,
+    max_length=max_length,
+    dataset_map_fn=pretrain_map_fn,
+    template_map_fn=None,
+    remove_unused_columns=True,
+    shuffle_before_pack=False,
+    pack_to_max_length=pack_to_max_length,
+    use_varlen_attn=use_varlen_attn)
+
+train_dataloader = dict(
+    batch_size=batch_size,
+    num_workers=dataloader_num_workers,
+    dataset=train_dataset,
+    sampler=dict(type=DefaultSampler, shuffle=True),
+    collate_fn=dict(type=default_collate_fn, use_varlen_attn=use_varlen_attn))
+
+#######################################################################
+#                    PART 4  Scheduler & Optimizer                    #
+#######################################################################
+# optimizer
+optim_wrapper = dict(
+    type=AmpOptimWrapper,
+    optimizer=dict(
+        type=optim_type, lr=lr, betas=betas, weight_decay=weight_decay),
+    clip_grad=dict(max_norm=max_norm, error_if_nonfinite=False),
+    accumulative_counts=accumulative_counts,
+    loss_scale='dynamic',
+    dtype='float16')
+
+# learning policy
+# More information: https://github.com/open-mmlab/mmengine/blob/main/docs/en/tutorials/param_scheduler.md  # noqa: E501
+param_scheduler = [
+    dict(
+        type=LinearLR,
+        start_factor=1e-5,
+        by_epoch=True,
+        begin=0,
+        end=warmup_ratio * max_epochs,
+        convert_to_iter_based=True),
+    dict(
+        type=CosineAnnealingLR,
+        eta_min=0.0,
+        by_epoch=True,
+        begin=warmup_ratio * max_epochs,
+        end=max_epochs,
+        convert_to_iter_based=True)
+]
+
+# train, val, test setting
+train_cfg = dict(type=TrainLoop, max_epochs=max_epochs)
+
+#######################################################################
+#                           PART 5  Runtime                           #
+#######################################################################
+# Log the dialogue periodically during the training process, optional
+custom_hooks = [
+    dict(type=DatasetInfoHook, tokenizer=tokenizer),
+    dict(
+        type=EvaluateChatHook,
+        tokenizer=tokenizer,
+        every_n_iters=evaluation_freq,
+        evaluation_inputs=evaluation_inputs,
+        system=SYSTEM)
+]
+
+if use_varlen_attn:
+    custom_hooks += [dict(type=VarlenAttnArgsToMessageHubHook)]
+
+# configure default hooks
+default_hooks = dict(
+    # record the time of every iteration.
+    timer=dict(type=IterTimerHook),
+    # print log every 10 iterations.
+    logger=dict(type=LoggerHook, log_metric_by_epoch=False, interval=10),
+    # enable the parameter scheduler.
+    param_scheduler=dict(type=ParamSchedulerHook),
+    # save checkpoint per `save_steps`.
+    checkpoint=dict(
+        type=CheckpointHook,
+        by_epoch=False,
+        interval=save_steps,
+        max_keep_ckpts=save_total_limit),
+    # set sampler seed in distributed environment.
+    sampler_seed=dict(type=DistSamplerSeedHook),
+)
+
+# configure environment
+env_cfg = dict(
+    # whether to enable cudnn benchmark
+    cudnn_benchmark=False,
+    # set multi process parameters
+    mp_cfg=dict(mp_start_method='fork', opencv_num_threads=0),
+    # set distributed parameters
+    dist_cfg=dict(backend='nccl'),
+)
+
+# set visualizer
+visualizer = None
+
+# set log level
+log_level = 'INFO'
+
+# load from which checkpoint
+load_from = None
+
+# whether to resume training from the loaded checkpoint
+resume = False
+
+# Defaults to use random seed and disable `deterministic`
+randomness = dict(seed=None, deterministic=False)
+
+# set log processor
+log_processor = dict(by_epoch=False)
diff --git a/xtuner/configs/custom_dataset/sft/minicpm/minicpm_2b_full_custom_pretrain_e1.py b/xtuner/configs/custom_dataset/sft/minicpm/minicpm_2b_full_custom_pretrain_e1.py
new file mode 100644
index 000000000..160495a86
--- /dev/null
+++ b/xtuner/configs/custom_dataset/sft/minicpm/minicpm_2b_full_custom_pretrain_e1.py
@@ -0,0 +1,200 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+"""Data format:
+
+[
+    {
+        "text": "xxx"
+    },
+    {
+        "text": "xxx"
+    },
+    ...
+]
+"""  # noqa: E501
+
+from datasets import load_dataset
+from mmengine.dataset import DefaultSampler
+from mmengine.hooks import (CheckpointHook, DistSamplerSeedHook, IterTimerHook,
+                            LoggerHook, ParamSchedulerHook)
+from mmengine.optim import AmpOptimWrapper, CosineAnnealingLR, LinearLR
+from torch.optim import AdamW
+from transformers import AutoModelForCausalLM, AutoTokenizer
+
+from xtuner.dataset import process_hf_dataset
+from xtuner.dataset.collate_fns import default_collate_fn
+from xtuner.dataset.map_fns import pretrain_map_fn
+from xtuner.engine.hooks import (DatasetInfoHook, EvaluateChatHook,
+                                 VarlenAttnArgsToMessageHubHook)
+from xtuner.engine.runner import TrainLoop
+from xtuner.model import SupervisedFinetune
+
+#######################################################################
+#                          PART 1  Settings                           #
+#######################################################################
+# Model
+pretrained_model_name_or_path = 'openbmb/MiniCPM-2B-sft-bf16'
+use_varlen_attn = False
+
+# Data
+data_files = ['/path/to/json/file.json']
+max_length = 2048
+pack_to_max_length = True
+
+# Scheduler & Optimizer
+batch_size = 1  # per_device
+accumulative_counts = 16  # bs = 1 GPU * 1 batch_size_per_device * 16 acc
+dataloader_num_workers = 0
+max_epochs = 1
+optim_type = AdamW
+lr = 2e-5
+betas = (0.9, 0.999)
+weight_decay = 0
+max_norm = 1  # grad clip
+warmup_ratio = 0.03
+
+# Save
+save_steps = 500
+save_total_limit = 2  # Maximum checkpoints to keep (-1 means unlimited)
+
+# Evaluate the generation performance during the training
+evaluation_freq = 500
+SYSTEM = ''
+evaluation_inputs = ['上海是', 'Shanghai is']
+
+#######################################################################
+#                      PART 2  Model & Tokenizer                      #
+#######################################################################
+tokenizer = dict(
+    type=AutoTokenizer.from_pretrained,
+    pretrained_model_name_or_path=pretrained_model_name_or_path,
+    trust_remote_code=True,
+    padding_side='right',
+    eos_token='</s>')
+
+model = dict(
+    type=SupervisedFinetune,
+    use_varlen_attn=use_varlen_attn,
+    llm=dict(
+        type=AutoModelForCausalLM.from_pretrained,
+        pretrained_model_name_or_path=pretrained_model_name_or_path,
+        trust_remote_code=True))
+
+#######################################################################
+#                     PART 3  Dataset & Dataloader                    #
+#######################################################################
+train_dataset = dict(
+    type=process_hf_dataset,
+    dataset=dict(type=load_dataset, path='json', data_files=data_files),
+    tokenizer=tokenizer,
+    max_length=max_length,
+    dataset_map_fn=pretrain_map_fn,
+    template_map_fn=None,
+    remove_unused_columns=True,
+    shuffle_before_pack=False,
+    pack_to_max_length=pack_to_max_length,
+    use_varlen_attn=use_varlen_attn)
+
+train_dataloader = dict(
+    batch_size=batch_size,
+    num_workers=dataloader_num_workers,
+    dataset=train_dataset,
+    sampler=dict(type=DefaultSampler, shuffle=True),
+    collate_fn=dict(type=default_collate_fn, use_varlen_attn=use_varlen_attn))
+
+#######################################################################
+#                    PART 4  Scheduler & Optimizer                    #
+#######################################################################
+# optimizer
+optim_wrapper = dict(
+    type=AmpOptimWrapper,
+    optimizer=dict(
+        type=optim_type, lr=lr, betas=betas, weight_decay=weight_decay),
+    clip_grad=dict(max_norm=max_norm, error_if_nonfinite=False),
+    accumulative_counts=accumulative_counts,
+    loss_scale='dynamic',
+    dtype='float16')
+
+# learning policy
+# More information: https://github.com/open-mmlab/mmengine/blob/main/docs/en/tutorials/param_scheduler.md  # noqa: E501
+param_scheduler = [
+    dict(
+        type=LinearLR,
+        start_factor=1e-5,
+        by_epoch=True,
+        begin=0,
+        end=warmup_ratio * max_epochs,
+        convert_to_iter_based=True),
+    dict(
+        type=CosineAnnealingLR,
+        eta_min=0.0,
+        by_epoch=True,
+        begin=warmup_ratio * max_epochs,
+        end=max_epochs,
+        convert_to_iter_based=True)
+]
+
+# train, val, test setting
+train_cfg = dict(type=TrainLoop, max_epochs=max_epochs)
+
+#######################################################################
+#                           PART 5  Runtime                           #
+#######################################################################
+# Log the dialogue periodically during the training process, optional
+custom_hooks = [
+    dict(type=DatasetInfoHook, tokenizer=tokenizer),
+    dict(
+        type=EvaluateChatHook,
+        tokenizer=tokenizer,
+        every_n_iters=evaluation_freq,
+        evaluation_inputs=evaluation_inputs,
+        system=SYSTEM)
+]
+
+if use_varlen_attn:
+    custom_hooks += [dict(type=VarlenAttnArgsToMessageHubHook)]
+
+# configure default hooks
+default_hooks = dict(
+    # record the time of every iteration.
+    timer=dict(type=IterTimerHook),
+    # print log every 10 iterations.
+    logger=dict(type=LoggerHook, log_metric_by_epoch=False, interval=10),
+    # enable the parameter scheduler.
+    param_scheduler=dict(type=ParamSchedulerHook),
+    # save checkpoint per `save_steps`.
+    checkpoint=dict(
+        type=CheckpointHook,
+        by_epoch=False,
+        interval=save_steps,
+        max_keep_ckpts=save_total_limit),
+    # set sampler seed in distributed environment.
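+    # NOTE: because `by_epoch=False`, the `interval` above is counted in
+    # iterations, so with `save_steps = 500` checkpoints land at iteration
+    # 500, 1000, 1500, ... and only the newest `save_total_limit = 2` of
+    # them are kept (a sketch of mmengine's CheckpointHook behaviour).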
+    sampler_seed=dict(type=DistSamplerSeedHook),
+)
+
+# configure environment
+env_cfg = dict(
+    # whether to enable cudnn benchmark
+    cudnn_benchmark=False,
+    # set multi process parameters
+    mp_cfg=dict(mp_start_method='fork', opencv_num_threads=0),
+    # set distributed parameters
+    dist_cfg=dict(backend='nccl'),
+)
+
+# set visualizer
+visualizer = None
+
+# set log level
+log_level = 'INFO'
+
+# load from which checkpoint
+load_from = None
+
+# whether to resume training from the loaded checkpoint
+resume = False
+
+# Defaults to use random seed and disable `deterministic`
+randomness = dict(seed=None, deterministic=False)
+
+# set log processor
+log_processor = dict(by_epoch=False)
diff --git a/xtuner/configs/minicpm/1_2b/minicpm_1b_dpo_qlora.py b/xtuner/configs/minicpm/1_2b/minicpm_1b_dpo_qlora.py
new file mode 100644
index 000000000..ed48f29d0
--- /dev/null
+++ b/xtuner/configs/minicpm/1_2b/minicpm_1b_dpo_qlora.py
@@ -0,0 +1,219 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+import torch
+from datasets import load_dataset
+from mmengine.dataset import DefaultSampler
+from mmengine.hooks import (CheckpointHook, DistSamplerSeedHook, IterTimerHook,
+                            LoggerHook, ParamSchedulerHook)
+from mmengine.optim import AmpOptimWrapper, CosineAnnealingLR, LinearLR
+from peft import LoraConfig
+from torch.optim import AdamW
+from transformers import (AutoModelForCausalLM, AutoTokenizer,
+                          BitsAndBytesConfig)
+
+from xtuner.dataset.collate_fns.preference_collate_fn import \
+    preference_collate_fn
+from xtuner.dataset.preference_dataset import (build_preference_dataset,
+                                               orpo_dpo_mix_40k_map_fn)
+from xtuner.engine.hooks import (DatasetInfoHook, EvaluateChatHook,
+                                 VarlenAttnArgsToMessageHubHook)
+from xtuner.engine.runner import TrainLoop
+from xtuner.model.dpo import DPO
+from xtuner.utils import PROMPT_TEMPLATE, SYSTEM_TEMPLATE
+
+#######################################################################
+#                          PART 1  Settings                           #
+#######################################################################
+# Model
+pretrained_model_name_or_path = 'openbmb/MiniCPM-1B-sft-bf16'
+use_varlen_attn = False
+dpo_loss_type = 'sigmoid'  # One of ['sigmoid', 'hinge', 'ipo', 'kto_pair', 'sppo_hard', 'nca_pair', 'robust']  # noqa: E501
+loss_beta = 0.1
+label_smoothing = 0.0
+
+# Data
+prompt_template = PROMPT_TEMPLATE.minicpm
+max_length = 2048
+
+# Scheduler & Optimizer
+batch_size = 1  # per_device
+accumulative_counts = 16
+dataloader_num_workers = 0
+max_epochs = 3
+optim_type = AdamW
+lr = 5e-7  # refer to alignment handbook
+betas = (0.9, 0.999)
+weight_decay = 0
+max_norm = 1  # grad clip
+warmup_ratio = 0.03
+
+# Save
+save_steps = 500
+save_total_limit = 2  # Maximum checkpoints to keep (-1 means unlimited)
+
+# Evaluate the generation performance during the training
+evaluation_freq = 500
+SYSTEM = SYSTEM_TEMPLATE.alpaca
+evaluation_inputs = [
+    'What famous British author, known for his tales of mystery and the macabre, shares his initials with a common abbreviation for "rest in peace"?',  # noqa: E501
+    'Please tell me five scenic spots in Shanghai',
+    '890729 - 425663? Only respond with math and no words.'
+] + +####################################################################### +# PART 2 Model & Tokenizer # +####################################################################### +tokenizer = dict( + type=AutoTokenizer.from_pretrained, + pretrained_model_name_or_path=pretrained_model_name_or_path, + trust_remote_code=True, + padding_side='right') + +model = dict( + type=DPO, + use_varlen_attn=use_varlen_attn, + loss_type=dpo_loss_type, + beta=loss_beta, + label_smoothing=label_smoothing, + llm=dict( + type=AutoModelForCausalLM.from_pretrained, + pretrained_model_name_or_path=pretrained_model_name_or_path, + trust_remote_code=True, + torch_dtype=torch.float16, + quantization_config=dict( + type=BitsAndBytesConfig, + load_in_4bit=True, + load_in_8bit=False, + llm_int8_threshold=6.0, + llm_int8_has_fp16_weight=False, + bnb_4bit_compute_dtype=torch.float16, + bnb_4bit_use_double_quant=True, + bnb_4bit_quant_type='nf4')), + lora=dict( + type=LoraConfig, + r=64, + lora_alpha=16, + lora_dropout=0.1, + bias='none', + task_type='CAUSAL_LM')) + +####################################################################### +# PART 3 Dataset & Dataloader # +####################################################################### +train_dataset = dict( + type=build_preference_dataset, + dataset=dict(type=load_dataset, path='mlabonne/orpo-dpo-mix-40k'), + tokenizer=tokenizer, + max_length=max_length, + dataset_map_fn=orpo_dpo_mix_40k_map_fn, + is_dpo=True, + is_reward=False, + reward_token_id=-1, + num_proc=32, + use_varlen_attn=use_varlen_attn, + shuffle_before_pack=True, +) + +train_dataloader = dict( + batch_size=batch_size, + num_workers=dataloader_num_workers, + dataset=train_dataset, + sampler=dict(type=DefaultSampler, shuffle=True), + collate_fn=dict( + type=preference_collate_fn, use_varlen_attn=use_varlen_attn)) + +####################################################################### +# PART 4 Scheduler & Optimizer # +####################################################################### +# optimizer +optim_wrapper = dict( + type=AmpOptimWrapper, + optimizer=dict( + type=optim_type, lr=lr, betas=betas, weight_decay=weight_decay), + clip_grad=dict(max_norm=max_norm, error_if_nonfinite=False), + accumulative_counts=accumulative_counts, + loss_scale='dynamic', + dtype='float16') + +# learning policy +# More information: https://github.com/open-mmlab/mmengine/blob/main/docs/en/tutorials/param_scheduler.md # noqa: E501 +param_scheduler = [ + dict( + type=LinearLR, + start_factor=1e-5, + by_epoch=True, + begin=0, + end=warmup_ratio * max_epochs, + convert_to_iter_based=True), + dict( + type=CosineAnnealingLR, + eta_min=0.0, + by_epoch=True, + begin=warmup_ratio * max_epochs, + end=max_epochs, + convert_to_iter_based=True) +] + +# train, val, test setting +train_cfg = dict(type=TrainLoop, max_epochs=max_epochs) + +####################################################################### +# PART 5 Runtime # +####################################################################### +# Log the dialogue periodically during the training process, optional +custom_hooks = [ + dict(type=DatasetInfoHook, tokenizer=tokenizer), + dict( + type=EvaluateChatHook, + tokenizer=tokenizer, + every_n_iters=evaluation_freq, + evaluation_inputs=evaluation_inputs, + system=SYSTEM, + prompt_template=prompt_template) +] + +if use_varlen_attn: + custom_hooks += [dict(type=VarlenAttnArgsToMessageHubHook)] + +# configure default hooks +default_hooks = dict( + # record the time of every iteration. 
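+    # (IterTimerHook below just measures per-iteration wall time.)
+    #
+    # Recap of the DPO objective configured in PART 1: with
+    # `dpo_loss_type = 'sigmoid'`, each preference pair contributes the
+    # standard DPO loss (a sketch of the formula, not xtuner's exact code):
+    #
+    #   loss = -log(sigmoid(loss_beta * (
+    #       (logp_chosen - ref_logp_chosen)
+    #       - (logp_rejected - ref_logp_rejected))))
+    #
+    # so `loss_beta = 0.1` scales the implicit reward margin, and a nonzero
+    # `label_smoothing` would blend in the mirrored term, as in conservative
+    # DPO.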
+    timer=dict(type=IterTimerHook),
+    # print log every 10 iterations.
+    logger=dict(type=LoggerHook, log_metric_by_epoch=False, interval=10),
+    # enable the parameter scheduler.
+    param_scheduler=dict(type=ParamSchedulerHook),
+    # save checkpoint per `save_steps`.
+    checkpoint=dict(
+        type=CheckpointHook,
+        by_epoch=False,
+        interval=save_steps,
+        max_keep_ckpts=save_total_limit),
+    # set sampler seed in distributed environment.
+    sampler_seed=dict(type=DistSamplerSeedHook),
+)
+
+# configure environment
+env_cfg = dict(
+    # whether to enable cudnn benchmark
+    cudnn_benchmark=False,
+    # set multi process parameters
+    mp_cfg=dict(mp_start_method='fork', opencv_num_threads=0),
+    # set distributed parameters
+    dist_cfg=dict(backend='nccl'),
+)
+
+# set visualizer
+visualizer = None
+
+# set log level
+log_level = 'INFO'
+
+# load from which checkpoint
+load_from = None
+
+# whether to resume training from the loaded checkpoint
+resume = False
+
+# Defaults to use random seed and disable `deterministic`
+randomness = dict(seed=None, deterministic=False)
+
+# set log processor
+log_processor = dict(by_epoch=False)
diff --git a/xtuner/configs/minicpm/1_2b/minicpm_1b_full_alpaca_zh_e3.py b/xtuner/configs/minicpm/1_2b/minicpm_1b_full_alpaca_zh_e3.py
new file mode 100644
index 000000000..2c1e37ff3
--- /dev/null
+++ b/xtuner/configs/minicpm/1_2b/minicpm_1b_full_alpaca_zh_e3.py
@@ -0,0 +1,201 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+from datasets import load_dataset
+from mmengine.dataset import DefaultSampler
+from mmengine.hooks import (CheckpointHook, DistSamplerSeedHook, IterTimerHook,
+                            LoggerHook, ParamSchedulerHook)
+from mmengine.optim import AmpOptimWrapper, CosineAnnealingLR, LinearLR
+from torch.optim import AdamW
+from transformers import AutoModelForCausalLM, AutoTokenizer
+
+from xtuner.dataset import process_hf_dataset
+from xtuner.dataset.collate_fns import default_collate_fn
+from xtuner.dataset.map_fns import alpaca_zh_map_fn, template_map_fn_factory
+from xtuner.engine.hooks import (DatasetInfoHook, EvaluateChatHook,
+                                 VarlenAttnArgsToMessageHubHook)
+from xtuner.engine.runner import TrainLoop
+from xtuner.model import SupervisedFinetune
+from xtuner.parallel.sequence import SequenceParallelSampler
+from xtuner.utils import PROMPT_TEMPLATE, SYSTEM_TEMPLATE
+
+#######################################################################
+#                          PART 1  Settings                           #
+#######################################################################
+# Model
+pretrained_model_name_or_path = 'openbmb/MiniCPM-1B-sft-bf16'
+use_varlen_attn = False
+
+# Data
+alpaca_zh_path = 'silk-road/alpaca-data-gpt4-chinese'
+prompt_template = PROMPT_TEMPLATE.minicpm
+max_length = 2048
+pack_to_max_length = True
+
+# parallel
+sequence_parallel_size = 1
+
+# Scheduler & Optimizer
+batch_size = 1  # per_device
+accumulative_counts = 16
+accumulative_counts *= sequence_parallel_size
+dataloader_num_workers = 0
+max_epochs = 3
+optim_type = AdamW
+lr = 2e-5
+betas = (0.9, 0.999)
+weight_decay = 0
+max_norm = 1  # grad clip
+warmup_ratio = 0.03
+
+# Save
+save_steps = 500
+save_total_limit = 2  # Maximum checkpoints to keep (-1 means unlimited)
+
+# Evaluate the generation performance during the training
+evaluation_freq = 500
+SYSTEM = SYSTEM_TEMPLATE.alpaca
+evaluation_inputs = [
+    '请给我介绍五个上海的景点', 'Please tell me five scenic spots in Shanghai'
+]
+
+#######################################################################
+#                      PART 2  Model & Tokenizer                      #
+#######################################################################
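+# A quick tokenizer sanity check before launching (a sketch to run in a
+# Python shell; assumes the model files are reachable):
+#
+#   from transformers import AutoTokenizer
+#   tok = AutoTokenizer.from_pretrained(
+#       'openbmb/MiniCPM-1B-sft-bf16', trust_remote_code=True)
+#   print(tok.eos_token)  # expected: '</s>', matching `eos_token` below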
+tokenizer = dict(
+    type=AutoTokenizer.from_pretrained,
+    pretrained_model_name_or_path=pretrained_model_name_or_path,
+    trust_remote_code=True,
+    padding_side='right',
+    eos_token='</s>')
+
+model = dict(
+    type=SupervisedFinetune,
+    use_varlen_attn=use_varlen_attn,
+    llm=dict(
+        type=AutoModelForCausalLM.from_pretrained,
+        pretrained_model_name_or_path=pretrained_model_name_or_path,
+        trust_remote_code=True))
+
+#######################################################################
+#                     PART 3  Dataset & Dataloader                    #
+#######################################################################
+alpaca_zh = dict(
+    type=process_hf_dataset,
+    dataset=dict(type=load_dataset, path=alpaca_zh_path),
+    tokenizer=tokenizer,
+    max_length=max_length,
+    dataset_map_fn=alpaca_zh_map_fn,
+    template_map_fn=dict(
+        type=template_map_fn_factory, template=prompt_template),
+    remove_unused_columns=True,
+    shuffle_before_pack=True,
+    pack_to_max_length=pack_to_max_length,
+    use_varlen_attn=use_varlen_attn)
+
+sampler = SequenceParallelSampler \
+    if sequence_parallel_size > 1 else DefaultSampler
+
+train_dataloader = dict(
+    batch_size=batch_size,
+    num_workers=dataloader_num_workers,
+    dataset=alpaca_zh,
+    sampler=dict(type=sampler, shuffle=True),
+    collate_fn=dict(type=default_collate_fn, use_varlen_attn=use_varlen_attn))
+
+#######################################################################
+#                    PART 4  Scheduler & Optimizer                    #
+#######################################################################
+# optimizer
+optim_wrapper = dict(
+    type=AmpOptimWrapper,
+    optimizer=dict(
+        type=optim_type, lr=lr, betas=betas, weight_decay=weight_decay),
+    clip_grad=dict(max_norm=max_norm, error_if_nonfinite=False),
+    accumulative_counts=accumulative_counts,
+    loss_scale='dynamic',
+    dtype='float16')
+
+# learning policy
+# More information: https://github.com/open-mmlab/mmengine/blob/main/docs/en/tutorials/param_scheduler.md  # noqa: E501
+param_scheduler = [
+    dict(
+        type=LinearLR,
+        start_factor=1e-5,
+        by_epoch=True,
+        begin=0,
+        end=warmup_ratio * max_epochs,
+        convert_to_iter_based=True),
+    dict(
+        type=CosineAnnealingLR,
+        eta_min=0.0,
+        by_epoch=True,
+        begin=warmup_ratio * max_epochs,
+        end=max_epochs,
+        convert_to_iter_based=True)
+]
+
+# train, val, test setting
+train_cfg = dict(type=TrainLoop, max_epochs=max_epochs)
+
+#######################################################################
+#                           PART 5  Runtime                           #
+#######################################################################
+# Log the dialogue periodically during the training process, optional
+custom_hooks = [
+    dict(type=DatasetInfoHook, tokenizer=tokenizer),
+    dict(
+        type=EvaluateChatHook,
+        tokenizer=tokenizer,
+        every_n_iters=evaluation_freq,
+        evaluation_inputs=evaluation_inputs,
+        system=SYSTEM,
+        prompt_template=prompt_template)
+]
+
+if use_varlen_attn:
+    custom_hooks += [dict(type=VarlenAttnArgsToMessageHubHook)]
+
+# configure default hooks
+default_hooks = dict(
+    # record the time of every iteration.
+    timer=dict(type=IterTimerHook),
+    # print log every 10 iterations.
+    logger=dict(type=LoggerHook, log_metric_by_epoch=False, interval=10),
+    # enable the parameter scheduler.
+    param_scheduler=dict(type=ParamSchedulerHook),
+    # save checkpoint per `save_steps`.
+    checkpoint=dict(
+        type=CheckpointHook,
+        by_epoch=False,
+        interval=save_steps,
+        max_keep_ckpts=save_total_limit),
+    # set sampler seed in distributed environment.
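+    # NOTE on `pack_to_max_length` (PART 1): tokenized samples are
+    # concatenated and re-split into fixed `max_length` blocks, e.g. samples
+    # of 700, 900 and 600 tokens become one 2048-token block
+    # (700 + 900 + 448) with the remaining 152 tokens starting the next
+    # block (illustrative numbers).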
+    sampler_seed=dict(type=DistSamplerSeedHook),
+)
+
+# configure environment
+env_cfg = dict(
+    # whether to enable cudnn benchmark
+    cudnn_benchmark=False,
+    # set multi process parameters
+    mp_cfg=dict(mp_start_method='fork', opencv_num_threads=0),
+    # set distributed parameters
+    dist_cfg=dict(backend='nccl'),
+)
+
+# set visualizer
+visualizer = None
+
+# set log level
+log_level = 'INFO'
+
+# load from which checkpoint
+load_from = None
+
+# whether to resume training from the loaded checkpoint
+resume = False
+
+# Defaults to use random seed and disable `deterministic`
+randomness = dict(seed=None, deterministic=False)
+
+# set log processor
+log_processor = dict(by_epoch=False)
diff --git a/xtuner/configs/minicpm/1_2b/minicpm_1b_lora_alpaca_zh_e3.py b/xtuner/configs/minicpm/1_2b/minicpm_1b_lora_alpaca_zh_e3.py
new file mode 100644
index 000000000..428bdcd68
--- /dev/null
+++ b/xtuner/configs/minicpm/1_2b/minicpm_1b_lora_alpaca_zh_e3.py
@@ -0,0 +1,213 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+import torch
+from datasets import load_dataset
+from mmengine.dataset import DefaultSampler
+from mmengine.hooks import (CheckpointHook, DistSamplerSeedHook, IterTimerHook,
+                            LoggerHook, ParamSchedulerHook)
+from mmengine.optim import AmpOptimWrapper, CosineAnnealingLR, LinearLR
+from peft import LoraConfig
+from torch.optim import AdamW
+from transformers import (AutoModelForCausalLM, AutoTokenizer,
+                          BitsAndBytesConfig)
+
+from xtuner.dataset import process_hf_dataset
+from xtuner.dataset.collate_fns import default_collate_fn
+from xtuner.dataset.map_fns import alpaca_zh_map_fn, template_map_fn_factory
+from xtuner.engine.hooks import (DatasetInfoHook, EvaluateChatHook,
+                                 VarlenAttnArgsToMessageHubHook)
+from xtuner.engine.runner import TrainLoop
+from xtuner.model import SupervisedFinetune
+from xtuner.parallel.sequence import SequenceParallelSampler
+from xtuner.utils import PROMPT_TEMPLATE, SYSTEM_TEMPLATE
+
+#######################################################################
+#                          PART 1  Settings                           #
+#######################################################################
+# Model
+pretrained_model_name_or_path = 'openbmb/MiniCPM-1B-sft-bf16'
+use_varlen_attn = False
+
+# Data
+alpaca_zh_path = 'silk-road/alpaca-data-gpt4-chinese'
+prompt_template = PROMPT_TEMPLATE.minicpm
+max_length = 2048
+pack_to_max_length = True
+
+# parallel
+sequence_parallel_size = 1
+
+# Scheduler & Optimizer
+batch_size = 1  # per_device
+accumulative_counts = 16
+accumulative_counts *= sequence_parallel_size
+dataloader_num_workers = 0
+max_epochs = 3
+optim_type = AdamW
+lr = 2e-4
+betas = (0.9, 0.999)
+weight_decay = 0
+max_norm = 1  # grad clip
+warmup_ratio = 0.03
+gradient_checkpointing = True
+# Save
+save_steps = 500
+save_total_limit = 2  # Maximum checkpoints to keep (-1 means unlimited)
+
+# Evaluate the generation performance during the training
+evaluation_freq = 500
+SYSTEM = SYSTEM_TEMPLATE.alpaca
+evaluation_inputs = [
+    '请给我介绍五个上海的景点', 'Please tell me five scenic spots in Shanghai'
+]
+
+#######################################################################
+#                      PART 2  Model & Tokenizer                      #
+#######################################################################
+tokenizer = dict(
+    type=AutoTokenizer.from_pretrained,
+    pretrained_model_name_or_path=pretrained_model_name_or_path,
+    trust_remote_code=True,
+    padding_side='right',
+    eos_token='</s>')
+
+model = dict(
+    type=SupervisedFinetune,
+    use_varlen_attn=use_varlen_attn,
+    llm=dict(
+        type=AutoModelForCausalLM.from_pretrained,
+        pretrained_model_name_or_path=pretrained_model_name_or_path,
+        trust_remote_code=True,
+        torch_dtype=torch.float16,
+    ),
+    lora=dict(
+        type=LoraConfig,
+        r=64,
+        lora_alpha=16,
+        lora_dropout=0.1,
+        bias='none',
+        task_type='CAUSAL_LM'))
+
+#######################################################################
+#                     PART 3  Dataset & Dataloader                    #
+#######################################################################
+alpaca_zh = dict(
+    type=process_hf_dataset,
+    dataset=dict(type=load_dataset, path=alpaca_zh_path),
+    tokenizer=tokenizer,
+    max_length=max_length,
+    dataset_map_fn=alpaca_zh_map_fn,
+    template_map_fn=dict(
+        type=template_map_fn_factory, template=prompt_template),
+    remove_unused_columns=True,
+    shuffle_before_pack=True,
+    pack_to_max_length=pack_to_max_length,
+    use_varlen_attn=use_varlen_attn)
+
+sampler = SequenceParallelSampler \
+    if sequence_parallel_size > 1 else DefaultSampler
+
+train_dataloader = dict(
+    batch_size=batch_size,
+    num_workers=dataloader_num_workers,
+    dataset=alpaca_zh,
+    sampler=dict(type=sampler, shuffle=True),
+    collate_fn=dict(type=default_collate_fn, use_varlen_attn=use_varlen_attn))
+
+#######################################################################
+#                    PART 4  Scheduler & Optimizer                    #
+#######################################################################
+# optimizer
+optim_wrapper = dict(
+    type=AmpOptimWrapper,
+    optimizer=dict(
+        type=optim_type, lr=lr, betas=betas, weight_decay=weight_decay),
+    clip_grad=dict(max_norm=max_norm, error_if_nonfinite=False),
+    accumulative_counts=accumulative_counts,
+    loss_scale='dynamic',
+    dtype='float16')
+
+# learning policy
+# More information: https://github.com/open-mmlab/mmengine/blob/main/docs/en/tutorials/param_scheduler.md  # noqa: E501
+param_scheduler = [
+    dict(
+        type=LinearLR,
+        start_factor=1e-5,
+        by_epoch=True,
+        begin=0,
+        end=warmup_ratio * max_epochs,
+        convert_to_iter_based=True),
+    dict(
+        type=CosineAnnealingLR,
+        eta_min=0.0,
+        by_epoch=True,
+        begin=warmup_ratio * max_epochs,
+        end=max_epochs,
+        convert_to_iter_based=True)
+]
+
+# train, val, test setting
+train_cfg = dict(type=TrainLoop, max_epochs=max_epochs)
+
+#######################################################################
+#                           PART 5  Runtime                           #
+#######################################################################
+# Log the dialogue periodically during the training process, optional
+custom_hooks = [
+    dict(type=DatasetInfoHook, tokenizer=tokenizer),
+    dict(
+        type=EvaluateChatHook,
+        tokenizer=tokenizer,
+        every_n_iters=evaluation_freq,
+        evaluation_inputs=evaluation_inputs,
+        system=SYSTEM,
+        prompt_template=prompt_template)
+]
+
+if use_varlen_attn:
+    custom_hooks += [dict(type=VarlenAttnArgsToMessageHubHook)]
+
+# configure default hooks
+default_hooks = dict(
+    # record the time of every iteration.
+    timer=dict(type=IterTimerHook),
+    # print log every 10 iterations.
+    logger=dict(type=LoggerHook, log_metric_by_epoch=False, interval=10),
+    # enable the parameter scheduler.
+    param_scheduler=dict(type=ParamSchedulerHook),
+    # save checkpoint per `save_steps`.
+    checkpoint=dict(
+        type=CheckpointHook,
+        by_epoch=False,
+        interval=save_steps,
+        max_keep_ckpts=save_total_limit),
+    # set sampler seed in distributed environment.
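+    # NOTE on the LoRA settings in PART 2: peft reparameterizes each adapted
+    # weight as W + (lora_alpha / r) * B @ A, so r=64 with lora_alpha=16
+    # scales the learned update by 16 / 64 = 0.25 (a sketch of the peft
+    # behaviour, not its exact source).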
+ sampler_seed=dict(type=DistSamplerSeedHook), +) + +# configure environment +env_cfg = dict( + # whether to enable cudnn benchmark + cudnn_benchmark=False, + # set multi process parameters + mp_cfg=dict(mp_start_method='fork', opencv_num_threads=0), + # set distributed parameters + dist_cfg=dict(backend='nccl'), +) + +# set visualizer +visualizer = None + +# set log level +log_level = 'INFO' + +# load from which checkpoint +load_from = None + +# whether to resume training from the loaded checkpoint +resume = False + +# Defaults to use random seed and disable `deterministic` +randomness = dict(seed=None, deterministic=False) + +# set log processor +log_processor = dict(by_epoch=False) diff --git a/xtuner/configs/minicpm/1_2b/minicpm_1b_qlora_alpaca_enzh_e3.py b/xtuner/configs/minicpm/1_2b/minicpm_1b_qlora_alpaca_enzh_e3.py new file mode 100644 index 000000000..0adc91aec --- /dev/null +++ b/xtuner/configs/minicpm/1_2b/minicpm_1b_qlora_alpaca_enzh_e3.py @@ -0,0 +1,238 @@ +# Copyright (c) OpenMMLab. All rights reserved. +import torch +from datasets import load_dataset +from mmengine.dataset import DefaultSampler +from mmengine.hooks import (CheckpointHook, DistSamplerSeedHook, IterTimerHook, + LoggerHook, ParamSchedulerHook) +from mmengine.optim import AmpOptimWrapper, CosineAnnealingLR, LinearLR +from peft import LoraConfig +from torch.optim import AdamW +from transformers import (AutoModelForCausalLM, AutoTokenizer, + BitsAndBytesConfig) + +from xtuner.dataset import ConcatDataset, process_hf_dataset +from xtuner.dataset.collate_fns import default_collate_fn +from xtuner.dataset.map_fns import (alpaca_map_fn, alpaca_zh_map_fn, + template_map_fn_factory) +from xtuner.engine.hooks import (DatasetInfoHook, EvaluateChatHook, + VarlenAttnArgsToMessageHubHook) +from xtuner.engine.runner import TrainLoop +from xtuner.model import SupervisedFinetune +from xtuner.parallel.sequence import SequenceParallelSampler +from xtuner.utils import PROMPT_TEMPLATE, SYSTEM_TEMPLATE + +####################################################################### +# PART 1 Settings # +####################################################################### +# Model +pretrained_model_name_or_path = 'openbmb/MiniCPM-1B-sft-bf16' +use_varlen_attn = False + +# Data +alpaca_zh_path = 'silk-road/alpaca-data-gpt4-chinese' +alpaca_en_path = 'tatsu-lab/alpaca' +prompt_template = PROMPT_TEMPLATE.minicpm +max_length = 2048 +pack_to_max_length = True + +# parallel +sequence_parallel_size = 1 + +# Scheduler & Optimizer +batch_size = 1 # per_device +accumulative_counts = 16 +accumulative_counts *= sequence_parallel_size +dataloader_num_workers = 0 +max_epochs = 3 +optim_type = AdamW +lr = 2e-4 +betas = (0.9, 0.999) +weight_decay = 0 +max_norm = 1 # grad clip +warmup_ratio = 0.03 + +# Save +save_steps = 500 +save_total_limit = 2 # Maximum checkpoints to keep (-1 means unlimited) + +# Evaluate the generation performance during the training +evaluation_freq = 500 +SYSTEM = SYSTEM_TEMPLATE.alpaca +evaluation_inputs = [ + '请给我介绍五个上海的景点', 'Please tell me five scenic spots in Shanghai' +] + +####################################################################### +# PART 2 Model & Tokenizer # +####################################################################### +tokenizer = dict( + type=AutoTokenizer.from_pretrained, + pretrained_model_name_or_path=pretrained_model_name_or_path, + trust_remote_code=True, + padding_side='right', + eos_token='') + +model = dict( + type=SupervisedFinetune, + use_varlen_attn=use_varlen_attn, + llm=dict( + 
type=AutoModelForCausalLM.from_pretrained, + pretrained_model_name_or_path=pretrained_model_name_or_path, + trust_remote_code=True, + torch_dtype=torch.float16, + quantization_config=dict( + type=BitsAndBytesConfig, + load_in_4bit=True, + load_in_8bit=False, + llm_int8_threshold=6.0, + llm_int8_has_fp16_weight=False, + bnb_4bit_compute_dtype=torch.float16, + bnb_4bit_use_double_quant=True, + bnb_4bit_quant_type='nf4')), + lora=dict( + type=LoraConfig, + r=64, + lora_alpha=16, + lora_dropout=0.1, + bias='none', + task_type='CAUSAL_LM')) + +####################################################################### +# PART 3 Dataset & Dataloader # +####################################################################### +alpaca_en = dict( + type=process_hf_dataset, + dataset=dict(type=load_dataset, path=alpaca_en_path), + tokenizer=tokenizer, + max_length=max_length, + dataset_map_fn=alpaca_map_fn, + template_map_fn=dict( + type=template_map_fn_factory, template=prompt_template), + remove_unused_columns=True, + shuffle_before_pack=True, + pack_to_max_length=pack_to_max_length, + use_varlen_attn=use_varlen_attn) + +alpaca_zh = dict( + type=process_hf_dataset, + dataset=dict(type=load_dataset, path=alpaca_zh_path), + tokenizer=tokenizer, + max_length=max_length, + dataset_map_fn=alpaca_zh_map_fn, + template_map_fn=dict( + type=template_map_fn_factory, template=prompt_template), + remove_unused_columns=True, + shuffle_before_pack=True, + pack_to_max_length=pack_to_max_length, + use_varlen_attn=use_varlen_attn) + +train_dataset = dict(type=ConcatDataset, datasets=[alpaca_en, alpaca_zh]) + +sampler = SequenceParallelSampler \ + if sequence_parallel_size > 1 else DefaultSampler + +train_dataloader = dict( + batch_size=batch_size, + num_workers=dataloader_num_workers, + dataset=train_dataset, + sampler=dict(type=sampler, shuffle=True), + collate_fn=dict(type=default_collate_fn, use_varlen_attn=use_varlen_attn)) + +####################################################################### +# PART 4 Scheduler & Optimizer # +####################################################################### +# optimizer +optim_wrapper = dict( + type=AmpOptimWrapper, + optimizer=dict( + type=optim_type, lr=lr, betas=betas, weight_decay=weight_decay), + clip_grad=dict(max_norm=max_norm, error_if_nonfinite=False), + accumulative_counts=accumulative_counts, + loss_scale='dynamic', + dtype='float16') + +# learning policy +# More information: https://github.com/open-mmlab/mmengine/blob/main/docs/en/tutorials/param_scheduler.md # noqa: E501 +param_scheduler = [ + dict( + type=LinearLR, + start_factor=1e-5, + by_epoch=True, + begin=0, + end=warmup_ratio * max_epochs, + convert_to_iter_based=True), + dict( + type=CosineAnnealingLR, + eta_min=0.0, + by_epoch=True, + begin=warmup_ratio * max_epochs, + end=max_epochs, + convert_to_iter_based=True) +] + +# train, val, test setting +train_cfg = dict(type=TrainLoop, max_epochs=max_epochs) + +####################################################################### +# PART 5 Runtime # +####################################################################### +# Log the dialogue periodically during the training process, optional +custom_hooks = [ + dict(type=DatasetInfoHook, tokenizer=tokenizer), + dict( + type=EvaluateChatHook, + tokenizer=tokenizer, + every_n_iters=evaluation_freq, + evaluation_inputs=evaluation_inputs, + system=SYSTEM, + prompt_template=prompt_template) +] + +if use_varlen_attn: + custom_hooks += [dict(type=VarlenAttnArgsToMessageHubHook)] + +# configure default hooks 
+default_hooks = dict(
+    # record the time of every iteration.
+    timer=dict(type=IterTimerHook),
+    # print log every 10 iterations.
+    logger=dict(type=LoggerHook, log_metric_by_epoch=False, interval=10),
+    # enable the parameter scheduler.
+    param_scheduler=dict(type=ParamSchedulerHook),
+    # save checkpoint per `save_steps`.
+    checkpoint=dict(
+        type=CheckpointHook,
+        by_epoch=False,
+        interval=save_steps,
+        max_keep_ckpts=save_total_limit),
+    # set sampler seed in distributed environment.
+    sampler_seed=dict(type=DistSamplerSeedHook),
+)
+
+# configure environment
+env_cfg = dict(
+    # whether to enable cudnn benchmark
+    cudnn_benchmark=False,
+    # set multi process parameters
+    mp_cfg=dict(mp_start_method='fork', opencv_num_threads=0),
+    # set distributed parameters
+    dist_cfg=dict(backend='nccl'),
+)
+
+# set visualizer
+visualizer = None
+
+# set log level
+log_level = 'INFO'
+
+# load from which checkpoint
+load_from = None
+
+# whether to resume training from the loaded checkpoint
+resume = False
+
+# Defaults to use random seed and disable `deterministic`
+randomness = dict(seed=None, deterministic=False)
+
+# set log processor
+log_processor = dict(by_epoch=False)
diff --git a/xtuner/configs/minicpm/1_2b/minicpm_1b_qlora_alpaca_zh_e3.py b/xtuner/configs/minicpm/1_2b/minicpm_1b_qlora_alpaca_zh_e3.py
new file mode 100644
index 000000000..ca7816c0a
--- /dev/null
+++ b/xtuner/configs/minicpm/1_2b/minicpm_1b_qlora_alpaca_zh_e3.py
@@ -0,0 +1,221 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+import torch
+from datasets import load_dataset
+from mmengine.dataset import DefaultSampler
+from mmengine.hooks import (CheckpointHook, DistSamplerSeedHook, IterTimerHook,
+                            LoggerHook, ParamSchedulerHook)
+from mmengine.optim import AmpOptimWrapper, CosineAnnealingLR, LinearLR
+from peft import LoraConfig
+from torch.optim import AdamW
+from transformers import (AutoModelForCausalLM, AutoTokenizer,
+                          BitsAndBytesConfig)
+
+from xtuner.dataset import process_hf_dataset
+from xtuner.dataset.collate_fns import default_collate_fn
+from xtuner.dataset.map_fns import alpaca_zh_map_fn, template_map_fn_factory
+from xtuner.engine.hooks import (DatasetInfoHook, EvaluateChatHook,
+                                 VarlenAttnArgsToMessageHubHook)
+from xtuner.engine.runner import TrainLoop
+from xtuner.model import SupervisedFinetune
+from xtuner.parallel.sequence import SequenceParallelSampler
+from xtuner.utils import PROMPT_TEMPLATE, SYSTEM_TEMPLATE
+
+#######################################################################
+#                          PART 1  Settings                           #
+#######################################################################
+# Model
+pretrained_model_name_or_path = 'openbmb/MiniCPM-1B-sft-bf16'
+use_varlen_attn = False
+
+# Data
+alpaca_zh_path = 'silk-road/alpaca-data-gpt4-chinese'
+prompt_template = PROMPT_TEMPLATE.minicpm
+max_length = 2048
+pack_to_max_length = True
+
+# parallel
+sequence_parallel_size = 1
+
+# Scheduler & Optimizer
+batch_size = 1  # per_device
+accumulative_counts = 16
+accumulative_counts *= sequence_parallel_size
+dataloader_num_workers = 0
+max_epochs = 3
+optim_type = AdamW
+lr = 2e-4
+betas = (0.9, 0.999)
+weight_decay = 0
+max_norm = 1  # grad clip
+warmup_ratio = 0.03
+gradient_checkpointing = True
+# Save
+save_steps = 500
+save_total_limit = 2  # Maximum checkpoints to keep (-1 means unlimited)
+
+# Evaluate the generation performance during the training
+evaluation_freq = 500
+SYSTEM = SYSTEM_TEMPLATE.alpaca
+evaluation_inputs = [
+    '请给我介绍五个上海的景点', 'Please tell me five scenic spots in Shanghai'
+]
+
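+# QLoRA memory sketch for the model configured below: the frozen base
+# weights are held in 4-bit NF4 (~0.5 byte per parameter, so roughly 0.6 GB
+# for the ~1.2B base instead of ~2.4 GB in fp16), `bnb_4bit_use_double_quant`
+# additionally quantizes the quantization constants, and only the fp16 LoRA
+# adapters receive gradients. Figures are the usual QLoRA estimates, not
+# measured here.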
+#######################################################################
+#                      PART 2  Model & Tokenizer                      #
+#######################################################################
+tokenizer = dict(
+    type=AutoTokenizer.from_pretrained,
+    pretrained_model_name_or_path=pretrained_model_name_or_path,
+    trust_remote_code=True,
+    padding_side='right',
+    eos_token='</s>')
+
+model = dict(
+    type=SupervisedFinetune,
+    use_varlen_attn=use_varlen_attn,
+    llm=dict(
+        type=AutoModelForCausalLM.from_pretrained,
+        pretrained_model_name_or_path=pretrained_model_name_or_path,
+        trust_remote_code=True,
+        torch_dtype=torch.float16,
+        quantization_config=dict(
+            type=BitsAndBytesConfig,
+            load_in_4bit=True,
+            load_in_8bit=False,
+            llm_int8_threshold=6.0,
+            llm_int8_has_fp16_weight=False,
+            bnb_4bit_compute_dtype=torch.float16,
+            bnb_4bit_use_double_quant=True,
+            bnb_4bit_quant_type='nf4')),
+    lora=dict(
+        type=LoraConfig,
+        r=64,
+        lora_alpha=16,
+        lora_dropout=0.1,
+        bias='none',
+        task_type='CAUSAL_LM'))
+
+#######################################################################
+#                     PART 3  Dataset & Dataloader                    #
+#######################################################################
+alpaca_zh = dict(
+    type=process_hf_dataset,
+    dataset=dict(type=load_dataset, path=alpaca_zh_path),
+    tokenizer=tokenizer,
+    max_length=max_length,
+    dataset_map_fn=alpaca_zh_map_fn,
+    template_map_fn=dict(
+        type=template_map_fn_factory, template=prompt_template),
+    remove_unused_columns=True,
+    shuffle_before_pack=True,
+    pack_to_max_length=pack_to_max_length,
+    use_varlen_attn=use_varlen_attn)
+
+sampler = SequenceParallelSampler \
+    if sequence_parallel_size > 1 else DefaultSampler
+
+train_dataloader = dict(
+    batch_size=batch_size,
+    num_workers=dataloader_num_workers,
+    dataset=alpaca_zh,
+    sampler=dict(type=sampler, shuffle=True),
+    collate_fn=dict(type=default_collate_fn, use_varlen_attn=use_varlen_attn))
+
+#######################################################################
+#                    PART 4  Scheduler & Optimizer                    #
+#######################################################################
+# optimizer
+optim_wrapper = dict(
+    type=AmpOptimWrapper,
+    optimizer=dict(
+        type=optim_type, lr=lr, betas=betas, weight_decay=weight_decay),
+    clip_grad=dict(max_norm=max_norm, error_if_nonfinite=False),
+    accumulative_counts=accumulative_counts,
+    loss_scale='dynamic',
+    dtype='float16')
+
+# learning policy
+# More information: https://github.com/open-mmlab/mmengine/blob/main/docs/en/tutorials/param_scheduler.md  # noqa: E501
+param_scheduler = [
+    dict(
+        type=LinearLR,
+        start_factor=1e-5,
+        by_epoch=True,
+        begin=0,
+        end=warmup_ratio * max_epochs,
+        convert_to_iter_based=True),
+    dict(
+        type=CosineAnnealingLR,
+        eta_min=0.0,
+        by_epoch=True,
+        begin=warmup_ratio * max_epochs,
+        end=max_epochs,
+        convert_to_iter_based=True)
+]
+
+# train, val, test setting
+train_cfg = dict(type=TrainLoop, max_epochs=max_epochs)
+
+#######################################################################
+#                           PART 5  Runtime                           #
+#######################################################################
+# Log the dialogue periodically during the training process, optional
+custom_hooks = [
+    dict(type=DatasetInfoHook, tokenizer=tokenizer),
+    dict(
+        type=EvaluateChatHook,
+        tokenizer=tokenizer,
+        every_n_iters=evaluation_freq,
+        evaluation_inputs=evaluation_inputs,
+        system=SYSTEM,
+        prompt_template=prompt_template)
+]
+
+if use_varlen_attn:
+    custom_hooks += [dict(type=VarlenAttnArgsToMessageHubHook)]
+
+# configure default hooks
+default_hooks = dict(
+    # record the time of every iteration.
+    timer=dict(type=IterTimerHook),
+    # print log every 10 iterations.
+    logger=dict(type=LoggerHook, log_metric_by_epoch=False, interval=10),
+    # enable the parameter scheduler.
+    param_scheduler=dict(type=ParamSchedulerHook),
+    # save checkpoint per `save_steps`.
+    checkpoint=dict(
+        type=CheckpointHook,
+        by_epoch=False,
+        interval=save_steps,
+        max_keep_ckpts=save_total_limit),
+    # set sampler seed in distributed environment.
+    sampler_seed=dict(type=DistSamplerSeedHook),
+)
+
+# configure environment
+env_cfg = dict(
+    # whether to enable cudnn benchmark
+    cudnn_benchmark=False,
+    # set multi process parameters
+    mp_cfg=dict(mp_start_method='fork', opencv_num_threads=0),
+    # set distributed parameters
+    dist_cfg=dict(backend='nccl'),
+)
+
+# set visualizer
+visualizer = None
+
+# set log level
+log_level = 'INFO'
+
+# load from which checkpoint
+load_from = None
+
+# whether to resume training from the loaded checkpoint
+resume = False
+
+# Defaults to use random seed and disable `deterministic`
+randomness = dict(seed=None, deterministic=False)
+
+# set log processor
+log_processor = dict(by_epoch=False)
diff --git a/xtuner/configs/minicpm/2b/minicpm_2b_dpo_qlora.py b/xtuner/configs/minicpm/2b/minicpm_2b_dpo_qlora.py
new file mode 100644
index 000000000..6b395ae3e
--- /dev/null
+++ b/xtuner/configs/minicpm/2b/minicpm_2b_dpo_qlora.py
@@ -0,0 +1,219 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+import torch
+from datasets import load_dataset
+from mmengine.dataset import DefaultSampler
+from mmengine.hooks import (CheckpointHook, DistSamplerSeedHook, IterTimerHook,
+                            LoggerHook, ParamSchedulerHook)
+from mmengine.optim import AmpOptimWrapper, CosineAnnealingLR, LinearLR
+from peft import LoraConfig
+from torch.optim import AdamW
+from transformers import (AutoModelForCausalLM, AutoTokenizer,
+                          BitsAndBytesConfig)
+
+from xtuner.dataset.collate_fns.preference_collate_fn import \
+    preference_collate_fn
+from xtuner.dataset.preference_dataset import (build_preference_dataset,
+                                               orpo_dpo_mix_40k_map_fn)
+from xtuner.engine.hooks import (DatasetInfoHook, EvaluateChatHook,
+                                 VarlenAttnArgsToMessageHubHook)
+from xtuner.engine.runner import TrainLoop
+from xtuner.model.dpo import DPO
+from xtuner.utils import PROMPT_TEMPLATE, SYSTEM_TEMPLATE
+
+#######################################################################
+#                          PART 1  Settings                           #
+#######################################################################
+# Model
+pretrained_model_name_or_path = 'openbmb/MiniCPM-2B-sft-bf16'
+use_varlen_attn = False
+dpo_loss_type = 'sigmoid'  # One of ['sigmoid', 'hinge', 'ipo', 'kto_pair', 'sppo_hard', 'nca_pair', 'robust']  # noqa: E501
+loss_beta = 0.1
+label_smoothing = 0.0
+
+# Data
+prompt_template = PROMPT_TEMPLATE.minicpm
+max_length = 2048
+
+# Scheduler & Optimizer
+batch_size = 1  # per_device
+accumulative_counts = 16
+dataloader_num_workers = 0
+max_epochs = 3
+optim_type = AdamW
+lr = 5e-7  # refer to alignment handbook
+betas = (0.9, 0.999)
+weight_decay = 0
+max_norm = 1  # grad clip
+warmup_ratio = 0.03
+
+# Save
+save_steps = 500
+save_total_limit = 2  # Maximum checkpoints to keep (-1 means unlimited)
+
+# Evaluate the generation performance during the training
+evaluation_freq = 500
+SYSTEM = SYSTEM_TEMPLATE.alpaca
+evaluation_inputs = [
+    'What famous British author, known for his tales of mystery and the macabre, shares his initials with a common abbreviation for "rest in peace"?',  # noqa: E501
+    'Please tell me five scenic spots in Shanghai',
+    '890729 - 425663? Only respond with math and no words.'
+]
+
+#######################################################################
+#                      PART 2  Model & Tokenizer                      #
+#######################################################################
+tokenizer = dict(
+    type=AutoTokenizer.from_pretrained,
+    pretrained_model_name_or_path=pretrained_model_name_or_path,
+    trust_remote_code=True,
+    padding_side='right')
+
+model = dict(
+    type=DPO,
+    use_varlen_attn=use_varlen_attn,
+    loss_type=dpo_loss_type,
+    beta=loss_beta,
+    label_smoothing=label_smoothing,
+    llm=dict(
+        type=AutoModelForCausalLM.from_pretrained,
+        pretrained_model_name_or_path=pretrained_model_name_or_path,
+        trust_remote_code=True,
+        torch_dtype=torch.float16,
+        quantization_config=dict(
+            type=BitsAndBytesConfig,
+            load_in_4bit=True,
+            load_in_8bit=False,
+            llm_int8_threshold=6.0,
+            llm_int8_has_fp16_weight=False,
+            bnb_4bit_compute_dtype=torch.float16,
+            bnb_4bit_use_double_quant=True,
+            bnb_4bit_quant_type='nf4')),
+    lora=dict(
+        type=LoraConfig,
+        r=64,
+        lora_alpha=16,
+        lora_dropout=0.1,
+        bias='none',
+        task_type='CAUSAL_LM'))
+
+#######################################################################
+#                     PART 3  Dataset & Dataloader                    #
+#######################################################################
+train_dataset = dict(
+    type=build_preference_dataset,
+    dataset=dict(type=load_dataset, path='mlabonne/orpo-dpo-mix-40k'),
+    tokenizer=tokenizer,
+    max_length=max_length,
+    dataset_map_fn=orpo_dpo_mix_40k_map_fn,
+    is_dpo=True,
+    is_reward=False,
+    reward_token_id=-1,
+    num_proc=32,
+    use_varlen_attn=use_varlen_attn,
+    shuffle_before_pack=True,
+)
+
+train_dataloader = dict(
+    batch_size=batch_size,
+    num_workers=dataloader_num_workers,
+    dataset=train_dataset,
+    sampler=dict(type=DefaultSampler, shuffle=True),
+    collate_fn=dict(
+        type=preference_collate_fn, use_varlen_attn=use_varlen_attn))
+
+#######################################################################
+#                    PART 4  Scheduler & Optimizer                    #
+#######################################################################
+# optimizer
+optim_wrapper = dict(
+    type=AmpOptimWrapper,
+    optimizer=dict(
+        type=optim_type, lr=lr, betas=betas, weight_decay=weight_decay),
+    clip_grad=dict(max_norm=max_norm, error_if_nonfinite=False),
+    accumulative_counts=accumulative_counts,
+    loss_scale='dynamic',
+    dtype='float16')
+
+# learning policy
+# More information: https://github.com/open-mmlab/mmengine/blob/main/docs/en/tutorials/param_scheduler.md  # noqa: E501
+param_scheduler = [
+    dict(
+        type=LinearLR,
+        start_factor=1e-5,
+        by_epoch=True,
+        begin=0,
+        end=warmup_ratio * max_epochs,
+        convert_to_iter_based=True),
+    dict(
+        type=CosineAnnealingLR,
+        eta_min=0.0,
+        by_epoch=True,
+        begin=warmup_ratio * max_epochs,
+        end=max_epochs,
+        convert_to_iter_based=True)
+]
+
+# train, val, test setting
+train_cfg = dict(type=TrainLoop, max_epochs=max_epochs)
+
+#######################################################################
+#                           PART 5  Runtime                           #
+#######################################################################
+# Log the dialogue periodically during the training process, optional
+custom_hooks = [
+    dict(type=DatasetInfoHook, tokenizer=tokenizer),
+    dict(
+        type=EvaluateChatHook,
+        tokenizer=tokenizer,
+        every_n_iters=evaluation_freq,
+        evaluation_inputs=evaluation_inputs,
+        system=SYSTEM,
+        prompt_template=prompt_template)
+]
+
+if use_varlen_attn:
+    custom_hooks += [dict(type=VarlenAttnArgsToMessageHubHook)]
+
+# configure default hooks
+default_hooks = dict(
+    # record the time of every iteration.
+    timer=dict(type=IterTimerHook),
+    # print log every 10 iterations.
+    logger=dict(type=LoggerHook, log_metric_by_epoch=False, interval=10),
+    # enable the parameter scheduler.
+    param_scheduler=dict(type=ParamSchedulerHook),
+    # save checkpoint per `save_steps`.
+    checkpoint=dict(
+        type=CheckpointHook,
+        by_epoch=False,
+        interval=save_steps,
+        max_keep_ckpts=save_total_limit),
+    # set sampler seed in distributed environment.
+    sampler_seed=dict(type=DistSamplerSeedHook),
+)
+
+# configure environment
+env_cfg = dict(
+    # whether to enable cudnn benchmark
+    cudnn_benchmark=False,
+    # set multi process parameters
+    mp_cfg=dict(mp_start_method='fork', opencv_num_threads=0),
+    # set distributed parameters
+    dist_cfg=dict(backend='nccl'),
+)
+
+# set visualizer
+visualizer = None
+
+# set log level
+log_level = 'INFO'
+
+# load from which checkpoint
+load_from = None
+
+# whether to resume training from the loaded checkpoint
+resume = False
+
+# Defaults to use random seed and disable `deterministic`
+randomness = dict(seed=None, deterministic=False)
+
+# set log processor
+log_processor = dict(by_epoch=False)
diff --git a/xtuner/configs/minicpm/2b/minicpm_2b_full_alpaca_zh_e3.py b/xtuner/configs/minicpm/2b/minicpm_2b_full_alpaca_zh_e3.py
new file mode 100644
index 000000000..c699ff876
--- /dev/null
+++ b/xtuner/configs/minicpm/2b/minicpm_2b_full_alpaca_zh_e3.py
@@ -0,0 +1,201 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+from datasets import load_dataset
+from mmengine.dataset import DefaultSampler
+from mmengine.hooks import (CheckpointHook, DistSamplerSeedHook, IterTimerHook,
+                            LoggerHook, ParamSchedulerHook)
+from mmengine.optim import AmpOptimWrapper, CosineAnnealingLR, LinearLR
+from torch.optim import AdamW
+from transformers import AutoModelForCausalLM, AutoTokenizer
+
+from xtuner.dataset import process_hf_dataset
+from xtuner.dataset.collate_fns import default_collate_fn
+from xtuner.dataset.map_fns import alpaca_zh_map_fn, template_map_fn_factory
+from xtuner.engine.hooks import (DatasetInfoHook, EvaluateChatHook,
+                                 VarlenAttnArgsToMessageHubHook)
+from xtuner.engine.runner import TrainLoop
+from xtuner.model import SupervisedFinetune
+from xtuner.parallel.sequence import SequenceParallelSampler
+from xtuner.utils import PROMPT_TEMPLATE, SYSTEM_TEMPLATE
+
+#######################################################################
+#                          PART 1  Settings                           #
+#######################################################################
+# Model
+pretrained_model_name_or_path = 'openbmb/MiniCPM-2B-sft-bf16'
+use_varlen_attn = False
+
+# Data
+alpaca_zh_path = 'silk-road/alpaca-data-gpt4-chinese'
+prompt_template = PROMPT_TEMPLATE.minicpm
+max_length = 2048
+pack_to_max_length = True
+
+# parallel
+sequence_parallel_size = 1
+
+# Scheduler & Optimizer
+batch_size = 1  # per_device
+accumulative_counts = 16
+accumulative_counts *= sequence_parallel_size
+dataloader_num_workers = 0
+max_epochs = 3
+optim_type = AdamW
+lr = 2e-5
+betas = (0.9, 0.999)
+weight_decay = 0
+max_norm = 1  # grad clip
+warmup_ratio = 0.03
+
+# Save
+save_steps = 500
+save_total_limit = 2  # Maximum checkpoints to keep (-1 means unlimited)
+
+# Evaluate the generation performance during the training
+evaluation_freq = 500
+SYSTEM = SYSTEM_TEMPLATE.alpaca
+evaluation_inputs = [
+    '请给我介绍五个上海的景点', 'Please tell me five scenic spots in Shanghai'
+]
+
+#######################################################################
+#                      PART 2  Model & Tokenizer                      #
+#######################################################################
+tokenizer = dict(
+    type=AutoTokenizer.from_pretrained,
+    pretrained_model_name_or_path=pretrained_model_name_or_path,
+    trust_remote_code=True,
+    padding_side='right',
+    eos_token='</s>')
+
+model = dict(
+    type=SupervisedFinetune,
+    use_varlen_attn=use_varlen_attn,
+    llm=dict(
+        type=AutoModelForCausalLM.from_pretrained,
+        pretrained_model_name_or_path=pretrained_model_name_or_path,
+        trust_remote_code=True))
+
+#######################################################################
+#                     PART 3  Dataset & Dataloader                    #
+#######################################################################
+alpaca_zh = dict(
+    type=process_hf_dataset,
+    dataset=dict(type=load_dataset, path=alpaca_zh_path),
+    tokenizer=tokenizer,
+    max_length=max_length,
+    dataset_map_fn=alpaca_zh_map_fn,
+    template_map_fn=dict(
+        type=template_map_fn_factory, template=prompt_template),
+    remove_unused_columns=True,
+    shuffle_before_pack=True,
+    pack_to_max_length=pack_to_max_length,
+    use_varlen_attn=use_varlen_attn)
+
+sampler = SequenceParallelSampler \
+    if sequence_parallel_size > 1 else DefaultSampler
+
+train_dataloader = dict(
+    batch_size=batch_size,
+    num_workers=dataloader_num_workers,
+    dataset=alpaca_zh,
+    sampler=dict(type=sampler, shuffle=True),
+    collate_fn=dict(type=default_collate_fn, use_varlen_attn=use_varlen_attn))
+
+#######################################################################
+#                    PART 4  Scheduler & Optimizer                    #
+#######################################################################
+# optimizer
+optim_wrapper = dict(
+    type=AmpOptimWrapper,
+    optimizer=dict(
+        type=optim_type, lr=lr, betas=betas, weight_decay=weight_decay),
+    clip_grad=dict(max_norm=max_norm, error_if_nonfinite=False),
+    accumulative_counts=accumulative_counts,
+    loss_scale='dynamic',
+    dtype='float16')
+
+# learning policy
+# More information: https://github.com/open-mmlab/mmengine/blob/main/docs/en/tutorials/param_scheduler.md  # noqa: E501
+param_scheduler = [
+    dict(
+        type=LinearLR,
+        start_factor=1e-5,
+        by_epoch=True,
+        begin=0,
+        end=warmup_ratio * max_epochs,
+        convert_to_iter_based=True),
+    dict(
+        type=CosineAnnealingLR,
+        eta_min=0.0,
+        by_epoch=True,
+        begin=warmup_ratio * max_epochs,
+        end=max_epochs,
+        convert_to_iter_based=True)
+]
+
+# train, val, test setting
+train_cfg = dict(type=TrainLoop, max_epochs=max_epochs)
+
+#######################################################################
+#                           PART 5  Runtime                           #
+#######################################################################
+# Log the dialogue periodically during the training process, optional
+custom_hooks = [
+    dict(type=DatasetInfoHook, tokenizer=tokenizer),
+    dict(
+        type=EvaluateChatHook,
+        tokenizer=tokenizer,
+        every_n_iters=evaluation_freq,
+        evaluation_inputs=evaluation_inputs,
+        system=SYSTEM,
+        prompt_template=prompt_template)
+]
+
+if use_varlen_attn:
+    custom_hooks += [dict(type=VarlenAttnArgsToMessageHubHook)]
+
+# configure default hooks
+default_hooks = dict(
+    # record the time of every iteration.
+    timer=dict(type=IterTimerHook),
+    # print log every 10 iterations.
+    logger=dict(type=LoggerHook, log_metric_by_epoch=False, interval=10),
+    # enable the parameter scheduler.
+    param_scheduler=dict(type=ParamSchedulerHook),
+    # save checkpoint per `save_steps`.
+    checkpoint=dict(
+        type=CheckpointHook,
+        by_epoch=False,
+        interval=save_steps,
+        max_keep_ckpts=save_total_limit),
+    # set sampler seed in distributed environment.
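+    # NOTE on `sequence_parallel_size` (PART 1): each group of that many
+    # GPUs shards one sequence along its length, which divides the number of
+    # data-parallel ranks by the same factor; multiplying
+    # `accumulative_counts` by `sequence_parallel_size` keeps the global
+    # batch size unchanged.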
+    sampler_seed=dict(type=DistSamplerSeedHook),
+)
+
+# configure environment
+env_cfg = dict(
+    # whether to enable cudnn benchmark
+    cudnn_benchmark=False,
+    # set multi process parameters
+    mp_cfg=dict(mp_start_method='fork', opencv_num_threads=0),
+    # set distributed parameters
+    dist_cfg=dict(backend='nccl'),
+)
+
+# set visualizer
+visualizer = None
+
+# set log level
+log_level = 'INFO'
+
+# load from which checkpoint
+load_from = None
+
+# whether to resume training from the loaded checkpoint
+resume = False
+
+# Defaults to use random seed and disable `deterministic`
+randomness = dict(seed=None, deterministic=False)
+
+# set log processor
+log_processor = dict(by_epoch=False)
diff --git a/xtuner/configs/minicpm/2b/minicpm_2b_lora_alpaca_zh_e3.py b/xtuner/configs/minicpm/2b/minicpm_2b_lora_alpaca_zh_e3.py
new file mode 100644
index 000000000..ff1988a68
--- /dev/null
+++ b/xtuner/configs/minicpm/2b/minicpm_2b_lora_alpaca_zh_e3.py
@@ -0,0 +1,213 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+import torch
+from datasets import load_dataset
+from mmengine.dataset import DefaultSampler
+from mmengine.hooks import (CheckpointHook, DistSamplerSeedHook, IterTimerHook,
+                            LoggerHook, ParamSchedulerHook)
+from mmengine.optim import AmpOptimWrapper, CosineAnnealingLR, LinearLR
+from peft import LoraConfig
+from torch.optim import AdamW
+from transformers import (AutoModelForCausalLM, AutoTokenizer,
+                          BitsAndBytesConfig)
+
+from xtuner.dataset import process_hf_dataset
+from xtuner.dataset.collate_fns import default_collate_fn
+from xtuner.dataset.map_fns import alpaca_zh_map_fn, template_map_fn_factory
+from xtuner.engine.hooks import (DatasetInfoHook, EvaluateChatHook,
+                                 VarlenAttnArgsToMessageHubHook)
+from xtuner.engine.runner import TrainLoop
+from xtuner.model import SupervisedFinetune
+from xtuner.parallel.sequence import SequenceParallelSampler
+from xtuner.utils import PROMPT_TEMPLATE, SYSTEM_TEMPLATE
+
+#######################################################################
+#                          PART 1  Settings                           #
+#######################################################################
+# Model
+pretrained_model_name_or_path = 'openbmb/MiniCPM-2B-sft-bf16'
+use_varlen_attn = False
+
+# Data
+alpaca_zh_path = 'silk-road/alpaca-data-gpt4-chinese'
+prompt_template = PROMPT_TEMPLATE.minicpm
+max_length = 2048
+pack_to_max_length = True
+
+# parallel
+sequence_parallel_size = 1
+
+# Scheduler & Optimizer
+batch_size = 1  # per_device
+accumulative_counts = 16
+accumulative_counts *= sequence_parallel_size
+dataloader_num_workers = 0
+max_epochs = 3
+optim_type = AdamW
+lr = 2e-4
+betas = (0.9, 0.999)
+weight_decay = 0
+max_norm = 1  # grad clip
+warmup_ratio = 0.03
+gradient_checkpointing = True
+# Save
+save_steps = 500
+save_total_limit = 2  # Maximum checkpoints to keep (-1 means unlimited)
+
+# Evaluate the generation performance during the training
+evaluation_freq = 500
+SYSTEM = SYSTEM_TEMPLATE.alpaca
+evaluation_inputs = [
+    '请给我介绍五个上海的景点', 'Please tell me five scenic spots in Shanghai'
+]
+
+#######################################################################
+#                      PART 2  Model & Tokenizer                      #
+#######################################################################
+tokenizer = dict(
+    type=AutoTokenizer.from_pretrained,
+    pretrained_model_name_or_path=pretrained_model_name_or_path,
+    trust_remote_code=True,
+    padding_side='right',
+    eos_token='</s>')
+
+model = dict(
+    type=SupervisedFinetune,
+    use_varlen_attn=use_varlen_attn,
+    llm=dict(
+        type=AutoModelForCausalLM.from_pretrained,
+        pretrained_model_name_or_path=pretrained_model_name_or_path,
+        trust_remote_code=True,
+        torch_dtype=torch.float16,
+    ),
+    lora=dict(
+        type=LoraConfig,
+        r=64,
+        lora_alpha=16,
+        lora_dropout=0.1,
+        bias='none',
+        task_type='CAUSAL_LM'))
+
+#######################################################################
+#                     PART 3  Dataset & Dataloader                    #
+#######################################################################
+alpaca_zh = dict(
+    type=process_hf_dataset,
+    dataset=dict(type=load_dataset, path=alpaca_zh_path),
+    tokenizer=tokenizer,
+    max_length=max_length,
+    dataset_map_fn=alpaca_zh_map_fn,
+    template_map_fn=dict(
+        type=template_map_fn_factory, template=prompt_template),
+    remove_unused_columns=True,
+    shuffle_before_pack=True,
+    pack_to_max_length=pack_to_max_length,
+    use_varlen_attn=use_varlen_attn)
+
+sampler = SequenceParallelSampler \
+    if sequence_parallel_size > 1 else DefaultSampler
+
+train_dataloader = dict(
+    batch_size=batch_size,
+    num_workers=dataloader_num_workers,
+    dataset=alpaca_zh,
+    sampler=dict(type=sampler, shuffle=True),
+    collate_fn=dict(type=default_collate_fn, use_varlen_attn=use_varlen_attn))
+
+#######################################################################
+#                    PART 4  Scheduler & Optimizer                    #
+#######################################################################
+# optimizer
+optim_wrapper = dict(
+    type=AmpOptimWrapper,
+    optimizer=dict(
+        type=optim_type, lr=lr, betas=betas, weight_decay=weight_decay),
+    clip_grad=dict(max_norm=max_norm, error_if_nonfinite=False),
+    accumulative_counts=accumulative_counts,
+    loss_scale='dynamic',
+    dtype='float16')
+
+# learning policy
+# More information: https://github.com/open-mmlab/mmengine/blob/main/docs/en/tutorials/param_scheduler.md  # noqa: E501
+param_scheduler = [
+    dict(
+        type=LinearLR,
+        start_factor=1e-5,
+        by_epoch=True,
+        begin=0,
+        end=warmup_ratio * max_epochs,
+        convert_to_iter_based=True),
+    dict(
+        type=CosineAnnealingLR,
+        eta_min=0.0,
+        by_epoch=True,
+        begin=warmup_ratio * max_epochs,
+        end=max_epochs,
+        convert_to_iter_based=True)
+]
+
+# train, val, test setting
+train_cfg = dict(type=TrainLoop, max_epochs=max_epochs)
+
+#######################################################################
+#                           PART 5  Runtime                           #
+#######################################################################
+# Log the dialogue periodically during the training process, optional
+custom_hooks = [
+    dict(type=DatasetInfoHook, tokenizer=tokenizer),
+    dict(
+        type=EvaluateChatHook,
+        tokenizer=tokenizer,
+        every_n_iters=evaluation_freq,
+        evaluation_inputs=evaluation_inputs,
+        system=SYSTEM,
+        prompt_template=prompt_template)
+]
+
+if use_varlen_attn:
+    custom_hooks += [dict(type=VarlenAttnArgsToMessageHubHook)]
+
+# configure default hooks
+default_hooks = dict(
+    # record the time of every iteration.
+    timer=dict(type=IterTimerHook),
+    # print log every 10 iterations.
+    logger=dict(type=LoggerHook, log_metric_by_epoch=False, interval=10),
+    # enable the parameter scheduler.
+    param_scheduler=dict(type=ParamSchedulerHook),
+    # save checkpoint per `save_steps`.
+    checkpoint=dict(
+        type=CheckpointHook,
+        by_epoch=False,
+        interval=save_steps,
+        max_keep_ckpts=save_total_limit),
+    # set sampler seed in distributed environment.
+ sampler_seed=dict(type=DistSamplerSeedHook), +) + +# configure environment +env_cfg = dict( + # whether to enable cudnn benchmark + cudnn_benchmark=False, + # set multi process parameters + mp_cfg=dict(mp_start_method='fork', opencv_num_threads=0), + # set distributed parameters + dist_cfg=dict(backend='nccl'), +) + +# set visualizer +visualizer = None + +# set log level +log_level = 'INFO' + +# load from which checkpoint +load_from = None + +# whether to resume training from the loaded checkpoint +resume = False + +# Defaults to use random seed and disable `deterministic` +randomness = dict(seed=None, deterministic=False) + +# set log processor +log_processor = dict(by_epoch=False) diff --git a/xtuner/configs/minicpm/2b/minicpm_2b_qlora_alpaca_enzh_e3.py b/xtuner/configs/minicpm/2b/minicpm_2b_qlora_alpaca_enzh_e3.py new file mode 100644 index 000000000..2082e4c24 --- /dev/null +++ b/xtuner/configs/minicpm/2b/minicpm_2b_qlora_alpaca_enzh_e3.py @@ -0,0 +1,238 @@ +# Copyright (c) OpenMMLab. All rights reserved. +import torch +from datasets import load_dataset +from mmengine.dataset import DefaultSampler +from mmengine.hooks import (CheckpointHook, DistSamplerSeedHook, IterTimerHook, + LoggerHook, ParamSchedulerHook) +from mmengine.optim import AmpOptimWrapper, CosineAnnealingLR, LinearLR +from peft import LoraConfig +from torch.optim import AdamW +from transformers import (AutoModelForCausalLM, AutoTokenizer, + BitsAndBytesConfig) + +from xtuner.dataset import ConcatDataset, process_hf_dataset +from xtuner.dataset.collate_fns import default_collate_fn +from xtuner.dataset.map_fns import (alpaca_map_fn, alpaca_zh_map_fn, + template_map_fn_factory) +from xtuner.engine.hooks import (DatasetInfoHook, EvaluateChatHook, + VarlenAttnArgsToMessageHubHook) +from xtuner.engine.runner import TrainLoop +from xtuner.model import SupervisedFinetune +from xtuner.parallel.sequence import SequenceParallelSampler +from xtuner.utils import PROMPT_TEMPLATE, SYSTEM_TEMPLATE + +####################################################################### +# PART 1 Settings # +####################################################################### +# Model +pretrained_model_name_or_path = 'openbmb/MiniCPM-2B-sft-bf16' +use_varlen_attn = False + +# Data +alpaca_zh_path = 'silk-road/alpaca-data-gpt4-chinese' +alpaca_en_path = 'tatsu-lab/alpaca' +prompt_template = PROMPT_TEMPLATE.minicpm +max_length = 2048 +pack_to_max_length = True + +# parallel +sequence_parallel_size = 1 + +# Scheduler & Optimizer +batch_size = 1 # per_device +accumulative_counts = 16 +accumulative_counts *= sequence_parallel_size +dataloader_num_workers = 0 +max_epochs = 3 +optim_type = AdamW +lr = 2e-4 +betas = (0.9, 0.999) +weight_decay = 0 +max_norm = 1 # grad clip +warmup_ratio = 0.03 + +# Save +save_steps = 500 +save_total_limit = 2 # Maximum checkpoints to keep (-1 means unlimited) + +# Evaluate the generation performance during the training +evaluation_freq = 500 +SYSTEM = SYSTEM_TEMPLATE.alpaca +evaluation_inputs = [ + '请给我介绍五个上海的景点', 'Please tell me five scenic spots in Shanghai' +] + +####################################################################### +# PART 2 Model & Tokenizer # +####################################################################### +tokenizer = dict( + type=AutoTokenizer.from_pretrained, + pretrained_model_name_or_path=pretrained_model_name_or_path, + trust_remote_code=True, + padding_side='right', + eos_token='') + +model = dict( + type=SupervisedFinetune, + use_varlen_attn=use_varlen_attn, + llm=dict( + 
type=AutoModelForCausalLM.from_pretrained, + pretrained_model_name_or_path=pretrained_model_name_or_path, + trust_remote_code=True, + torch_dtype=torch.float16, + quantization_config=dict( + type=BitsAndBytesConfig, + load_in_4bit=True, + load_in_8bit=False, + llm_int8_threshold=6.0, + llm_int8_has_fp16_weight=False, + bnb_4bit_compute_dtype=torch.float16, + bnb_4bit_use_double_quant=True, + bnb_4bit_quant_type='nf4')), + lora=dict( + type=LoraConfig, + r=64, + lora_alpha=16, + lora_dropout=0.1, + bias='none', + task_type='CAUSAL_LM')) + +####################################################################### +# PART 3 Dataset & Dataloader # +####################################################################### +alpaca_en = dict( + type=process_hf_dataset, + dataset=dict(type=load_dataset, path=alpaca_en_path), + tokenizer=tokenizer, + max_length=max_length, + dataset_map_fn=alpaca_map_fn, + template_map_fn=dict( + type=template_map_fn_factory, template=prompt_template), + remove_unused_columns=True, + shuffle_before_pack=True, + pack_to_max_length=pack_to_max_length, + use_varlen_attn=use_varlen_attn) + +alpaca_zh = dict( + type=process_hf_dataset, + dataset=dict(type=load_dataset, path=alpaca_zh_path), + tokenizer=tokenizer, + max_length=max_length, + dataset_map_fn=alpaca_zh_map_fn, + template_map_fn=dict( + type=template_map_fn_factory, template=prompt_template), + remove_unused_columns=True, + shuffle_before_pack=True, + pack_to_max_length=pack_to_max_length, + use_varlen_attn=use_varlen_attn) + +train_dataset = dict(type=ConcatDataset, datasets=[alpaca_en, alpaca_zh]) + +sampler = SequenceParallelSampler \ + if sequence_parallel_size > 1 else DefaultSampler + +train_dataloader = dict( + batch_size=batch_size, + num_workers=dataloader_num_workers, + dataset=train_dataset, + sampler=dict(type=sampler, shuffle=True), + collate_fn=dict(type=default_collate_fn, use_varlen_attn=use_varlen_attn)) + +####################################################################### +# PART 4 Scheduler & Optimizer # +####################################################################### +# optimizer +optim_wrapper = dict( + type=AmpOptimWrapper, + optimizer=dict( + type=optim_type, lr=lr, betas=betas, weight_decay=weight_decay), + clip_grad=dict(max_norm=max_norm, error_if_nonfinite=False), + accumulative_counts=accumulative_counts, + loss_scale='dynamic', + dtype='float16') + +# learning policy +# More information: https://github.com/open-mmlab/mmengine/blob/main/docs/en/tutorials/param_scheduler.md # noqa: E501 +param_scheduler = [ + dict( + type=LinearLR, + start_factor=1e-5, + by_epoch=True, + begin=0, + end=warmup_ratio * max_epochs, + convert_to_iter_based=True), + dict( + type=CosineAnnealingLR, + eta_min=0.0, + by_epoch=True, + begin=warmup_ratio * max_epochs, + end=max_epochs, + convert_to_iter_based=True) +] + +# train, val, test setting +train_cfg = dict(type=TrainLoop, max_epochs=max_epochs) + +####################################################################### +# PART 5 Runtime # +####################################################################### +# Log the dialogue periodically during the training process, optional +custom_hooks = [ + dict(type=DatasetInfoHook, tokenizer=tokenizer), + dict( + type=EvaluateChatHook, + tokenizer=tokenizer, + every_n_iters=evaluation_freq, + evaluation_inputs=evaluation_inputs, + system=SYSTEM, + prompt_template=prompt_template) +] + +if use_varlen_attn: + custom_hooks += [dict(type=VarlenAttnArgsToMessageHubHook)] + +# configure default hooks 
+default_hooks = dict( + # record the time of every iteration. + timer=dict(type=IterTimerHook), + # print log every 10 iterations. + logger=dict(type=LoggerHook, log_metric_by_epoch=False, interval=10), + # enable the parameter scheduler. + param_scheduler=dict(type=ParamSchedulerHook), + # save checkpoint per `save_steps`. + checkpoint=dict( + type=CheckpointHook, + by_epoch=False, + interval=save_steps, + max_keep_ckpts=save_total_limit), + # set sampler seed in distributed evrionment. + sampler_seed=dict(type=DistSamplerSeedHook), +) + +# configure environment +env_cfg = dict( + # whether to enable cudnn benchmark + cudnn_benchmark=False, + # set multi process parameters + mp_cfg=dict(mp_start_method='fork', opencv_num_threads=0), + # set distributed parameters + dist_cfg=dict(backend='nccl'), +) + +# set visualizer +visualizer = None + +# set log level +log_level = 'INFO' + +# load from which checkpoint +load_from = None + +# whether to resume training from the loaded checkpoint +resume = False + +# Defaults to use random seed and disable `deterministic` +randomness = dict(seed=None, deterministic=False) + +# set log processor +log_processor = dict(by_epoch=False) diff --git a/xtuner/configs/minicpm/2b/minicpm_2b_qlora_alpaca_zh_e3.py b/xtuner/configs/minicpm/2b/minicpm_2b_qlora_alpaca_zh_e3.py new file mode 100644 index 000000000..86d3564da --- /dev/null +++ b/xtuner/configs/minicpm/2b/minicpm_2b_qlora_alpaca_zh_e3.py @@ -0,0 +1,221 @@ +# Copyright (c) OpenMMLab. All rights reserved. +import torch +from datasets import load_dataset +from mmengine.dataset import DefaultSampler +from mmengine.hooks import (CheckpointHook, DistSamplerSeedHook, IterTimerHook, + LoggerHook, ParamSchedulerHook) +from mmengine.optim import AmpOptimWrapper, CosineAnnealingLR, LinearLR +from peft import LoraConfig +from torch.optim import AdamW +from transformers import (AutoModelForCausalLM, AutoTokenizer, + BitsAndBytesConfig) + +from xtuner.dataset import process_hf_dataset +from xtuner.dataset.collate_fns import default_collate_fn +from xtuner.dataset.map_fns import alpaca_zh_map_fn, template_map_fn_factory +from xtuner.engine.hooks import (DatasetInfoHook, EvaluateChatHook, + VarlenAttnArgsToMessageHubHook) +from xtuner.engine.runner import TrainLoop +from xtuner.model import SupervisedFinetune +from xtuner.parallel.sequence import SequenceParallelSampler +from xtuner.utils import PROMPT_TEMPLATE, SYSTEM_TEMPLATE + +####################################################################### +# PART 1 Settings # +####################################################################### +# Model +pretrained_model_name_or_path = 'openbmb/MiniCPM-2B-sft-bf16' +use_varlen_attn = False + +# Data +alpaca_zh_path = 'silk-road/alpaca-data-gpt4-chinese' +prompt_template = PROMPT_TEMPLATE.minicpm +max_length = 2048 +pack_to_max_length = True + +# parallel +sequence_parallel_size = 1 + +# Scheduler & Optimizer +batch_size = 1 # per_device +accumulative_counts = 16 +accumulative_counts *= sequence_parallel_size +dataloader_num_workers = 0 +max_epochs = 3 +optim_type = AdamW +lr = 2e-4 +betas = (0.9, 0.999) +weight_decay = 0 +max_norm = 1 # grad clip +warmup_ratio = 0.03 +gradient_checkpointing = True +# Save +save_steps = 500 +save_total_limit = 2 # Maximum checkpoints to keep (-1 means unlimited) + +# Evaluate the generation performance during the training +evaluation_freq = 500 +SYSTEM = SYSTEM_TEMPLATE.alpaca +evaluation_inputs = [ + '请给我介绍五个上海的景点', 'Please tell me five scenic spots in Shanghai' +] + 
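+# Note: the effective global batch size is
+# batch_size * accumulative_counts * data_parallel_world_size, e.g.
+# 1 * 16 * 8 = 128 when training on 8 GPUs (the GPU count is assumed here;
+# it is determined by the launcher rather than by this config).
+# `accumulative_counts` is multiplied by `sequence_parallel_size` above
+# because sequence parallelism divides the data-parallel world size by the
+# same factor, which keeps the global batch size unchanged.
+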
+####################################################################### +# PART 2 Model & Tokenizer # +####################################################################### +tokenizer = dict( + type=AutoTokenizer.from_pretrained, + pretrained_model_name_or_path=pretrained_model_name_or_path, + trust_remote_code=True, + padding_side='right', + eos_token='') + +model = dict( + type=SupervisedFinetune, + use_varlen_attn=use_varlen_attn, + llm=dict( + type=AutoModelForCausalLM.from_pretrained, + pretrained_model_name_or_path=pretrained_model_name_or_path, + trust_remote_code=True, + torch_dtype=torch.float16, + quantization_config=dict( + type=BitsAndBytesConfig, + load_in_4bit=True, + load_in_8bit=False, + llm_int8_threshold=6.0, + llm_int8_has_fp16_weight=False, + bnb_4bit_compute_dtype=torch.float16, + bnb_4bit_use_double_quant=True, + bnb_4bit_quant_type='nf4')), + lora=dict( + type=LoraConfig, + r=64, + lora_alpha=16, + lora_dropout=0.1, + bias='none', + task_type='CAUSAL_LM')) + +####################################################################### +# PART 3 Dataset & Dataloader # +####################################################################### +alpaca_zh = dict( + type=process_hf_dataset, + dataset=dict(type=load_dataset, path=alpaca_zh_path), + tokenizer=tokenizer, + max_length=max_length, + dataset_map_fn=alpaca_zh_map_fn, + template_map_fn=dict( + type=template_map_fn_factory, template=prompt_template), + remove_unused_columns=True, + shuffle_before_pack=True, + pack_to_max_length=pack_to_max_length, + use_varlen_attn=use_varlen_attn) + +sampler = SequenceParallelSampler \ + if sequence_parallel_size > 1 else DefaultSampler + +train_dataloader = dict( + batch_size=batch_size, + num_workers=dataloader_num_workers, + dataset=alpaca_zh, + sampler=dict(type=sampler, shuffle=True), + collate_fn=dict(type=default_collate_fn, use_varlen_attn=use_varlen_attn)) + +####################################################################### +# PART 4 Scheduler & Optimizer # +####################################################################### +# optimizer +optim_wrapper = dict( + type=AmpOptimWrapper, + optimizer=dict( + type=optim_type, lr=lr, betas=betas, weight_decay=weight_decay), + clip_grad=dict(max_norm=max_norm, error_if_nonfinite=False), + accumulative_counts=accumulative_counts, + loss_scale='dynamic', + dtype='float16') + +# learning policy +# More information: https://github.com/open-mmlab/mmengine/blob/main/docs/en/tutorials/param_scheduler.md # noqa: E501 +param_scheduler = [ + dict( + type=LinearLR, + start_factor=1e-5, + by_epoch=True, + begin=0, + end=warmup_ratio * max_epochs, + convert_to_iter_based=True), + dict( + type=CosineAnnealingLR, + eta_min=0.0, + by_epoch=True, + begin=warmup_ratio * max_epochs, + end=max_epochs, + convert_to_iter_based=True) +] + +# train, val, test setting +train_cfg = dict(type=TrainLoop, max_epochs=max_epochs) + +####################################################################### +# PART 5 Runtime # +####################################################################### +# Log the dialogue periodically during the training process, optional +custom_hooks = [ + dict(type=DatasetInfoHook, tokenizer=tokenizer), + dict( + type=EvaluateChatHook, + tokenizer=tokenizer, + every_n_iters=evaluation_freq, + evaluation_inputs=evaluation_inputs, + system=SYSTEM, + prompt_template=prompt_template) +] + +if use_varlen_attn: + custom_hooks += [dict(type=VarlenAttnArgsToMessageHubHook)] + +# configure default hooks +default_hooks = dict( + # 
record the time of every iteration. + timer=dict(type=IterTimerHook), + # print log every 10 iterations. + logger=dict(type=LoggerHook, log_metric_by_epoch=False, interval=10), + # enable the parameter scheduler. + param_scheduler=dict(type=ParamSchedulerHook), + # save checkpoint per `save_steps`. + checkpoint=dict( + type=CheckpointHook, + by_epoch=False, + interval=save_steps, + max_keep_ckpts=save_total_limit), + # set sampler seed in distributed evrionment. + sampler_seed=dict(type=DistSamplerSeedHook), +) + +# configure environment +env_cfg = dict( + # whether to enable cudnn benchmark + cudnn_benchmark=False, + # set multi process parameters + mp_cfg=dict(mp_start_method='fork', opencv_num_threads=0), + # set distributed parameters + dist_cfg=dict(backend='nccl'), +) + +# set visualizer +visualizer = None + +# set log level +log_level = 'INFO' + +# load from which checkpoint +load_from = None + +# whether to resume training from the loaded checkpoint +resume = False + +# Defaults to use random seed and disable `deterministic` +randomness = dict(seed=None, deterministic=False) + +# set log processor +log_processor = dict(by_epoch=False) diff --git a/xtuner/utils/templates.py b/xtuner/utils/templates.py index da1d96d2c..ae8ba1523 100644 --- a/xtuner/utils/templates.py +++ b/xtuner/utils/templates.py @@ -130,6 +130,9 @@ SYSTEM=('[INST] {system} [/INST]\n'), INSTRUCTION=('[INST] {input} [/INST]'), SEP='\n'), + minicpm=dict( + INSTRUCTION=('<用户> {input} '), + SEP='\n'), gemma=dict( # `system` field is extended by xtuner SYSTEM=('system\n{system}\n'), From f49ac9895336ff8f6936b91ab04725020b5c53b3 Mon Sep 17 00:00:00 2001 From: whcao <41630003+HIT-cwh@users.noreply.github.com> Date: Fri, 19 Jul 2024 17:50:12 +0800 Subject: [PATCH 20/29] fix lint (#856) * fix lint * fix lint --- .../minicpm/minicpm_1b_full_custom_pretrain_e1.py | 2 +- .../sft/minicpm/minicpm_1b_full_custom_pretrain_e1.py | 2 +- xtuner/configs/minicpm/1_2b/minicpm_1b_dpo_qlora.py | 8 +++++--- .../configs/minicpm/1_2b/minicpm_1b_lora_alpaca_zh_e3.py | 5 ++--- xtuner/configs/minicpm/2b/minicpm_2b_dpo_qlora.py | 8 +++++--- xtuner/configs/minicpm/2b/minicpm_2b_lora_alpaca_zh_e3.py | 5 ++--- xtuner/utils/templates.py | 4 +--- 7 files changed, 17 insertions(+), 17 deletions(-) diff --git a/xtuner/configs/custom_dataset/pretrain/minicpm/minicpm_1b_full_custom_pretrain_e1.py b/xtuner/configs/custom_dataset/pretrain/minicpm/minicpm_1b_full_custom_pretrain_e1.py index bafe7f42c..fc0da5ed3 100644 --- a/xtuner/configs/custom_dataset/pretrain/minicpm/minicpm_1b_full_custom_pretrain_e1.py +++ b/xtuner/configs/custom_dataset/pretrain/minicpm/minicpm_1b_full_custom_pretrain_e1.py @@ -36,7 +36,7 @@ use_varlen_attn = False # Data -data_files = ['/root/ld/pull_request/xtuner/xtuner/configs/custom_dataset/pretrain/minicpm/pretrain.json'] +data_files = ['/path/to/json/file.json'] max_length = 2048 pack_to_max_length = True diff --git a/xtuner/configs/custom_dataset/sft/minicpm/minicpm_1b_full_custom_pretrain_e1.py b/xtuner/configs/custom_dataset/sft/minicpm/minicpm_1b_full_custom_pretrain_e1.py index bafe7f42c..fc0da5ed3 100644 --- a/xtuner/configs/custom_dataset/sft/minicpm/minicpm_1b_full_custom_pretrain_e1.py +++ b/xtuner/configs/custom_dataset/sft/minicpm/minicpm_1b_full_custom_pretrain_e1.py @@ -36,7 +36,7 @@ use_varlen_attn = False # Data -data_files = ['/root/ld/pull_request/xtuner/xtuner/configs/custom_dataset/pretrain/minicpm/pretrain.json'] +data_files = ['/path/to/json/file.json'] max_length = 2048 pack_to_max_length = True diff --git 
a/xtuner/configs/minicpm/1_2b/minicpm_1b_dpo_qlora.py b/xtuner/configs/minicpm/1_2b/minicpm_1b_dpo_qlora.py index ed48f29d0..b0fc4556a 100644 --- a/xtuner/configs/minicpm/1_2b/minicpm_1b_dpo_qlora.py +++ b/xtuner/configs/minicpm/1_2b/minicpm_1b_dpo_qlora.py @@ -1,13 +1,15 @@ # Copyright (c) OpenMMLab. All rights reserved. -from datasets import load_dataset import torch +from datasets import load_dataset from mmengine.dataset import DefaultSampler from mmengine.hooks import (CheckpointHook, DistSamplerSeedHook, IterTimerHook, LoggerHook, ParamSchedulerHook) from mmengine.optim import AmpOptimWrapper, CosineAnnealingLR, LinearLR -from torch.optim import AdamW -from transformers import AutoModelForCausalLM, AutoTokenizer,BitsAndBytesConfig from peft import LoraConfig +from torch.optim import AdamW +from transformers import (AutoModelForCausalLM, AutoTokenizer, + BitsAndBytesConfig) + from xtuner.dataset.collate_fns.preference_collate_fn import \ preference_collate_fn from xtuner.dataset.preference_dataset import (build_preference_dataset, diff --git a/xtuner/configs/minicpm/1_2b/minicpm_1b_lora_alpaca_zh_e3.py b/xtuner/configs/minicpm/1_2b/minicpm_1b_lora_alpaca_zh_e3.py index 428bdcd68..e0ed46147 100644 --- a/xtuner/configs/minicpm/1_2b/minicpm_1b_lora_alpaca_zh_e3.py +++ b/xtuner/configs/minicpm/1_2b/minicpm_1b_lora_alpaca_zh_e3.py @@ -7,8 +7,7 @@ from mmengine.optim import AmpOptimWrapper, CosineAnnealingLR, LinearLR from peft import LoraConfig from torch.optim import AdamW -from transformers import (AutoModelForCausalLM, AutoTokenizer, - BitsAndBytesConfig) +from transformers import AutoModelForCausalLM, AutoTokenizer from xtuner.dataset import process_hf_dataset from xtuner.dataset.collate_fns import default_collate_fn @@ -78,7 +77,7 @@ pretrained_model_name_or_path=pretrained_model_name_or_path, trust_remote_code=True, torch_dtype=torch.float16, - ), + ), lora=dict( type=LoraConfig, r=64, diff --git a/xtuner/configs/minicpm/2b/minicpm_2b_dpo_qlora.py b/xtuner/configs/minicpm/2b/minicpm_2b_dpo_qlora.py index 6b395ae3e..abf1e7ef9 100644 --- a/xtuner/configs/minicpm/2b/minicpm_2b_dpo_qlora.py +++ b/xtuner/configs/minicpm/2b/minicpm_2b_dpo_qlora.py @@ -1,13 +1,15 @@ # Copyright (c) OpenMMLab. All rights reserved. 
-from datasets import load_dataset import torch +from datasets import load_dataset from mmengine.dataset import DefaultSampler from mmengine.hooks import (CheckpointHook, DistSamplerSeedHook, IterTimerHook, LoggerHook, ParamSchedulerHook) from mmengine.optim import AmpOptimWrapper, CosineAnnealingLR, LinearLR -from torch.optim import AdamW -from transformers import AutoModelForCausalLM, AutoTokenizer,BitsAndBytesConfig from peft import LoraConfig +from torch.optim import AdamW +from transformers import (AutoModelForCausalLM, AutoTokenizer, + BitsAndBytesConfig) + from xtuner.dataset.collate_fns.preference_collate_fn import \ preference_collate_fn from xtuner.dataset.preference_dataset import (build_preference_dataset, diff --git a/xtuner/configs/minicpm/2b/minicpm_2b_lora_alpaca_zh_e3.py b/xtuner/configs/minicpm/2b/minicpm_2b_lora_alpaca_zh_e3.py index ff1988a68..a50fe91ab 100644 --- a/xtuner/configs/minicpm/2b/minicpm_2b_lora_alpaca_zh_e3.py +++ b/xtuner/configs/minicpm/2b/minicpm_2b_lora_alpaca_zh_e3.py @@ -7,8 +7,7 @@ from mmengine.optim import AmpOptimWrapper, CosineAnnealingLR, LinearLR from peft import LoraConfig from torch.optim import AdamW -from transformers import (AutoModelForCausalLM, AutoTokenizer, - BitsAndBytesConfig) +from transformers import AutoModelForCausalLM, AutoTokenizer from xtuner.dataset import process_hf_dataset from xtuner.dataset.collate_fns import default_collate_fn @@ -78,7 +77,7 @@ pretrained_model_name_or_path=pretrained_model_name_or_path, trust_remote_code=True, torch_dtype=torch.float16, - ), + ), lora=dict( type=LoraConfig, r=64, diff --git a/xtuner/utils/templates.py b/xtuner/utils/templates.py index ae8ba1523..59b472731 100644 --- a/xtuner/utils/templates.py +++ b/xtuner/utils/templates.py @@ -130,9 +130,7 @@ SYSTEM=('[INST] {system} [/INST]\n'), INSTRUCTION=('[INST] {input} [/INST]'), SEP='\n'), - minicpm=dict( - INSTRUCTION=('<用户> {input} '), - SEP='\n'), + minicpm=dict(INSTRUCTION=('<用户> {input} '), SEP='\n'), gemma=dict( # `system` field is extended by xtuner SYSTEM=('system\n{system}\n'), From 5a93e7d7518d347a9c381ce261fa4390a4385b7b Mon Sep 17 00:00:00 2001 From: whcao <41630003+HIT-cwh@users.noreply.github.com> Date: Fri, 19 Jul 2024 17:50:31 +0800 Subject: [PATCH 21/29] bump version to 0.1.22 (#855) --- xtuner/version.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/xtuner/version.py b/xtuner/version.py index 85cbb5561..c77f4bf8f 100644 --- a/xtuner/version.py +++ b/xtuner/version.py @@ -1,5 +1,5 @@ # Copyright (c) OpenMMLab. All rights reserved. 
-__version__ = '0.1.21' +__version__ = '0.1.22' short_version = __version__ From f30ad4c4302573f64e703bcf2aa12de3fce98f75 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Haian=20Huang=28=E6=B7=B1=E5=BA=A6=E7=9C=B8=29?= Date: Mon, 22 Jul 2024 14:45:09 +0800 Subject: [PATCH 22/29] Support InternVL 1.5/2.0 finetune (#737) * support internvl finetune * fix * fix * fix * update * update * update * update cfg * fix * support phi3 * support full+lora+qlora * support internvl 26b * fix lora cfg * update all * update * update * update config * update config * update config * fix type and add readme * update readme * RENAME * fix * update * support internvl2 * update --- xtuner/configs/internvl/README.md | 152 +++++++ xtuner/configs/internvl/README_zh-CN.md | 152 +++++++ .../internvl/v1_5/convert_to_official.py | 56 +++ .../internvl_v1_5_internlm2_26b_finetune.py | 170 ++++++++ ...ternvl_v1_5_internlm2_26b_lora_finetune.py | 183 ++++++++ ...ernvl_v1_5_internlm2_26b_qlora_finetune.py | 185 ++++++++ .../internvl_v1_5_internlm2_2b_finetune.py | 170 ++++++++ ...nternvl_v1_5_internlm2_2b_lora_finetune.py | 183 ++++++++ ...ternvl_v1_5_internlm2_2b_qlora_finetune.py | 185 ++++++++ .../v1_5/internvl_v1_5_phi3_4b_finetune.py | 170 ++++++++ .../internvl_v1_5_phi3_4b_lora_finetune.py | 183 ++++++++ .../internvl_v1_5_phi3_4b_qlora_finetune.py | 185 ++++++++ .../v2/internvl_v2_internlm2_26b_finetune.py | 170 ++++++++ ...internvl_v2_internlm2_26b_lora_finetune.py | 183 ++++++++ ...nternvl_v2_internlm2_26b_qlora_finetune.py | 185 ++++++++ .../v2/internvl_v2_internlm2_2b_finetune.py | 170 ++++++++ .../internvl_v2_internlm2_2b_lora_finetune.py | 183 ++++++++ ...internvl_v2_internlm2_2b_qlora_finetune.py | 185 ++++++++ .../v2/internvl_v2_internlm2_5_8b_finetune.py | 170 ++++++++ ...nternvl_v2_internlm2_5_8b_lora_finetune.py | 183 ++++++++ ...ternvl_v2_internlm2_5_8b_qlora_finetune.py | 185 ++++++++ .../v2/internvl_v2_phi3_4b_finetune.py | 170 ++++++++ .../v2/internvl_v2_phi3_4b_lora_finetune.py | 183 ++++++++ .../v2/internvl_v2_phi3_4b_qlora_finetune.py | 185 ++++++++ xtuner/dataset/__init__.py | 3 +- .../dataset/collate_fns/default_collate_fn.py | 3 +- xtuner/dataset/internvl_dataset.py | 409 ++++++++++++++++++ xtuner/dataset/samplers/length_grouped.py | 6 + xtuner/model/__init__.py | 3 +- xtuner/model/internvl.py | 320 ++++++++++++++ 30 files changed, 4867 insertions(+), 3 deletions(-) create mode 100644 xtuner/configs/internvl/README.md create mode 100644 xtuner/configs/internvl/README_zh-CN.md create mode 100644 xtuner/configs/internvl/v1_5/convert_to_official.py create mode 100644 xtuner/configs/internvl/v1_5/internvl_v1_5_internlm2_26b_finetune.py create mode 100644 xtuner/configs/internvl/v1_5/internvl_v1_5_internlm2_26b_lora_finetune.py create mode 100644 xtuner/configs/internvl/v1_5/internvl_v1_5_internlm2_26b_qlora_finetune.py create mode 100644 xtuner/configs/internvl/v1_5/internvl_v1_5_internlm2_2b_finetune.py create mode 100644 xtuner/configs/internvl/v1_5/internvl_v1_5_internlm2_2b_lora_finetune.py create mode 100644 xtuner/configs/internvl/v1_5/internvl_v1_5_internlm2_2b_qlora_finetune.py create mode 100644 xtuner/configs/internvl/v1_5/internvl_v1_5_phi3_4b_finetune.py create mode 100644 xtuner/configs/internvl/v1_5/internvl_v1_5_phi3_4b_lora_finetune.py create mode 100644 xtuner/configs/internvl/v1_5/internvl_v1_5_phi3_4b_qlora_finetune.py create mode 100644 xtuner/configs/internvl/v2/internvl_v2_internlm2_26b_finetune.py create mode 100644 
xtuner/configs/internvl/v2/internvl_v2_internlm2_26b_lora_finetune.py
 create mode 100644 xtuner/configs/internvl/v2/internvl_v2_internlm2_26b_qlora_finetune.py
 create mode 100644 xtuner/configs/internvl/v2/internvl_v2_internlm2_2b_finetune.py
 create mode 100644 xtuner/configs/internvl/v2/internvl_v2_internlm2_2b_lora_finetune.py
 create mode 100644 xtuner/configs/internvl/v2/internvl_v2_internlm2_2b_qlora_finetune.py
 create mode 100644 xtuner/configs/internvl/v2/internvl_v2_internlm2_5_8b_finetune.py
 create mode 100644 xtuner/configs/internvl/v2/internvl_v2_internlm2_5_8b_lora_finetune.py
 create mode 100644 xtuner/configs/internvl/v2/internvl_v2_internlm2_5_8b_qlora_finetune.py
 create mode 100644 xtuner/configs/internvl/v2/internvl_v2_phi3_4b_finetune.py
 create mode 100644 xtuner/configs/internvl/v2/internvl_v2_phi3_4b_lora_finetune.py
 create mode 100644 xtuner/configs/internvl/v2/internvl_v2_phi3_4b_qlora_finetune.py
 create mode 100644 xtuner/dataset/internvl_dataset.py
 create mode 100644 xtuner/model/internvl.py
diff --git a/xtuner/configs/internvl/README.md b/xtuner/configs/internvl/README.md
new file mode 100644
index 000000000..1f1acf191
--- /dev/null
+++ b/xtuner/configs/internvl/README.md
@@ -0,0 +1,152 @@
+# InternVL Full Pipeline
+
+English | [简体中文](./README_zh-CN.md)
+
+## InternVL 2
+
+> [InternVL-2: Better than the Best—Expanding Performance Boundaries of Open-Source Multimodal Models with the Progressive Scaling Strategy](https://internvl.github.io/blog/2024-07-02-InternVL-2.0/)
+
+We introduce InternVL-2, currently the most powerful open-source Multimodal Large Language Model (MLLM). The InternVL-2 family includes models ranging from a 2B model, suitable for edge devices, to a significantly more powerful 108B model. With larger-scale language models, InternVL-2-Pro demonstrates outstanding multimodal understanding capabilities, matching the performance of commercial closed-source models across various benchmarks.
+
+The InternVL-2 family is built upon the following designs:
+
+- Progressive alignment with larger language models: We introduce a progressive alignment training strategy, resulting in the first vision foundation model aligned with large language models. By employing a progressive training strategy where the model scales from small to large while the data is refined from coarse to fine, we have completed the training of large models at a relatively low cost. This approach has demonstrated excellent performance even with limited resources.
+- Multimodal input: With one set of parameters, our model supports multiple input modalities, including text, images, video, audio, and 3D point clouds.
+- Multitask output: Our model supports various output formats, such as images, bounding boxes, and masks, demonstrating extensive versatility. By connecting the MLLM with multiple downstream task decoders, InternVL-2 can be generalized to hundreds of vision-language tasks while achieving performance comparable to expert models.
+
+### Basic Introduction
+
+- `./v2/` contains the configuration files for training InternVL 2
+- Full/LoRA/QLoRA fine-tuning of the InternVL 2B/4B/8B/26B models is currently supported in single-image mode; fine-tuning on multiple images and videos will be supported as soon as possible
+- After training, you can use the `./v1_5/convert_to_official.py` script to convert the model trained by XTuner to the official format, so that all officially supported toolchains can be reused
+- All configurations are based on 8xA100 80G GPUs: 2B/4B can be trained with ZERO1, 8B with ZERO2, and 26B requires ZERO3. The parameters have not been tuned extensively, so you can modify them according to your own needs
+- The configs are verified with the LLaVA SFT data, which cannot fully reflect fine-tuning performance. You can customize the data according to your own needs; we will provide a relatively fair fine-tuning dataset later
+
+### Data preparation
+
+If you also want to use the LLaVA SFT dataset for training, please refer to the [document](../../../docs/en/user_guides/dataset_prepare.md#llava-dataset) to prepare the data.
+
+For custom data, multiple json and jsonl formats are supported; the data organization can follow the LLaVA SFT format (a sample record is sketched at the end of this README), and data sampling operations are supported.
+
+**(1) Support for multiple json or jsonl files**
+
+```text
+llava_dataset = dict(
+    type=InternVL_V1_5_Dataset,
+    model_path=path,
+    data_paths=['a.json','b.jsonl','c.json'],
+    image_folders=['a',None,'c'],
+    template=prompt_template,
+    max_length=max_length)
+```
+
+**(2) Support for custom sampling**
+
+```text
+llava_dataset = dict(
+    type=InternVL_V1_5_Dataset,
+    model_path=path,
+    data_paths=['a.json','b.jsonl','c.json'],
+    image_folders=['a',None,'c'],
+    repeat_times=[2,0.5,3.5],
+    template=prompt_template,
+    max_length=max_length)
+```
+
+### Training
+
+The provided configurations are mainly intended for fine-tuning on top of the official weights. After preparing the data, you can start training with the following command:
+
+```bash
+NPROC_PER_NODE=8 xtuner train internvl_v2_internlm2_5_8b_lora_finetune --deepspeed deepspeed_zero2
+```
+
+Checkpoints are saved by default in `./work_dirs/internvl_v2_internlm2_5_8b_lora_finetune/`.
+
+### Model Conversion
+
+After training, we obtain a set of weights, namely `./work_dirs/internvl_v2_internlm2_5_8b_lora_finetune/iter_xxx.pth`; to facilitate evaluation and dialogue, we can convert them to the official format.
+
+```bash
+python xtuner/configs/internvl/v1_5/convert_to_official.py xtuner/configs/internvl/v2/internvl_v2_internlm2_5_8b_lora_finetune.py ./work_dirs/internvl_v2_internlm2_5_8b_lora_finetune/iter_xxx.pth ./work_dirs/internvl_v2_internlm2_5_8b_lora_finetune/convert_model/
+```
+
+A complete set of official weights, including the configuration, will be generated under `./work_dirs/internvl_v2_internlm2_5_8b_lora_finetune/convert_model`; you can then use the [official toolchain](https://huggingface.co/OpenGVLab/InternVL2-8B) for evaluation and dialogue.
+
+If you encounter any problems during use, please feel free to contact us!!!
+
+## InternVL 1.5
+
+> [How Far Are We to GPT-4V? Closing the Gap to Commercial Multimodal Models with Open-Source Suites](https://arxiv.org/abs/2404.16821)
+
+In this report, we introduce InternVL 1.5, an open-source multimodal large language model (MLLM), to bridge the capability gap between open-source and proprietary commercial models in multimodal understanding. We introduce three simple improvements: (1) Strong Vision Encoder: we explore a continuous learning strategy for the large-scale vision foundation model InternViT-6B, boosting its visual understanding capabilities and enabling it to be transferred and reused across different LLMs. (2) Dynamic High-Resolution: we divide images into 1 to 40 tiles of 448×448 pixels according to the aspect ratio and resolution of the input images, which supports inputs up to 4K resolution. (3) High-Quality Bilingual Dataset: we carefully collected a high-quality bilingual dataset covering common scenes and document images, annotated with English and Chinese question-answer pairs, which significantly enhances performance on OCR- and Chinese-related tasks. We evaluate InternVL 1.5 through a series of benchmarks and comparative studies. Compared with both open-source and proprietary models, InternVL 1.5 shows competitive performance, achieving state-of-the-art results in 8 of 18 benchmarks.
+
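+As a rough illustration of the dynamic high-resolution scheme described above: preprocessing picks, among all tile grids whose tile count falls within [1, 40], the grid whose aspect ratio best matches the input image. The sketch below is illustrative only; the function name and tie-breaking are our own, and the actual preprocessing additionally handles details such as an optional thumbnail view:
+
+```python
+def best_tile_grid(width, height, tile=448, min_tiles=1, max_tiles=40):
+    """Pick a (cols, rows) grid of 448x448 tiles for an input image."""
+    grids = [(c, r) for c in range(1, max_tiles + 1)
+             for r in range(1, max_tiles + 1)
+             if min_tiles <= c * r <= max_tiles]
+    # Choose the grid whose aspect ratio is closest to the image's; the
+    # image is then resized to (cols * tile, rows * tile) and cut into tiles.
+    return min(grids, key=lambda g: abs(width / height - g[0] / g[1]))
+
+
+print(best_tile_grid(1920, 1080))  # -> (7, 4): 28 tiles for a 16:9 input
+```
+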
+### Basic Introduction
+
+- `./v1_5/` contains the configuration files for training InternVL 1.5
+- Full/LoRA/QLoRA fine-tuning of the InternVL 2B/4B/26B models is supported; considering both efficiency and performance, it is recommended to start with the 4B model
+- After training, you can use the `./v1_5/convert_to_official.py` script to convert the model trained by XTuner to the official format, so that all officially supported toolchains can be reused
+- All configurations are based on 8xA100 80G GPUs: 2B/4B can be trained with ZERO1, while the 26B model requires ZERO3. The parameters have not been tuned extensively, so you can modify them according to your own needs
+- The configs are verified with the LLaVA SFT data, which cannot fully reflect fine-tuning performance. You can customize the data according to your own needs; we will provide a relatively fair fine-tuning dataset later
+
+### Data preparation
+
+If you also want to use the LLaVA SFT dataset for training, please refer to the [document](../../../docs/en/user_guides/dataset_prepare.md#llava-dataset) to prepare the data.
+
+For custom data, multiple json and jsonl formats are supported; the data organization can follow the LLaVA SFT format (a sample record is sketched at the end of this README), and data sampling operations are supported.
+
+**(1) Support for multiple json or jsonl files**
+
+```text
+llava_dataset = dict(
+    type=InternVL_V1_5_Dataset,
+    model_path=path,
+    data_paths=['a.json','b.jsonl','c.json'],
+    image_folders=['a',None,'c'],
+    template=prompt_template,
+    max_length=max_length)
+```
+
+**(2) Support for custom sampling**
+
+```text
+llava_dataset = dict(
+    type=InternVL_V1_5_Dataset,
+    model_path=path,
+    data_paths=['a.json','b.jsonl','c.json'],
+    image_folders=['a',None,'c'],
+    repeat_times=[2,0.5,3.5],
+    template=prompt_template,
+    max_length=max_length)
+```
+
+### Training
+
+The provided configurations are mainly intended for fine-tuning on top of the official weights. After preparing the data, you can start training with the following command:
+
+```bash
+NPROC_PER_NODE=8 xtuner train internvl_v1_5_phi3_4b_lora_finetune --deepspeed deepspeed_zero1
+# NPROC_PER_NODE=8 xtuner train internvl_v1_5_internlm2_26b_lora_finetune.py --deepspeed deepspeed_zero3
+```
+
+Checkpoints are saved by default in `./work_dirs/internvl_v1_5_phi3_4b_lora_finetune/`.
+
+### Model Conversion
+
+After training, we obtain a set of weights, namely `./work_dirs/internvl_v1_5_phi3_4b_lora_finetune/iter_xxx.pth`; to facilitate evaluation and dialogue, we can convert them to the official format.
+
+```bash
+python xtuner/configs/internvl/v1_5/convert_to_official.py xtuner/configs/internvl/v1_5/internvl_v1_5_phi3_4b_lora_finetune.py ./work_dirs/internvl_v1_5_phi3_4b_lora_finetune/iter_xxx.pth ./work_dirs/internvl_v1_5_phi3_4b_lora_finetune/internvl_v1_5_phi3_4b/
+```
+
+A complete set of official weights, including the configuration, will be generated under `./work_dirs/internvl_v1_5_phi3_4b_lora_finetune/internvl_v1_5_phi3_4b/`; you can then use the [official toolchain](https://github.com/OpenGVLab/InternVL) for evaluation and dialogue.
+
+If you encounter any problems during use, please feel free to contact us!!!
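+
+For reference, a single record in the LLaVA SFT organization mentioned under "Data preparation" might look like the sketch below; the field names follow the public LLaVA format, while the file name and text are invented for illustration:
+
+```python
+record = {
+    # Resolved relative to the matching entry of `image_folders`.
+    'image': 'images/0001.jpg',
+    'conversations': [
+        {'from': 'human', 'value': '<image>\nWhat is shown in this picture?'},
+        {'from': 'gpt', 'value': 'A red double-decker bus parked on a street.'},
+    ],
+}
+```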
diff --git a/xtuner/configs/internvl/README_zh-CN.md b/xtuner/configs/internvl/README_zh-CN.md
new file mode 100644
index 000000000..cdaa59348
--- /dev/null
+++ b/xtuner/configs/internvl/README_zh-CN.md
@@ -0,0 +1,152 @@
+# InternVL 全流程
+
+[English](./README.md) | 简体中文
+
+## InternVL 2
+
+> [InternVL-2: Better than the Best—Expanding Performance Boundaries of Open-Source Multimodal Models with the Progressive Scaling Strategy](https://internvl.github.io/blog/2024-07-02-InternVL-2.0/)
+
+我们引入了 InternVL-2,目前最强大的开源多模态大语言模型(MLLM)。InternVL-2 系列包括从适合于边缘设备的 2B 模型到强大的 108B 模型等多种规模的模型。借助更大规模的语言模型,InternVL-2-Pro 展现出了出色的多模态理解能力,在各种基准测试中的性能与商业闭源模型相匹配。
+
+InternVL-2 系列基于以下设计:
+
+- 渐进式的大型语言模型:我们引入了一种渐进式对齐训练策略,实现了首个与大型语言模型对齐的视觉基础模型。通过采用从小到大模型扩展、从粗到细数据优化的渐进式训练策略,我们以较低的成本完成了大模型的训练。这种方法已经展示了出色的性能,资源有限的情况下也能取得良好的结果。
+- 多模态输入:使用一套参数,我们的模型支持文本、图像、视频、音频和 3D 点云等多种输入模态。
+- 多任务输出:我们的模型支持图像、边界框和掩码等各种输出格式,展现出广泛的多功能性。通过将 MLLM 与多个下游任务解码器相连接,InternVL-2 可以泛化到数百个视觉语言任务,并取得与专家模型相当的性能。
+
+### 基本说明
+
+- `./v2/` 包含着 InternVL 2 训练配置的配置文件
+- 支持了 InternVL 2B/4B/8B/26B 模型全量/LoRA/QLoRA 单图模式的微调,会尽快支持多图和视频的微调。
+- 在训练完成后,可以使用 `./v1_5/convert_to_official.py` 脚本将 XTuner 训练的模型转换为官方格式,从而复用官方所支持的所有工具链
+- 目前所有配置都是以 8xA100 80G 显卡为基准,2B/4B 可以使用 ZERO1 训练,8B 模型要 ZERO2 运行,26B 模型必须要 ZERO3,并且没有对参数进行过多的调整,你可以按照你自己的需求进行修改
+- 目前是以 LLaVA SFT 数据进行验证,无法充分反映微调性能,你可以根据自己的需求进行数据自定义,后续我们会提供一个相对公平的微调数据集
+
+### 数据准备
+
+如果你也想使用 LLaVA SFT 数据集进行训练,请参考[文档](../../../docs/zh_cn/user_guides/dataset_prepare.md#llava-dataset) 准备数据。
+
+对于自定义数据,支持多种 json 和 jsonl 格式,内部数据组织可以参考 LLaVA SFT 格式,且支持数据采样操作。
+
+**(1) 支持多个 json 或者 jsonl 数据**
+
+```text
+llava_dataset = dict(
+    type=InternVL_V1_5_Dataset,
+    model_path=path,
+    data_paths=['a.json','b.jsonl','c.json'],
+    image_folders=['a',None,'c'],
+    template=prompt_template,
+    max_length=max_length)
+```
+
+**(2) 支持自定义采样**
+
+```text
+llava_dataset = dict(
+    type=InternVL_V1_5_Dataset,
+    model_path=path,
+    data_paths=['a.json','b.jsonl','c.json'],
+    image_folders=['a',None,'c'],
+    repeat_times=[2,0.5,3.5],
+    template=prompt_template,
+    max_length=max_length)
+```
+
+### 训练流程
+
+所提供的配置主要用于基于官方权重继续微调。在准备好数据后,你可以使用以下命令进行训练:
+
+```bash
+NPROC_PER_NODE=8 xtuner train internvl_v2_internlm2_5_8b_lora_finetune --deepspeed deepspeed_zero2
+```
+
+默认保存在 `./work_dirs/internvl_v2_internlm2_5_8b_lora_finetune/`。
+
+### 模型转换
+
+训练后,我们将获得一组权重即 `./work_dirs/internvl_v2_internlm2_5_8b_lora_finetune/iter_xxx.pth`,为了方便评测和对话,可以将其转换为官方权重。
+
+```bash
+python xtuner/configs/internvl/v1_5/convert_to_official.py xtuner/configs/internvl/v2/internvl_v2_internlm2_5_8b_lora_finetune.py ./work_dirs/internvl_v2_internlm2_5_8b_lora_finetune/iter_xxx.pth ./work_dirs/internvl_v2_internlm2_5_8b_lora_finetune/convert_model/
+```
+
+此时,会在 `./work_dirs/internvl_v2_internlm2_5_8b_lora_finetune/convert_model` 下生成一组包括配置的完整官方权重,你可以使用[官方工具链](https://huggingface.co/OpenGVLab/InternVL2-8B)进行评测和对话。
+
+如果你在使用中碰到任何问题,欢迎联系我们!!!
+
+## InternVL 1.5
+
+> [How Far Are We to GPT-4V? Closing the Gap to Commercial Multimodal Models with Open-Source Suites](https://arxiv.org/abs/2404.16821)
+
+在本报告中,我们介绍了开源多模态大语言模型 InternVL 1.5,以弥补开源模型与商业专有模型在多模态理解能力上的差距。我们引入了三项简单的改进:(1) 强大的视觉编码器:我们探索了大规模视觉基础模型 InternViT-6B 的连续学习策略,提升了其视觉理解能力,并使其可以在不同的大语言模型中进行迁移和重复利用。(2) 动态高分辨率:我们根据输入图像的长宽比和分辨率,将图像划分为从1到40个448×448像素的瓦片,支持高达4K分辨率的输入。(3) 高质量双语数据集:我们精心收集了一个高质量的双语数据集,涵盖了常见场景、文档图像,并用英语和中文问答对进行了注释,显著提升了在OCR和中文相关任务中的性能。我们通过一系列基准测试和对比研究评估了 InternVL 1.5。与开源和专有模型相比,InternVL 1.5 表现出了竞争力,在18个基准中的8个中取得了最先进的结果。
+
+### 基本说明
+
+- `./v1_5/` 包含着 InternVL 1.5 训练配置的配置文件
+- 支持 InternVL 2B/4B/26B 模型全量/LoRA/QLoRA 微调,综合考虑效率性能,建议你优先选择 4B 模型
+- 在训练完成后,可以使用 `./v1_5/convert_to_official.py` 脚本将 XTuner 训练的模型转换为官方格式,从而复用官方所支持的所有工具链
+- 目前所有配置都是以 8xA100 80G 显卡为基准,2B/4B 可以使用 ZERO1 训练,26B 模型必须要 ZERO3 运行,并且没有对参数进行过多的调整,你可以按照你自己的需求进行修改
+- 目前是以 LLaVA SFT 数据进行验证,无法充分反映微调性能,你可以根据自己的需求进行数据自定义,后续我们会提供一个相对公平的微调数据集
+
+### 数据准备
+
+如果你也想使用 LLaVA SFT 数据集进行训练,请参考[文档](../../../docs/zh_cn/user_guides/dataset_prepare.md#llava-dataset) 准备数据。
+
+对于自定义数据,支持多种 json 和 jsonl 格式,内部数据组织可以参考 LLaVA SFT 格式,且支持数据采样操作。
+
+**(1) 支持多个 json 或者 jsonl 数据**
+
+```text
+llava_dataset = dict(
+    type=InternVL_V1_5_Dataset,
+    model_path=path,
+    data_paths=['a.json','b.jsonl','c.json'],
+    image_folders=['a',None,'c'],
+    template=prompt_template,
+    max_length=max_length)
+```
+
+**(2) 支持自定义采样**
+
+```text
+llava_dataset = dict(
+    type=InternVL_V1_5_Dataset,
+    model_path=path,
+    data_paths=['a.json','b.jsonl','c.json'],
+    image_folders=['a',None,'c'],
+    repeat_times=[2,0.5,3.5],
+    template=prompt_template,
+    max_length=max_length)
+```
+
+### 训练流程
+
+所提供的配置主要用于基于官方权重继续微调。在准备好数据后,你可以使用以下命令进行训练:
+
+```bash
+NPROC_PER_NODE=8 xtuner train internvl_v1_5_phi3_4b_lora_finetune --deepspeed deepspeed_zero1
+# NPROC_PER_NODE=8 xtuner train internvl_v1_5_internlm2_26b_lora_finetune.py --deepspeed deepspeed_zero3
+```
+
+默认保存在 `./work_dirs/internvl_v1_5_phi3_4b_lora_finetune/`。
+
+### 模型转换
+
+训练后,我们将获得一组权重即 `./work_dirs/internvl_v1_5_phi3_4b_lora_finetune/iter_xxx.pth`,为了方便评测和对话,可以将其转换为官方权重。
+
+```bash
+python xtuner/configs/internvl/v1_5/convert_to_official.py xtuner/configs/internvl/v1_5/internvl_v1_5_phi3_4b_lora_finetune.py ./work_dirs/iter_xxx.pth ./work_dirs/internvl_v1_5_phi3_4b_lora_finetune/internvl_v1_5_phi3_4b/
+```
+
+此时,会在 `./work_dirs/internvl_v1_5_phi3_4b_lora_finetune/internvl_v1_5_phi3_4b/` 下生成一组包括配置的完整官方权重,你可以使用[官方工具链](https://github.com/OpenGVLab/InternVL)进行评测和对话。
+
+如果你在使用中碰到任何问题,欢迎联系我们!!!
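
The `convert_to_official.py` script added below merges any LoRA adapters back into the vision encoder and language model (via `merge_and_unload`) before saving, so the output directory behaves like an official release. A minimal sketch of loading the converted weights, following the `AutoModel`/`trust_remote_code` pattern from the official InternVL model cards (the path below is a placeholder):

```python
import torch
from transformers import AutoModel, AutoTokenizer

# Directory produced by convert_to_official.py (placeholder path).
path = './work_dirs/internvl_v2_internlm2_5_8b_lora_finetune/convert_model'

model = AutoModel.from_pretrained(
    path, torch_dtype=torch.bfloat16, trust_remote_code=True).eval()
tokenizer = AutoTokenizer.from_pretrained(path, trust_remote_code=True)
```
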
diff --git a/xtuner/configs/internvl/v1_5/convert_to_official.py b/xtuner/configs/internvl/v1_5/convert_to_official.py new file mode 100644 index 000000000..765855daa --- /dev/null +++ b/xtuner/configs/internvl/v1_5/convert_to_official.py @@ -0,0 +1,56 @@ +import argparse +import os.path as osp + +import torch +from mmengine.config import Config +from transformers import AutoTokenizer + +from xtuner.model.utils import LoadWoInit +from xtuner.registry import BUILDER + + +def convert_to_official(config, trained_path, save_path): + cfg = Config.fromfile(config) + cfg.model.pretrained_pth = trained_path + cfg.model.quantization_vit = False + cfg.model.quantization_llm = False + + with LoadWoInit(): + model = BUILDER.build(cfg.model) + model.to(torch.bfloat16) + + if model.use_visual_encoder_lora: + vision_model = model.model.vision_model.merge_and_unload() + model.model.vision_model = vision_model + + if model.use_llm_lora: + language_model = model.model.language_model.merge_and_unload() + model.model.language_model = language_model + + model.model.save_pretrained(save_path) + + tokenizer = AutoTokenizer.from_pretrained( + cfg.model.model_path, trust_remote_code=True) + tokenizer.save_pretrained(save_path) + + print(model) + + +def main(): + parser = argparse.ArgumentParser( + description='Convert the pth model to HuggingFace model') + parser.add_argument('config', help='config file name or path.') + parser.add_argument('trained_model_pth', help='The trained model path.') + parser.add_argument( + 'save_path', help='The path to save the converted model.') + args = parser.parse_args() + + if osp.realpath(args.trained_model_pth) == osp.realpath(args.save_path): + raise ValueError( + 'The trained path and save path should not be the same.') + + convert_to_official(args.config, args.trained_model_pth, args.save_path) + + +if __name__ == '__main__': + main() diff --git a/xtuner/configs/internvl/v1_5/internvl_v1_5_internlm2_26b_finetune.py b/xtuner/configs/internvl/v1_5/internvl_v1_5_internlm2_26b_finetune.py new file mode 100644 index 000000000..d5eec7829 --- /dev/null +++ b/xtuner/configs/internvl/v1_5/internvl_v1_5_internlm2_26b_finetune.py @@ -0,0 +1,170 @@ +# Copyright (c) OpenMMLab. All rights reserved. 
+from mmengine.hooks import (CheckpointHook, DistSamplerSeedHook, IterTimerHook, + LoggerHook, ParamSchedulerHook) +from mmengine.optim import AmpOptimWrapper, CosineAnnealingLR, LinearLR +from torch.optim import AdamW +from transformers import AutoTokenizer + +from xtuner.dataset import InternVL_V1_5_Dataset +from xtuner.dataset.collate_fns import default_collate_fn +from xtuner.dataset.samplers import LengthGroupedSampler +from xtuner.engine.hooks import DatasetInfoHook +from xtuner.engine.runner import TrainLoop +from xtuner.model import InternVL_V1_5 +from xtuner.utils import PROMPT_TEMPLATE + +####################################################################### +# PART 1 Settings # +####################################################################### +# Model +path = 'OpenGVLab/InternVL-Chat-V1-5' +prompt_template = PROMPT_TEMPLATE.internlm2_chat + +# Data +data_root = './data/llava_data/' +data_path = data_root + 'LLaVA-Instruct-150K/llava_v1_5_mix665k.json' +image_folder = data_root + 'llava_images' +max_length = 4096 + +# Scheduler & Optimizer +batch_size = 1 # per_device +accumulative_counts = 8 +dataloader_num_workers = 4 +max_epochs = 1 +optim_type = AdamW +# official 1024 -> 2e-5 +lr = 1e-6 +betas = (0.9, 0.999) +weight_decay = 0.01 +max_norm = 1 # grad clip +warmup_ratio = 0.03 + +# Save +save_steps = 1000 +save_total_limit = 1 # Maximum checkpoints to keep (-1 means unlimited) + +####################################################################### +# PART 2 Model & Tokenizer & Image Processor # +####################################################################### +model = dict( + type=InternVL_V1_5, + model_path=path, + freeze_llm=False, + freeze_visual_encoder=True # or False +) + +####################################################################### +# PART 3 Dataset & Dataloader # +####################################################################### +llava_dataset = dict( + type=InternVL_V1_5_Dataset, + model_path=path, + data_paths=data_path, + image_folders=image_folder, + template=prompt_template, + max_length=max_length) + +train_dataloader = dict( + batch_size=batch_size, + num_workers=dataloader_num_workers, + dataset=llava_dataset, + sampler=dict( + type=LengthGroupedSampler, + length_property='modality_length', + per_device_batch_size=batch_size * accumulative_counts), + collate_fn=dict(type=default_collate_fn)) + +####################################################################### +# PART 4 Scheduler & Optimizer # +####################################################################### +# optimizer +optim_wrapper = dict( + type=AmpOptimWrapper, + optimizer=dict( + type=optim_type, lr=lr, betas=betas, weight_decay=weight_decay), + clip_grad=dict(max_norm=max_norm, error_if_nonfinite=False), + accumulative_counts=accumulative_counts, + loss_scale='dynamic', + dtype='float16') + +# learning policy +# More information: https://github.com/open-mmlab/mmengine/blob/main/docs/en/tutorials/param_scheduler.md # noqa: E501 +param_scheduler = [ + dict( + type=LinearLR, + start_factor=1e-5, + by_epoch=True, + begin=0, + end=warmup_ratio * max_epochs, + convert_to_iter_based=True), + dict( + type=CosineAnnealingLR, + eta_min=0.0, + by_epoch=True, + begin=warmup_ratio * max_epochs, + end=max_epochs, + convert_to_iter_based=True) +] + +# train, val, test setting +train_cfg = dict(type=TrainLoop, max_epochs=max_epochs) + +####################################################################### +# PART 5 Runtime # 
+####################################################################### +# Log the dialogue periodically during the training process, optional +tokenizer = dict( + type=AutoTokenizer.from_pretrained, + pretrained_model_name_or_path=path, + trust_remote_code=True) + +custom_hooks = [ + dict(type=DatasetInfoHook, tokenizer=tokenizer), +] + +# configure default hooks +default_hooks = dict( + # record the time of every iteration. + timer=dict(type=IterTimerHook), + # print log every 10 iterations. + logger=dict(type=LoggerHook, log_metric_by_epoch=False, interval=10), + # enable the parameter scheduler. + param_scheduler=dict(type=ParamSchedulerHook), + # save checkpoint per `save_steps`. + checkpoint=dict( + type=CheckpointHook, + save_optimizer=False, + by_epoch=False, + interval=save_steps, + max_keep_ckpts=save_total_limit), + # set sampler seed in distributed evrionment. + sampler_seed=dict(type=DistSamplerSeedHook), +) + +# configure environment +env_cfg = dict( + # whether to enable cudnn benchmark + cudnn_benchmark=False, + # set multi process parameters + mp_cfg=dict(mp_start_method='fork', opencv_num_threads=0), + # set distributed parameters + dist_cfg=dict(backend='nccl'), +) + +# set visualizer +visualizer = None + +# set log level +log_level = 'INFO' + +# load from which checkpoint +load_from = None + +# whether to resume training from the loaded checkpoint +resume = False + +# Defaults to use random seed and disable `deterministic` +randomness = dict(seed=None, deterministic=False) + +# set log processor +log_processor = dict(by_epoch=False) diff --git a/xtuner/configs/internvl/v1_5/internvl_v1_5_internlm2_26b_lora_finetune.py b/xtuner/configs/internvl/v1_5/internvl_v1_5_internlm2_26b_lora_finetune.py new file mode 100644 index 000000000..0fb511d42 --- /dev/null +++ b/xtuner/configs/internvl/v1_5/internvl_v1_5_internlm2_26b_lora_finetune.py @@ -0,0 +1,183 @@ +# Copyright (c) OpenMMLab. All rights reserved. 
+from mmengine.hooks import (CheckpointHook, DistSamplerSeedHook, IterTimerHook, + LoggerHook, ParamSchedulerHook) +from mmengine.optim import AmpOptimWrapper, CosineAnnealingLR, LinearLR +from peft import LoraConfig +from torch.optim import AdamW +from transformers import AutoTokenizer + +from xtuner.dataset import InternVL_V1_5_Dataset +from xtuner.dataset.collate_fns import default_collate_fn +from xtuner.dataset.samplers import LengthGroupedSampler +from xtuner.engine.hooks import DatasetInfoHook +from xtuner.engine.runner import TrainLoop +from xtuner.model import InternVL_V1_5 +from xtuner.utils import PROMPT_TEMPLATE + +####################################################################### +# PART 1 Settings # +####################################################################### +# Model +path = 'OpenGVLab/InternVL-Chat-V1-5' +prompt_template = PROMPT_TEMPLATE.internlm2_chat + +# Data +data_root = './data/llava_data/' +data_path = data_root + 'LLaVA-Instruct-150K/llava_v1_5_mix665k.json' +image_folder = data_root + 'llava_images' +max_length = 4096 + +# Scheduler & Optimizer +batch_size = 2 # per_device +accumulative_counts = 4 +dataloader_num_workers = 4 +max_epochs = 1 +optim_type = AdamW +# official 1024 -> 2e-5 +lr = 1e-6 +betas = (0.9, 0.999) +weight_decay = 0.01 +max_norm = 1 # grad clip +warmup_ratio = 0.03 + +# Save +save_steps = 1000 +save_total_limit = 1 # Maximum checkpoints to keep (-1 means unlimited) + +####################################################################### +# PART 2 Model & Tokenizer & Image Processor # +####################################################################### +model = dict( + type=InternVL_V1_5, + model_path=path, + freeze_llm=True, + freeze_visual_encoder=True, + # comment the following lines if you don't want to use Lora in llm + llm_lora=dict( + type=LoraConfig, + r=128, + lora_alpha=256, + lora_dropout=0.05, + target_modules=None, + task_type='CAUSAL_LM'), + # uncomment the following lines if you don't want to use Lora in visual encoder # noqa + # visual_encoder_lora=dict( + # type=LoraConfig, r=64, lora_alpha=16, lora_dropout=0.05, + # target_modules=['attn.qkv', 'attn.proj', 'mlp.fc1', 'mlp.fc2']) +) + +####################################################################### +# PART 3 Dataset & Dataloader # +####################################################################### +llava_dataset = dict( + type=InternVL_V1_5_Dataset, + model_path=path, + data_paths=data_path, + image_folders=image_folder, + template=prompt_template, + max_length=max_length) + +train_dataloader = dict( + batch_size=batch_size, + num_workers=dataloader_num_workers, + dataset=llava_dataset, + sampler=dict( + type=LengthGroupedSampler, + length_property='modality_length', + per_device_batch_size=batch_size * accumulative_counts), + collate_fn=dict(type=default_collate_fn)) + +####################################################################### +# PART 4 Scheduler & Optimizer # +####################################################################### +# optimizer +optim_wrapper = dict( + type=AmpOptimWrapper, + optimizer=dict( + type=optim_type, lr=lr, betas=betas, weight_decay=weight_decay), + clip_grad=dict(max_norm=max_norm, error_if_nonfinite=False), + accumulative_counts=accumulative_counts, + loss_scale='dynamic', + dtype='float16') + +# learning policy +# More information: https://github.com/open-mmlab/mmengine/blob/main/docs/en/tutorials/param_scheduler.md # noqa: E501 +param_scheduler = [ + dict( + type=LinearLR, + start_factor=1e-5, + 
by_epoch=True, + begin=0, + end=warmup_ratio * max_epochs, + convert_to_iter_based=True), + dict( + type=CosineAnnealingLR, + eta_min=0.0, + by_epoch=True, + begin=warmup_ratio * max_epochs, + end=max_epochs, + convert_to_iter_based=True) +] + +# train, val, test setting +train_cfg = dict(type=TrainLoop, max_epochs=max_epochs) + +####################################################################### +# PART 5 Runtime # +####################################################################### +# Log the dialogue periodically during the training process, optional +tokenizer = dict( + type=AutoTokenizer.from_pretrained, + pretrained_model_name_or_path=path, + trust_remote_code=True) + +custom_hooks = [ + dict(type=DatasetInfoHook, tokenizer=tokenizer), +] + +# configure default hooks +default_hooks = dict( + # record the time of every iteration. + timer=dict(type=IterTimerHook), + # print log every 10 iterations. + logger=dict(type=LoggerHook, log_metric_by_epoch=False, interval=10), + # enable the parameter scheduler. + param_scheduler=dict(type=ParamSchedulerHook), + # save checkpoint per `save_steps`. + checkpoint=dict( + type=CheckpointHook, + save_optimizer=False, + by_epoch=False, + interval=save_steps, + max_keep_ckpts=save_total_limit), + # set sampler seed in distributed evrionment. + sampler_seed=dict(type=DistSamplerSeedHook), +) + +# configure environment +env_cfg = dict( + # whether to enable cudnn benchmark + cudnn_benchmark=False, + # set multi process parameters + mp_cfg=dict(mp_start_method='fork', opencv_num_threads=0), + # set distributed parameters + dist_cfg=dict(backend='nccl'), +) + +# set visualizer +visualizer = None + +# set log level +log_level = 'INFO' + +# load from which checkpoint +load_from = None + +# whether to resume training from the loaded checkpoint +resume = False + +# Defaults to use random seed and disable `deterministic` +randomness = dict(seed=None, deterministic=False) + +# set log processor +log_processor = dict(by_epoch=False) diff --git a/xtuner/configs/internvl/v1_5/internvl_v1_5_internlm2_26b_qlora_finetune.py b/xtuner/configs/internvl/v1_5/internvl_v1_5_internlm2_26b_qlora_finetune.py new file mode 100644 index 000000000..8d994c81d --- /dev/null +++ b/xtuner/configs/internvl/v1_5/internvl_v1_5_internlm2_26b_qlora_finetune.py @@ -0,0 +1,185 @@ +# Copyright (c) OpenMMLab. All rights reserved. 
+from mmengine.hooks import (CheckpointHook, DistSamplerSeedHook, IterTimerHook,
+                            LoggerHook, ParamSchedulerHook)
+from mmengine.optim import AmpOptimWrapper, CosineAnnealingLR, LinearLR
+from peft import LoraConfig
+from torch.optim import AdamW
+from transformers import AutoTokenizer
+
+from xtuner.dataset import InternVL_V1_5_Dataset
+from xtuner.dataset.collate_fns import default_collate_fn
+from xtuner.dataset.samplers import LengthGroupedSampler
+from xtuner.engine.hooks import DatasetInfoHook
+from xtuner.engine.runner import TrainLoop
+from xtuner.model import InternVL_V1_5
+from xtuner.utils import PROMPT_TEMPLATE
+
+#######################################################################
+#                          PART 1  Settings                           #
+#######################################################################
+# Model
+path = 'OpenGVLab/InternVL-Chat-V1-5'
+prompt_template = PROMPT_TEMPLATE.internlm2_chat
+
+# Data
+data_root = './data/llava_data/'
+data_path = data_root + 'LLaVA-Instruct-150K/llava_v1_5_mix665k.json'
+image_folder = data_root + 'llava_images'
+max_length = 4096
+
+# Scheduler & Optimizer
+batch_size = 2  # per_device
+accumulative_counts = 4
+dataloader_num_workers = 4
+max_epochs = 1
+optim_type = AdamW
+# official 1024 -> 2e-5
+lr = 1e-6
+betas = (0.9, 0.999)
+weight_decay = 0.01
+max_norm = 1  # grad clip
+warmup_ratio = 0.03
+
+# Save
+save_steps = 1000
+save_total_limit = 1  # Maximum checkpoints to keep (-1 means unlimited)
+
+#######################################################################
+#            PART 2  Model & Tokenizer & Image Processor              #
+#######################################################################
+model = dict(
+    type=InternVL_V1_5,
+    model_path=path,
+    freeze_llm=True,
+    freeze_visual_encoder=True,
+    quantization_llm=True,  # or False
+    quantization_vit=False,  # or True and uncomment visual_encoder_lora
+    # comment the following lines if you don't want to use Lora in llm
+    llm_lora=dict(
+        type=LoraConfig,
+        r=128,
+        lora_alpha=256,
+        lora_dropout=0.05,
+        target_modules=None,
+        task_type='CAUSAL_LM'),
+    # uncomment the following lines if you want to use Lora in visual encoder  # noqa
+    # visual_encoder_lora=dict(
+    #     type=LoraConfig, r=64, lora_alpha=16, lora_dropout=0.05,
+    #     target_modules=['attn.qkv', 'attn.proj', 'mlp.fc1', 'mlp.fc2'])
+)
+
+#######################################################################
+#                      PART 3  Dataset & Dataloader                   #
+#######################################################################
+llava_dataset = dict(
+    type=InternVL_V1_5_Dataset,
+    model_path=path,
+    data_paths=data_path,
+    image_folders=image_folder,
+    template=prompt_template,
+    max_length=max_length)
+
+train_dataloader = dict(
+    batch_size=batch_size,
+    num_workers=dataloader_num_workers,
+    dataset=llava_dataset,
+    sampler=dict(
+        type=LengthGroupedSampler,
+        length_property='modality_length',
+        per_device_batch_size=batch_size * accumulative_counts),
+    collate_fn=dict(type=default_collate_fn))
+
+#######################################################################
+#                    PART 4  Scheduler & Optimizer                    #
+#######################################################################
+# optimizer
+optim_wrapper = dict(
+    type=AmpOptimWrapper,
+    optimizer=dict(
+        type=optim_type, lr=lr, betas=betas, weight_decay=weight_decay),
+    clip_grad=dict(max_norm=max_norm, error_if_nonfinite=False),
+    accumulative_counts=accumulative_counts,
+    loss_scale='dynamic',
+    dtype='float16')
+
+# learning policy
+# More information: https://github.com/open-mmlab/mmengine/blob/main/docs/en/tutorials/param_scheduler.md  # noqa: E501
+param_scheduler = [
+    dict(
+        type=LinearLR,
+        start_factor=1e-5,
+        by_epoch=True,
+        begin=0,
+        end=warmup_ratio * max_epochs,
+        convert_to_iter_based=True),
+    dict(
+        type=CosineAnnealingLR,
+        eta_min=0.0,
+        by_epoch=True,
+        begin=warmup_ratio * max_epochs,
+        end=max_epochs,
+        convert_to_iter_based=True)
+]
+
+# train, val, test setting
+train_cfg = dict(type=TrainLoop, max_epochs=max_epochs)
+
+#######################################################################
+#                           PART 5  Runtime                           #
+#######################################################################
+# Log the dialogue periodically during training, optional
+tokenizer = dict(
+    type=AutoTokenizer.from_pretrained,
+    pretrained_model_name_or_path=path,
+    trust_remote_code=True)
+
+custom_hooks = [
+    dict(type=DatasetInfoHook, tokenizer=tokenizer),
+]
+
+# configure default hooks
+default_hooks = dict(
+    # record the time of every iteration.
+    timer=dict(type=IterTimerHook),
+    # print log every 10 iterations.
+    logger=dict(type=LoggerHook, log_metric_by_epoch=False, interval=10),
+    # enable the parameter scheduler.
+    param_scheduler=dict(type=ParamSchedulerHook),
+    # save checkpoint per `save_steps`.
+    checkpoint=dict(
+        type=CheckpointHook,
+        save_optimizer=False,
+        by_epoch=False,
+        interval=save_steps,
+        max_keep_ckpts=save_total_limit),
+    # set sampler seed in distributed environment.
+    sampler_seed=dict(type=DistSamplerSeedHook),
+)
+
+# configure environment
+env_cfg = dict(
+    # whether to enable cudnn benchmark
+    cudnn_benchmark=False,
+    # set multi process parameters
+    mp_cfg=dict(mp_start_method='fork', opencv_num_threads=0),
+    # set distributed parameters
+    dist_cfg=dict(backend='nccl'),
+)
+
+# set visualizer
+visualizer = None
+
+# set log level
+log_level = 'INFO'
+
+# load from which checkpoint
+load_from = None
+
+# whether to resume training from the loaded checkpoint
+resume = False
+
+# Defaults to use random seed and disable `deterministic`
+randomness = dict(seed=None, deterministic=False)
+
+# set log processor
+log_processor = dict(by_epoch=False)
diff --git a/xtuner/configs/internvl/v1_5/internvl_v1_5_internlm2_2b_finetune.py b/xtuner/configs/internvl/v1_5/internvl_v1_5_internlm2_2b_finetune.py
new file mode 100644
index 000000000..09fb01e3f
--- /dev/null
+++ b/xtuner/configs/internvl/v1_5/internvl_v1_5_internlm2_2b_finetune.py
@@ -0,0 +1,170 @@
+# Copyright (c) OpenMMLab. All rights reserved.
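+# Full-parameter finetune of Mini-InternVL 2B: freeze_llm=False below, so the
+# whole LLM is updated and no LoRA adapter is configured.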
+from mmengine.hooks import (CheckpointHook, DistSamplerSeedHook, IterTimerHook, + LoggerHook, ParamSchedulerHook) +from mmengine.optim import AmpOptimWrapper, CosineAnnealingLR, LinearLR +from torch.optim import AdamW +from transformers import AutoTokenizer + +from xtuner.dataset import InternVL_V1_5_Dataset +from xtuner.dataset.collate_fns import default_collate_fn +from xtuner.dataset.samplers import LengthGroupedSampler +from xtuner.engine.hooks import DatasetInfoHook +from xtuner.engine.runner import TrainLoop +from xtuner.model import InternVL_V1_5 +from xtuner.utils import PROMPT_TEMPLATE + +####################################################################### +# PART 1 Settings # +####################################################################### +# Model +path = 'OpenGVLab/Mini-InternVL-Chat-2B-V1-5' +prompt_template = PROMPT_TEMPLATE.internlm2_chat + +# Data +data_root = './data/llava_data/' +data_path = data_root + 'LLaVA-Instruct-150K/llava_v1_5_mix665k.json' +image_folder = data_root + 'llava_images' +max_length = 8192 + +# Scheduler & Optimizer +batch_size = 4 # per_device +accumulative_counts = 4 +dataloader_num_workers = 4 +max_epochs = 1 +optim_type = AdamW +# official 1024 -> 4e-5 +lr = 1e-6 +betas = (0.9, 0.999) +weight_decay = 0.05 +max_norm = 1 # grad clip +warmup_ratio = 0.03 + +# Save +save_steps = 1000 +save_total_limit = 1 # Maximum checkpoints to keep (-1 means unlimited) + +####################################################################### +# PART 2 Model & Tokenizer & Image Processor # +####################################################################### +model = dict( + type=InternVL_V1_5, + model_path=path, + freeze_llm=False, + freeze_visual_encoder=True # or False +) + +####################################################################### +# PART 3 Dataset & Dataloader # +####################################################################### +llava_dataset = dict( + type=InternVL_V1_5_Dataset, + model_path=path, + data_paths=data_path, + image_folders=image_folder, + template=prompt_template, + max_length=max_length) + +train_dataloader = dict( + batch_size=batch_size, + num_workers=dataloader_num_workers, + dataset=llava_dataset, + sampler=dict( + type=LengthGroupedSampler, + length_property='modality_length', + per_device_batch_size=batch_size * accumulative_counts), + collate_fn=dict(type=default_collate_fn)) + +####################################################################### +# PART 4 Scheduler & Optimizer # +####################################################################### +# optimizer +optim_wrapper = dict( + type=AmpOptimWrapper, + optimizer=dict( + type=optim_type, lr=lr, betas=betas, weight_decay=weight_decay), + clip_grad=dict(max_norm=max_norm, error_if_nonfinite=False), + accumulative_counts=accumulative_counts, + loss_scale='dynamic', + dtype='float16') + +# learning policy +# More information: https://github.com/open-mmlab/mmengine/blob/main/docs/en/tutorials/param_scheduler.md # noqa: E501 +param_scheduler = [ + dict( + type=LinearLR, + start_factor=1e-5, + by_epoch=True, + begin=0, + end=warmup_ratio * max_epochs, + convert_to_iter_based=True), + dict( + type=CosineAnnealingLR, + eta_min=0.0, + by_epoch=True, + begin=warmup_ratio * max_epochs, + end=max_epochs, + convert_to_iter_based=True) +] + +# train, val, test setting +train_cfg = dict(type=TrainLoop, max_epochs=max_epochs) + +####################################################################### +# PART 5 Runtime # 
+#######################################################################
+# Log the dialogue periodically during training, optional
+tokenizer = dict(
+    type=AutoTokenizer.from_pretrained,
+    pretrained_model_name_or_path=path,
+    trust_remote_code=True)
+
+custom_hooks = [
+    dict(type=DatasetInfoHook, tokenizer=tokenizer),
+]
+
+# configure default hooks
+default_hooks = dict(
+    # record the time of every iteration.
+    timer=dict(type=IterTimerHook),
+    # print log every 10 iterations.
+    logger=dict(type=LoggerHook, log_metric_by_epoch=False, interval=10),
+    # enable the parameter scheduler.
+    param_scheduler=dict(type=ParamSchedulerHook),
+    # save checkpoint per `save_steps`.
+    checkpoint=dict(
+        type=CheckpointHook,
+        save_optimizer=False,
+        by_epoch=False,
+        interval=save_steps,
+        max_keep_ckpts=save_total_limit),
+    # set sampler seed in distributed environment.
+    sampler_seed=dict(type=DistSamplerSeedHook),
+)
+
+# configure environment
+env_cfg = dict(
+    # whether to enable cudnn benchmark
+    cudnn_benchmark=False,
+    # set multi process parameters
+    mp_cfg=dict(mp_start_method='fork', opencv_num_threads=0),
+    # set distributed parameters
+    dist_cfg=dict(backend='nccl'),
+)
+
+# set visualizer
+visualizer = None
+
+# set log level
+log_level = 'INFO'
+
+# load from which checkpoint
+load_from = None
+
+# whether to resume training from the loaded checkpoint
+resume = False
+
+# Defaults to use random seed and disable `deterministic`
+randomness = dict(seed=None, deterministic=False)
+
+# set log processor
+log_processor = dict(by_epoch=False)
diff --git a/xtuner/configs/internvl/v1_5/internvl_v1_5_internlm2_2b_lora_finetune.py b/xtuner/configs/internvl/v1_5/internvl_v1_5_internlm2_2b_lora_finetune.py
new file mode 100644
index 000000000..193e2f269
--- /dev/null
+++ b/xtuner/configs/internvl/v1_5/internvl_v1_5_internlm2_2b_lora_finetune.py
@@ -0,0 +1,183 @@
+# Copyright (c) OpenMMLab. All rights reserved.
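+# LoRA recipe: with r=128 and lora_alpha=256, the adapter update is scaled by
+# lora_alpha / r = 2 before being added to the frozen base weights.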
+from mmengine.hooks import (CheckpointHook, DistSamplerSeedHook, IterTimerHook,
+                            LoggerHook, ParamSchedulerHook)
+from mmengine.optim import AmpOptimWrapper, CosineAnnealingLR, LinearLR
+from peft import LoraConfig
+from torch.optim import AdamW
+from transformers import AutoTokenizer
+
+from xtuner.dataset import InternVL_V1_5_Dataset
+from xtuner.dataset.collate_fns import default_collate_fn
+from xtuner.dataset.samplers import LengthGroupedSampler
+from xtuner.engine.hooks import DatasetInfoHook
+from xtuner.engine.runner import TrainLoop
+from xtuner.model import InternVL_V1_5
+from xtuner.utils import PROMPT_TEMPLATE
+
+#######################################################################
+#                          PART 1  Settings                           #
+#######################################################################
+# Model
+path = 'OpenGVLab/Mini-InternVL-Chat-2B-V1-5'
+prompt_template = PROMPT_TEMPLATE.internlm2_chat
+
+# Data
+data_root = './data/llava_data/'
+data_path = data_root + 'LLaVA-Instruct-150K/llava_v1_5_mix665k.json'
+image_folder = data_root + 'llava_images'
+max_length = 8192
+
+# Scheduler & Optimizer
+batch_size = 8  # per_device
+accumulative_counts = 2
+dataloader_num_workers = 4
+max_epochs = 1
+optim_type = AdamW
+# official 1024 -> 4e-5
+lr = 1e-6
+betas = (0.9, 0.999)
+weight_decay = 0.05
+max_norm = 1  # grad clip
+warmup_ratio = 0.03
+
+# Save
+save_steps = 1000
+save_total_limit = 1  # Maximum checkpoints to keep (-1 means unlimited)
+
+#######################################################################
+#            PART 2  Model & Tokenizer & Image Processor              #
+#######################################################################
+model = dict(
+    type=InternVL_V1_5,
+    model_path=path,
+    freeze_llm=True,
+    freeze_visual_encoder=True,
+    # comment the following lines if you don't want to use Lora in llm
+    llm_lora=dict(
+        type=LoraConfig,
+        r=128,
+        lora_alpha=256,
+        lora_dropout=0.05,
+        target_modules=None,
+        task_type='CAUSAL_LM'),
+    # uncomment the following lines if you want to use Lora in visual encoder  # noqa
+    # visual_encoder_lora=dict(
+    #     type=LoraConfig, r=64, lora_alpha=16, lora_dropout=0.05,
+    #     target_modules=['attn.qkv', 'attn.proj', 'mlp.fc1', 'mlp.fc2'])
+)
+
+#######################################################################
+#                      PART 3  Dataset & Dataloader                   #
+#######################################################################
+llava_dataset = dict(
+    type=InternVL_V1_5_Dataset,
+    model_path=path,
+    data_paths=data_path,
+    image_folders=image_folder,
+    template=prompt_template,
+    max_length=max_length)
+
+train_dataloader = dict(
+    batch_size=batch_size,
+    num_workers=dataloader_num_workers,
+    dataset=llava_dataset,
+    sampler=dict(
+        type=LengthGroupedSampler,
+        length_property='modality_length',
+        per_device_batch_size=batch_size * accumulative_counts),
+    collate_fn=dict(type=default_collate_fn))
+
+#######################################################################
+#                    PART 4  Scheduler & Optimizer                    #
+#######################################################################
+# optimizer
+optim_wrapper = dict(
+    type=AmpOptimWrapper,
+    optimizer=dict(
+        type=optim_type, lr=lr, betas=betas, weight_decay=weight_decay),
+    clip_grad=dict(max_norm=max_norm, error_if_nonfinite=False),
+    accumulative_counts=accumulative_counts,
+    loss_scale='dynamic',
+    dtype='float16')
+
+# learning policy
+# More information: https://github.com/open-mmlab/mmengine/blob/main/docs/en/tutorials/param_scheduler.md  # noqa: E501
+param_scheduler = [
+    dict(
+        type=LinearLR,
+        start_factor=1e-5,
start_factor=1e-5, + by_epoch=True, + begin=0, + end=warmup_ratio * max_epochs, + convert_to_iter_based=True), + dict( + type=CosineAnnealingLR, + eta_min=0.0, + by_epoch=True, + begin=warmup_ratio * max_epochs, + end=max_epochs, + convert_to_iter_based=True) +] + +# train, val, test setting +train_cfg = dict(type=TrainLoop, max_epochs=max_epochs) + +####################################################################### +# PART 5 Runtime # +####################################################################### +# Log the dialogue periodically during the training process, optional +tokenizer = dict( + type=AutoTokenizer.from_pretrained, + pretrained_model_name_or_path=path, + trust_remote_code=True) + +custom_hooks = [ + dict(type=DatasetInfoHook, tokenizer=tokenizer), +] + +# configure default hooks +default_hooks = dict( + # record the time of every iteration. + timer=dict(type=IterTimerHook), + # print log every 10 iterations. + logger=dict(type=LoggerHook, log_metric_by_epoch=False, interval=10), + # enable the parameter scheduler. + param_scheduler=dict(type=ParamSchedulerHook), + # save checkpoint per `save_steps`. + checkpoint=dict( + type=CheckpointHook, + save_optimizer=False, + by_epoch=False, + interval=save_steps, + max_keep_ckpts=save_total_limit), + # set sampler seed in distributed evrionment. + sampler_seed=dict(type=DistSamplerSeedHook), +) + +# configure environment +env_cfg = dict( + # whether to enable cudnn benchmark + cudnn_benchmark=False, + # set multi process parameters + mp_cfg=dict(mp_start_method='fork', opencv_num_threads=0), + # set distributed parameters + dist_cfg=dict(backend='nccl'), +) + +# set visualizer +visualizer = None + +# set log level +log_level = 'INFO' + +# load from which checkpoint +load_from = None + +# whether to resume training from the loaded checkpoint +resume = False + +# Defaults to use random seed and disable `deterministic` +randomness = dict(seed=None, deterministic=False) + +# set log processor +log_processor = dict(by_epoch=False) diff --git a/xtuner/configs/internvl/v1_5/internvl_v1_5_internlm2_2b_qlora_finetune.py b/xtuner/configs/internvl/v1_5/internvl_v1_5_internlm2_2b_qlora_finetune.py new file mode 100644 index 000000000..6bb28e490 --- /dev/null +++ b/xtuner/configs/internvl/v1_5/internvl_v1_5_internlm2_2b_qlora_finetune.py @@ -0,0 +1,185 @@ +# Copyright (c) OpenMMLab. All rights reserved. 
+from mmengine.hooks import (CheckpointHook, DistSamplerSeedHook, IterTimerHook,
+                            LoggerHook, ParamSchedulerHook)
+from mmengine.optim import AmpOptimWrapper, CosineAnnealingLR, LinearLR
+from peft import LoraConfig
+from torch.optim import AdamW
+from transformers import AutoTokenizer
+
+from xtuner.dataset import InternVL_V1_5_Dataset
+from xtuner.dataset.collate_fns import default_collate_fn
+from xtuner.dataset.samplers import LengthGroupedSampler
+from xtuner.engine.hooks import DatasetInfoHook
+from xtuner.engine.runner import TrainLoop
+from xtuner.model import InternVL_V1_5
+from xtuner.utils import PROMPT_TEMPLATE
+
+#######################################################################
+#                          PART 1  Settings                           #
+#######################################################################
+# Model
+path = 'OpenGVLab/Mini-InternVL-Chat-2B-V1-5'
+prompt_template = PROMPT_TEMPLATE.internlm2_chat
+
+# Data
+data_root = './data/llava_data/'
+data_path = data_root + 'LLaVA-Instruct-150K/llava_v1_5_mix665k.json'
+image_folder = data_root + 'llava_images'
+max_length = 8192
+
+# Scheduler & Optimizer
+batch_size = 8  # per_device
+accumulative_counts = 2
+dataloader_num_workers = 4
+max_epochs = 1
+optim_type = AdamW
+# official 1024 -> 4e-5
+lr = 1e-6
+betas = (0.9, 0.999)
+weight_decay = 0.05
+max_norm = 1  # grad clip
+warmup_ratio = 0.03
+
+# Save
+save_steps = 1000
+save_total_limit = 1  # Maximum checkpoints to keep (-1 means unlimited)
+
+#######################################################################
+#            PART 2  Model & Tokenizer & Image Processor              #
+#######################################################################
+model = dict(
+    type=InternVL_V1_5,
+    model_path=path,
+    freeze_llm=True,
+    freeze_visual_encoder=True,
+    quantization_llm=True,  # or False
+    quantization_vit=False,  # or True and uncomment visual_encoder_lora
+    # comment the following lines if you don't want to use Lora in llm
+    llm_lora=dict(
+        type=LoraConfig,
+        r=128,
+        lora_alpha=256,
+        lora_dropout=0.05,
+        target_modules=None,
+        task_type='CAUSAL_LM'),
+    # uncomment the following lines if you want to use Lora in visual encoder  # noqa
+    # visual_encoder_lora=dict(
+    #     type=LoraConfig, r=64, lora_alpha=16, lora_dropout=0.05,
+    #     target_modules=['attn.qkv', 'attn.proj', 'mlp.fc1', 'mlp.fc2'])
+)
+
+#######################################################################
+#                      PART 3  Dataset & Dataloader                   #
+#######################################################################
+llava_dataset = dict(
+    type=InternVL_V1_5_Dataset,
+    model_path=path,
+    data_paths=data_path,
+    image_folders=image_folder,
+    template=prompt_template,
+    max_length=max_length)
+
+train_dataloader = dict(
+    batch_size=batch_size,
+    num_workers=dataloader_num_workers,
+    dataset=llava_dataset,
+    sampler=dict(
+        type=LengthGroupedSampler,
+        length_property='modality_length',
+        per_device_batch_size=batch_size * accumulative_counts),
+    collate_fn=dict(type=default_collate_fn))
+
+#######################################################################
+#                    PART 4  Scheduler & Optimizer                    #
+#######################################################################
+# optimizer
+optim_wrapper = dict(
+    type=AmpOptimWrapper,
+    optimizer=dict(
+        type=optim_type, lr=lr, betas=betas, weight_decay=weight_decay),
+    clip_grad=dict(max_norm=max_norm, error_if_nonfinite=False),
+    accumulative_counts=accumulative_counts,
+    loss_scale='dynamic',
+    dtype='float16')
+
+# learning policy
+# More information: https://github.com/open-mmlab/mmengine/blob/main/docs/en/tutorials/param_scheduler.md  # noqa: E501
+param_scheduler = [
+    dict(
+        type=LinearLR,
+        start_factor=1e-5,
+        by_epoch=True,
+        begin=0,
+        end=warmup_ratio * max_epochs,
+        convert_to_iter_based=True),
+    dict(
+        type=CosineAnnealingLR,
+        eta_min=0.0,
+        by_epoch=True,
+        begin=warmup_ratio * max_epochs,
+        end=max_epochs,
+        convert_to_iter_based=True)
+]
+
+# train, val, test setting
+train_cfg = dict(type=TrainLoop, max_epochs=max_epochs)
+
+#######################################################################
+#                           PART 5  Runtime                           #
+#######################################################################
+# Log the dialogue periodically during training, optional
+tokenizer = dict(
+    type=AutoTokenizer.from_pretrained,
+    pretrained_model_name_or_path=path,
+    trust_remote_code=True)
+
+custom_hooks = [
+    dict(type=DatasetInfoHook, tokenizer=tokenizer),
+]
+
+# configure default hooks
+default_hooks = dict(
+    # record the time of every iteration.
+    timer=dict(type=IterTimerHook),
+    # print log every 10 iterations.
+    logger=dict(type=LoggerHook, log_metric_by_epoch=False, interval=10),
+    # enable the parameter scheduler.
+    param_scheduler=dict(type=ParamSchedulerHook),
+    # save checkpoint per `save_steps`.
+    checkpoint=dict(
+        type=CheckpointHook,
+        save_optimizer=False,
+        by_epoch=False,
+        interval=save_steps,
+        max_keep_ckpts=save_total_limit),
+    # set sampler seed in distributed environment.
+    sampler_seed=dict(type=DistSamplerSeedHook),
+)
+
+# configure environment
+env_cfg = dict(
+    # whether to enable cudnn benchmark
+    cudnn_benchmark=False,
+    # set multi process parameters
+    mp_cfg=dict(mp_start_method='fork', opencv_num_threads=0),
+    # set distributed parameters
+    dist_cfg=dict(backend='nccl'),
+)
+
+# set visualizer
+visualizer = None
+
+# set log level
+log_level = 'INFO'
+
+# load from which checkpoint
+load_from = None
+
+# whether to resume training from the loaded checkpoint
+resume = False
+
+# Defaults to use random seed and disable `deterministic`
+randomness = dict(seed=None, deterministic=False)
+
+# set log processor
+log_processor = dict(by_epoch=False)
diff --git a/xtuner/configs/internvl/v1_5/internvl_v1_5_phi3_4b_finetune.py b/xtuner/configs/internvl/v1_5/internvl_v1_5_phi3_4b_finetune.py
new file mode 100644
index 000000000..5d34a928b
--- /dev/null
+++ b/xtuner/configs/internvl/v1_5/internvl_v1_5_phi3_4b_finetune.py
@@ -0,0 +1,170 @@
+# Copyright (c) OpenMMLab. All rights reserved.
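+# Mini-InternVL-Chat-4B uses a Phi-3 LLM, so this config selects
+# PROMPT_TEMPLATE.phi3_chat instead of the internlm2_chat template.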
+from mmengine.hooks import (CheckpointHook, DistSamplerSeedHook, IterTimerHook, + LoggerHook, ParamSchedulerHook) +from mmengine.optim import AmpOptimWrapper, CosineAnnealingLR, LinearLR +from torch.optim import AdamW +from transformers import AutoTokenizer + +from xtuner.dataset import InternVL_V1_5_Dataset +from xtuner.dataset.collate_fns import default_collate_fn +from xtuner.dataset.samplers import LengthGroupedSampler +from xtuner.engine.hooks import DatasetInfoHook +from xtuner.engine.runner import TrainLoop +from xtuner.model import InternVL_V1_5 +from xtuner.utils import PROMPT_TEMPLATE + +####################################################################### +# PART 1 Settings # +####################################################################### +# Model +path = 'OpenGVLab/Mini-InternVL-Chat-4B-V1-5' + +# Data +data_root = './data/llava_data/' +data_path = data_root + 'LLaVA-Instruct-150K/llava_v1_5_mix665k.json' +image_folder = data_root + 'llava_images' +prompt_template = PROMPT_TEMPLATE.phi3_chat +max_length = 8192 + +# Scheduler & Optimizer +batch_size = 4 # per_device +accumulative_counts = 4 +dataloader_num_workers = 4 +max_epochs = 1 +optim_type = AdamW +# official 1024 -> 4e-5 +lr = 1e-6 +betas = (0.9, 0.999) +weight_decay = 0.05 +max_norm = 1 # grad clip +warmup_ratio = 0.03 + +# Save +save_steps = 1000 +save_total_limit = 1 # Maximum checkpoints to keep (-1 means unlimited) + +####################################################################### +# PART 2 Model & Tokenizer & Image Processor # +####################################################################### +model = dict( + type=InternVL_V1_5, + model_path=path, + freeze_llm=False, + freeze_visual_encoder=True # or False +) + +####################################################################### +# PART 3 Dataset & Dataloader # +####################################################################### +llava_dataset = dict( + type=InternVL_V1_5_Dataset, + model_path=path, + data_paths=data_path, + image_folders=image_folder, + template=prompt_template, + max_length=max_length) + +train_dataloader = dict( + batch_size=batch_size, + num_workers=dataloader_num_workers, + dataset=llava_dataset, + sampler=dict( + type=LengthGroupedSampler, + length_property='modality_length', + per_device_batch_size=batch_size * accumulative_counts), + collate_fn=dict(type=default_collate_fn)) + +####################################################################### +# PART 4 Scheduler & Optimizer # +####################################################################### +# optimizer +optim_wrapper = dict( + type=AmpOptimWrapper, + optimizer=dict( + type=optim_type, lr=lr, betas=betas, weight_decay=weight_decay), + clip_grad=dict(max_norm=max_norm, error_if_nonfinite=False), + accumulative_counts=accumulative_counts, + loss_scale='dynamic', + dtype='float16') + +# learning policy +# More information: https://github.com/open-mmlab/mmengine/blob/main/docs/en/tutorials/param_scheduler.md # noqa: E501 +param_scheduler = [ + dict( + type=LinearLR, + start_factor=1e-5, + by_epoch=True, + begin=0, + end=warmup_ratio * max_epochs, + convert_to_iter_based=True), + dict( + type=CosineAnnealingLR, + eta_min=0.0, + by_epoch=True, + begin=warmup_ratio * max_epochs, + end=max_epochs, + convert_to_iter_based=True) +] + +# train, val, test setting +train_cfg = dict(type=TrainLoop, max_epochs=max_epochs) + +####################################################################### +# PART 5 Runtime # 
+#######################################################################
+# Log the dialogue periodically during training, optional
+tokenizer = dict(
+    type=AutoTokenizer.from_pretrained,
+    pretrained_model_name_or_path=path,
+    trust_remote_code=True)
+
+custom_hooks = [
+    dict(type=DatasetInfoHook, tokenizer=tokenizer),
+]
+
+# configure default hooks
+default_hooks = dict(
+    # record the time of every iteration.
+    timer=dict(type=IterTimerHook),
+    # print log every 10 iterations.
+    logger=dict(type=LoggerHook, log_metric_by_epoch=False, interval=10),
+    # enable the parameter scheduler.
+    param_scheduler=dict(type=ParamSchedulerHook),
+    # save checkpoint per `save_steps`.
+    checkpoint=dict(
+        type=CheckpointHook,
+        save_optimizer=False,
+        by_epoch=False,
+        interval=save_steps,
+        max_keep_ckpts=save_total_limit),
+    # set sampler seed in distributed environment.
+    sampler_seed=dict(type=DistSamplerSeedHook),
+)
+
+# configure environment
+env_cfg = dict(
+    # whether to enable cudnn benchmark
+    cudnn_benchmark=False,
+    # set multi process parameters
+    mp_cfg=dict(mp_start_method='fork', opencv_num_threads=0),
+    # set distributed parameters
+    dist_cfg=dict(backend='nccl'),
+)
+
+# set visualizer
+visualizer = None
+
+# set log level
+log_level = 'INFO'
+
+# load from which checkpoint
+load_from = None
+
+# whether to resume training from the loaded checkpoint
+resume = False
+
+# Defaults to use random seed and disable `deterministic`
+randomness = dict(seed=None, deterministic=False)
+
+# set log processor
+log_processor = dict(by_epoch=False)
diff --git a/xtuner/configs/internvl/v1_5/internvl_v1_5_phi3_4b_lora_finetune.py b/xtuner/configs/internvl/v1_5/internvl_v1_5_phi3_4b_lora_finetune.py
new file mode 100644
index 000000000..19588cb95
--- /dev/null
+++ b/xtuner/configs/internvl/v1_5/internvl_v1_5_phi3_4b_lora_finetune.py
@@ -0,0 +1,183 @@
+# Copyright (c) OpenMMLab. All rights reserved.
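+# The LengthGroupedSampler below batches samples of similar modality_length,
+# which reduces padding waste within each accumulation window of
+# batch_size * accumulative_counts samples.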
+from mmengine.hooks import (CheckpointHook, DistSamplerSeedHook, IterTimerHook,
+                            LoggerHook, ParamSchedulerHook)
+from mmengine.optim import AmpOptimWrapper, CosineAnnealingLR, LinearLR
+from peft import LoraConfig
+from torch.optim import AdamW
+from transformers import AutoTokenizer
+
+from xtuner.dataset import InternVL_V1_5_Dataset
+from xtuner.dataset.collate_fns import default_collate_fn
+from xtuner.dataset.samplers import LengthGroupedSampler
+from xtuner.engine.hooks import DatasetInfoHook
+from xtuner.engine.runner import TrainLoop
+from xtuner.model import InternVL_V1_5
+from xtuner.utils import PROMPT_TEMPLATE
+
+#######################################################################
+#                          PART 1  Settings                           #
+#######################################################################
+# Model
+path = 'OpenGVLab/Mini-InternVL-Chat-4B-V1-5'
+
+# Data
+data_root = './data/llava_data/'
+data_path = data_root + 'LLaVA-Instruct-150K/llava_v1_5_mix665k.json'
+image_folder = data_root + 'llava_images'
+prompt_template = PROMPT_TEMPLATE.phi3_chat
+max_length = 8192
+
+# Scheduler & Optimizer
+batch_size = 8  # per_device
+accumulative_counts = 2
+dataloader_num_workers = 4
+max_epochs = 1
+optim_type = AdamW
+# official 1024 -> 4e-5
+lr = 1e-6
+betas = (0.9, 0.999)
+weight_decay = 0.05
+max_norm = 1  # grad clip
+warmup_ratio = 0.03
+
+# Save
+save_steps = 1000
+save_total_limit = 1  # Maximum checkpoints to keep (-1 means unlimited)
+
+#######################################################################
+#            PART 2  Model & Tokenizer & Image Processor              #
+#######################################################################
+model = dict(
+    type=InternVL_V1_5,
+    model_path=path,
+    freeze_llm=True,
+    freeze_visual_encoder=True,
+    # comment the following lines if you don't want to use Lora in llm
+    llm_lora=dict(
+        type=LoraConfig,
+        r=128,
+        lora_alpha=256,
+        lora_dropout=0.05,
+        target_modules=None,
+        task_type='CAUSAL_LM'),
+    # uncomment the following lines if you want to use Lora in visual encoder  # noqa
+    # visual_encoder_lora=dict(
+    #     type=LoraConfig, r=64, lora_alpha=16, lora_dropout=0.05,
+    #     target_modules=['attn.qkv', 'attn.proj', 'mlp.fc1', 'mlp.fc2'])
+)
+
+#######################################################################
+#                      PART 3  Dataset & Dataloader                   #
+#######################################################################
+llava_dataset = dict(
+    type=InternVL_V1_5_Dataset,
+    model_path=path,
+    data_paths=data_path,
+    image_folders=image_folder,
+    template=prompt_template,
+    max_length=max_length)
+
+train_dataloader = dict(
+    batch_size=batch_size,
+    num_workers=dataloader_num_workers,
+    dataset=llava_dataset,
+    sampler=dict(
+        type=LengthGroupedSampler,
+        length_property='modality_length',
+        per_device_batch_size=batch_size * accumulative_counts),
+    collate_fn=dict(type=default_collate_fn))
+
+#######################################################################
+#                    PART 4  Scheduler & Optimizer                    #
+#######################################################################
+# optimizer
+optim_wrapper = dict(
+    type=AmpOptimWrapper,
+    optimizer=dict(
+        type=optim_type, lr=lr, betas=betas, weight_decay=weight_decay),
+    clip_grad=dict(max_norm=max_norm, error_if_nonfinite=False),
+    accumulative_counts=accumulative_counts,
+    loss_scale='dynamic',
+    dtype='float16')
+
+# learning policy
+# More information: https://github.com/open-mmlab/mmengine/blob/main/docs/en/tutorials/param_scheduler.md  # noqa: E501
+param_scheduler = [
+    dict(
+        type=LinearLR,
+        start_factor=1e-5,
+        by_epoch=True,
+        begin=0,
+        end=warmup_ratio * max_epochs,
+        convert_to_iter_based=True),
+    dict(
+        type=CosineAnnealingLR,
+        eta_min=0.0,
+        by_epoch=True,
+        begin=warmup_ratio * max_epochs,
+        end=max_epochs,
+        convert_to_iter_based=True)
+]
+
+# train, val, test setting
+train_cfg = dict(type=TrainLoop, max_epochs=max_epochs)
+
+#######################################################################
+#                           PART 5  Runtime                           #
+#######################################################################
+# Log the dialogue periodically during training, optional
+tokenizer = dict(
+    type=AutoTokenizer.from_pretrained,
+    pretrained_model_name_or_path=path,
+    trust_remote_code=True)
+
+custom_hooks = [
+    dict(type=DatasetInfoHook, tokenizer=tokenizer),
+]
+
+# configure default hooks
+default_hooks = dict(
+    # record the time of every iteration.
+    timer=dict(type=IterTimerHook),
+    # print log every 10 iterations.
+    logger=dict(type=LoggerHook, log_metric_by_epoch=False, interval=10),
+    # enable the parameter scheduler.
+    param_scheduler=dict(type=ParamSchedulerHook),
+    # save checkpoint per `save_steps`.
+    checkpoint=dict(
+        type=CheckpointHook,
+        save_optimizer=False,
+        by_epoch=False,
+        interval=save_steps,
+        max_keep_ckpts=save_total_limit),
+    # set sampler seed in distributed environment.
+    sampler_seed=dict(type=DistSamplerSeedHook),
+)
+
+# configure environment
+env_cfg = dict(
+    # whether to enable cudnn benchmark
+    cudnn_benchmark=False,
+    # set multi process parameters
+    mp_cfg=dict(mp_start_method='fork', opencv_num_threads=0),
+    # set distributed parameters
+    dist_cfg=dict(backend='nccl'),
+)
+
+# set visualizer
+visualizer = None
+
+# set log level
+log_level = 'INFO'
+
+# load from which checkpoint
+load_from = None
+
+# whether to resume training from the loaded checkpoint
+resume = False
+
+# Defaults to use random seed and disable `deterministic`
+randomness = dict(seed=None, deterministic=False)
+
+# set log processor
+log_processor = dict(by_epoch=False)
diff --git a/xtuner/configs/internvl/v1_5/internvl_v1_5_phi3_4b_qlora_finetune.py b/xtuner/configs/internvl/v1_5/internvl_v1_5_phi3_4b_qlora_finetune.py
new file mode 100644
index 000000000..cb150f0c4
--- /dev/null
+++ b/xtuner/configs/internvl/v1_5/internvl_v1_5_phi3_4b_qlora_finetune.py
@@ -0,0 +1,185 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+from mmengine.hooks import (CheckpointHook, DistSamplerSeedHook, IterTimerHook,
+                            LoggerHook, ParamSchedulerHook)
+from mmengine.optim import AmpOptimWrapper, CosineAnnealingLR, LinearLR
+from peft import LoraConfig
+from torch.optim import AdamW
+from transformers import AutoTokenizer
+
+from xtuner.dataset import InternVL_V1_5_Dataset
+from xtuner.dataset.collate_fns import default_collate_fn
+from xtuner.dataset.samplers import LengthGroupedSampler
+from xtuner.engine.hooks import DatasetInfoHook
+from xtuner.engine.runner import TrainLoop
+from xtuner.model import InternVL_V1_5
+from xtuner.utils import PROMPT_TEMPLATE
+
+#######################################################################
+#                          PART 1  Settings                           #
+#######################################################################
+# Model
+path = 'OpenGVLab/Mini-InternVL-Chat-4B-V1-5'
+
+# Data
+data_root = './data/llava_data/'
+data_path = data_root + 'LLaVA-Instruct-150K/llava_v1_5_mix665k.json'
+image_folder = data_root + 'llava_images'
+prompt_template = PROMPT_TEMPLATE.phi3_chat
+max_length = 8192
+
+# Scheduler & Optimizer
+batch_size = 8  # per_device
+accumulative_counts = 2
+dataloader_num_workers = 4
+max_epochs = 1
+optim_type = AdamW
+# official 1024 -> 4e-5
+lr = 1e-6
+betas = (0.9, 0.999)
+weight_decay = 0.05
+max_norm = 1  # grad clip
+warmup_ratio = 0.03
+
+# Save
+save_steps = 1000
+save_total_limit = 1  # Maximum checkpoints to keep (-1 means unlimited)
+
+#######################################################################
+#            PART 2  Model & Tokenizer & Image Processor              #
+#######################################################################
+model = dict(
+    type=InternVL_V1_5,
+    model_path=path,
+    freeze_llm=True,
+    freeze_visual_encoder=True,
+    quantization_llm=True,  # or False
+    quantization_vit=False,  # or True and uncomment visual_encoder_lora
+    # comment the following lines if you don't want to use Lora in llm
+    llm_lora=dict(
+        type=LoraConfig,
+        r=128,
+        lora_alpha=256,
+        lora_dropout=0.05,
+        target_modules=None,
+        task_type='CAUSAL_LM'),
+    # uncomment the following lines if you want to use Lora in visual encoder  # noqa
+    # visual_encoder_lora=dict(
+    #     type=LoraConfig, r=64, lora_alpha=16, lora_dropout=0.05,
+    #     target_modules=['attn.qkv', 'attn.proj', 'mlp.fc1', 'mlp.fc2'])
+)
+
+#######################################################################
+#                      PART 3  Dataset & Dataloader                   #
+#######################################################################
+llava_dataset = dict(
+    type=InternVL_V1_5_Dataset,
+    model_path=path,
+    data_paths=data_path,
+    image_folders=image_folder,
+    template=prompt_template,
+    max_length=max_length)
+
+train_dataloader = dict(
+    batch_size=batch_size,
+    num_workers=dataloader_num_workers,
+    dataset=llava_dataset,
+    sampler=dict(
+        type=LengthGroupedSampler,
+        length_property='modality_length',
+        per_device_batch_size=batch_size * accumulative_counts),
+    collate_fn=dict(type=default_collate_fn))
+
+#######################################################################
+#                    PART 4  Scheduler & Optimizer                    #
+#######################################################################
+# optimizer
+optim_wrapper = dict(
+    type=AmpOptimWrapper,
+    optimizer=dict(
+        type=optim_type, lr=lr, betas=betas, weight_decay=weight_decay),
+    clip_grad=dict(max_norm=max_norm, error_if_nonfinite=False),
+    accumulative_counts=accumulative_counts,
+    loss_scale='dynamic',
+    dtype='float16')
+
+# learning policy
+# More information: https://github.com/open-mmlab/mmengine/blob/main/docs/en/tutorials/param_scheduler.md  # noqa: E501
+param_scheduler = [
+    dict(
+        type=LinearLR,
+        start_factor=1e-5,
+        by_epoch=True,
+        begin=0,
+        end=warmup_ratio * max_epochs,
+        convert_to_iter_based=True),
+    dict(
+        type=CosineAnnealingLR,
+        eta_min=0.0,
+        by_epoch=True,
+        begin=warmup_ratio * max_epochs,
+        end=max_epochs,
+        convert_to_iter_based=True)
+]
+
+# train, val, test setting
+train_cfg = dict(type=TrainLoop, max_epochs=max_epochs)
+
+#######################################################################
+#                           PART 5  Runtime                           #
+#######################################################################
+# Log the dialogue periodically during training, optional
+tokenizer = dict(
+    type=AutoTokenizer.from_pretrained,
+    pretrained_model_name_or_path=path,
+    trust_remote_code=True)
+
+custom_hooks = [
+    dict(type=DatasetInfoHook, tokenizer=tokenizer),
+]
+
+# configure default hooks
+default_hooks = dict(
+    # record the time of every iteration.
+    timer=dict(type=IterTimerHook),
+    # print log every 10 iterations.
+    logger=dict(type=LoggerHook, log_metric_by_epoch=False, interval=10),
+    # enable the parameter scheduler.
+    param_scheduler=dict(type=ParamSchedulerHook),
+    # save checkpoint per `save_steps`.
+    checkpoint=dict(
+        type=CheckpointHook,
+        save_optimizer=False,
+        by_epoch=False,
+        interval=save_steps,
+        max_keep_ckpts=save_total_limit),
+    # set sampler seed in distributed environment.
+    sampler_seed=dict(type=DistSamplerSeedHook),
+)
+
+# configure environment
+env_cfg = dict(
+    # whether to enable cudnn benchmark
+    cudnn_benchmark=False,
+    # set multi process parameters
+    mp_cfg=dict(mp_start_method='fork', opencv_num_threads=0),
+    # set distributed parameters
+    dist_cfg=dict(backend='nccl'),
+)
+
+# set visualizer
+visualizer = None
+
+# set log level
+log_level = 'INFO'
+
+# load from which checkpoint
+load_from = None
+
+# whether to resume training from the loaded checkpoint
+resume = False
+
+# Defaults to use random seed and disable `deterministic`
+randomness = dict(seed=None, deterministic=False)
+
+# set log processor
+log_processor = dict(by_epoch=False)
diff --git a/xtuner/configs/internvl/v2/internvl_v2_internlm2_26b_finetune.py b/xtuner/configs/internvl/v2/internvl_v2_internlm2_26b_finetune.py
new file mode 100644
index 000000000..0916df44a
--- /dev/null
+++ b/xtuner/configs/internvl/v2/internvl_v2_internlm2_26b_finetune.py
@@ -0,0 +1,170 @@
+# Copyright (c) OpenMMLab. All rights reserved.
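+# InternVL2 configs reuse the InternVL_V1_5 wrapper and dataset classes;
+# relative to the v1_5 recipes, mainly the checkpoint path changes.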
+from mmengine.hooks import (CheckpointHook, DistSamplerSeedHook, IterTimerHook, + LoggerHook, ParamSchedulerHook) +from mmengine.optim import AmpOptimWrapper, CosineAnnealingLR, LinearLR +from torch.optim import AdamW +from transformers import AutoTokenizer + +from xtuner.dataset import InternVL_V1_5_Dataset +from xtuner.dataset.collate_fns import default_collate_fn +from xtuner.dataset.samplers import LengthGroupedSampler +from xtuner.engine.hooks import DatasetInfoHook +from xtuner.engine.runner import TrainLoop +from xtuner.model import InternVL_V1_5 +from xtuner.utils import PROMPT_TEMPLATE + +####################################################################### +# PART 1 Settings # +####################################################################### +# Model +path = 'OpenGVLab/InternVL2-26B' + +# Data +data_root = './data/llava_data/' +data_path = data_root + 'LLaVA-Instruct-150K/llava_v1_5_mix665k.json' +image_folder = data_root + 'llava_images' +prompt_template = PROMPT_TEMPLATE.internlm2_chat +max_length = 8192 + +# Scheduler & Optimizer +batch_size = 1 # per_device +accumulative_counts = 8 +dataloader_num_workers = 4 +max_epochs = 1 +optim_type = AdamW +# official 1024 -> 4e-5 +lr = 1e-6 +betas = (0.9, 0.999) +weight_decay = 0.05 +max_norm = 1 # grad clip +warmup_ratio = 0.03 + +# Save +save_steps = 1000 +save_total_limit = 1 # Maximum checkpoints to keep (-1 means unlimited) + +####################################################################### +# PART 2 Model & Tokenizer & Image Processor # +####################################################################### +model = dict( + type=InternVL_V1_5, + model_path=path, + freeze_llm=False, + freeze_visual_encoder=True # or False +) + +####################################################################### +# PART 3 Dataset & Dataloader # +####################################################################### +llava_dataset = dict( + type=InternVL_V1_5_Dataset, + model_path=path, + data_paths=data_path, + image_folders=image_folder, + template=prompt_template, + max_length=max_length) + +train_dataloader = dict( + batch_size=batch_size, + num_workers=dataloader_num_workers, + dataset=llava_dataset, + sampler=dict( + type=LengthGroupedSampler, + length_property='modality_length', + per_device_batch_size=batch_size * accumulative_counts), + collate_fn=dict(type=default_collate_fn)) + +####################################################################### +# PART 4 Scheduler & Optimizer # +####################################################################### +# optimizer +optim_wrapper = dict( + type=AmpOptimWrapper, + optimizer=dict( + type=optim_type, lr=lr, betas=betas, weight_decay=weight_decay), + clip_grad=dict(max_norm=max_norm, error_if_nonfinite=False), + accumulative_counts=accumulative_counts, + loss_scale='dynamic', + dtype='float16') + +# learning policy +# More information: https://github.com/open-mmlab/mmengine/blob/main/docs/en/tutorials/param_scheduler.md # noqa: E501 +param_scheduler = [ + dict( + type=LinearLR, + start_factor=1e-5, + by_epoch=True, + begin=0, + end=warmup_ratio * max_epochs, + convert_to_iter_based=True), + dict( + type=CosineAnnealingLR, + eta_min=0.0, + by_epoch=True, + begin=warmup_ratio * max_epochs, + end=max_epochs, + convert_to_iter_based=True) +] + +# train, val, test setting +train_cfg = dict(type=TrainLoop, max_epochs=max_epochs) + +####################################################################### +# PART 5 Runtime # 
+#######################################################################
+# Log the dialogue periodically during training, optional
+tokenizer = dict(
+    type=AutoTokenizer.from_pretrained,
+    pretrained_model_name_or_path=path,
+    trust_remote_code=True)
+
+custom_hooks = [
+    dict(type=DatasetInfoHook, tokenizer=tokenizer),
+]
+
+# configure default hooks
+default_hooks = dict(
+    # record the time of every iteration.
+    timer=dict(type=IterTimerHook),
+    # print log every 10 iterations.
+    logger=dict(type=LoggerHook, log_metric_by_epoch=False, interval=10),
+    # enable the parameter scheduler.
+    param_scheduler=dict(type=ParamSchedulerHook),
+    # save checkpoint per `save_steps`.
+    checkpoint=dict(
+        type=CheckpointHook,
+        save_optimizer=False,
+        by_epoch=False,
+        interval=save_steps,
+        max_keep_ckpts=save_total_limit),
+    # set sampler seed in distributed environment.
+    sampler_seed=dict(type=DistSamplerSeedHook),
+)
+
+# configure environment
+env_cfg = dict(
+    # whether to enable cudnn benchmark
+    cudnn_benchmark=False,
+    # set multi process parameters
+    mp_cfg=dict(mp_start_method='fork', opencv_num_threads=0),
+    # set distributed parameters
+    dist_cfg=dict(backend='nccl'),
+)
+
+# set visualizer
+visualizer = None
+
+# set log level
+log_level = 'INFO'
+
+# load from which checkpoint
+load_from = None
+
+# whether to resume training from the loaded checkpoint
+resume = False
+
+# Defaults to use random seed and disable `deterministic`
+randomness = dict(seed=None, deterministic=False)
+
+# set log processor
+log_processor = dict(by_epoch=False)
diff --git a/xtuner/configs/internvl/v2/internvl_v2_internlm2_26b_lora_finetune.py b/xtuner/configs/internvl/v2/internvl_v2_internlm2_26b_lora_finetune.py
new file mode 100644
index 000000000..045fd7055
--- /dev/null
+++ b/xtuner/configs/internvl/v2/internvl_v2_internlm2_26b_lora_finetune.py
@@ -0,0 +1,183 @@
+# Copyright (c) OpenMMLab. All rights reserved.
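+# Note: save_optimizer=False in the CheckpointHook keeps checkpoints small,
+# but optimizer state is then not available when resuming from them.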
+from mmengine.hooks import (CheckpointHook, DistSamplerSeedHook, IterTimerHook,
+                            LoggerHook, ParamSchedulerHook)
+from mmengine.optim import AmpOptimWrapper, CosineAnnealingLR, LinearLR
+from peft import LoraConfig
+from torch.optim import AdamW
+from transformers import AutoTokenizer
+
+from xtuner.dataset import InternVL_V1_5_Dataset
+from xtuner.dataset.collate_fns import default_collate_fn
+from xtuner.dataset.samplers import LengthGroupedSampler
+from xtuner.engine.hooks import DatasetInfoHook
+from xtuner.engine.runner import TrainLoop
+from xtuner.model import InternVL_V1_5
+from xtuner.utils import PROMPT_TEMPLATE
+
+#######################################################################
+#                          PART 1  Settings                           #
+#######################################################################
+# Model
+path = 'OpenGVLab/InternVL2-26B'
+
+# Data
+data_root = './data/llava_data/'
+data_path = data_root + 'LLaVA-Instruct-150K/llava_v1_5_mix665k.json'
+image_folder = data_root + 'llava_images'
+prompt_template = PROMPT_TEMPLATE.internlm2_chat
+max_length = 8192
+
+# Scheduler & Optimizer
+batch_size = 2  # per_device
+accumulative_counts = 4
+dataloader_num_workers = 4
+max_epochs = 1
+optim_type = AdamW
+# official 1024 -> 4e-5
+lr = 1e-6
+betas = (0.9, 0.999)
+weight_decay = 0.05
+max_norm = 1  # grad clip
+warmup_ratio = 0.03
+
+# Save
+save_steps = 1000
+save_total_limit = 1  # Maximum checkpoints to keep (-1 means unlimited)
+
+#######################################################################
+#            PART 2  Model & Tokenizer & Image Processor              #
+#######################################################################
+model = dict(
+    type=InternVL_V1_5,
+    model_path=path,
+    freeze_llm=True,
+    freeze_visual_encoder=True,
+    # comment the following lines if you don't want to use Lora in llm
+    llm_lora=dict(
+        type=LoraConfig,
+        r=128,
+        lora_alpha=256,
+        lora_dropout=0.05,
+        target_modules=None,
+        task_type='CAUSAL_LM'),
+    # uncomment the following lines if you want to use Lora in visual encoder  # noqa
+    # visual_encoder_lora=dict(
+    #     type=LoraConfig, r=64, lora_alpha=16, lora_dropout=0.05,
+    #     target_modules=['attn.qkv', 'attn.proj', 'mlp.fc1', 'mlp.fc2'])
+)
+
+#######################################################################
+#                      PART 3  Dataset & Dataloader                   #
+#######################################################################
+llava_dataset = dict(
+    type=InternVL_V1_5_Dataset,
+    model_path=path,
+    data_paths=data_path,
+    image_folders=image_folder,
+    template=prompt_template,
+    max_length=max_length)
+
+train_dataloader = dict(
+    batch_size=batch_size,
+    num_workers=dataloader_num_workers,
+    dataset=llava_dataset,
+    sampler=dict(
+        type=LengthGroupedSampler,
+        length_property='modality_length',
+        per_device_batch_size=batch_size * accumulative_counts),
+    collate_fn=dict(type=default_collate_fn))
+
+#######################################################################
+#                    PART 4  Scheduler & Optimizer                    #
+#######################################################################
+# optimizer
+optim_wrapper = dict(
+    type=AmpOptimWrapper,
+    optimizer=dict(
+        type=optim_type, lr=lr, betas=betas, weight_decay=weight_decay),
+    clip_grad=dict(max_norm=max_norm, error_if_nonfinite=False),
+    accumulative_counts=accumulative_counts,
+    loss_scale='dynamic',
+    dtype='float16')
+
+# learning policy
+# More information: https://github.com/open-mmlab/mmengine/blob/main/docs/en/tutorials/param_scheduler.md  # noqa: E501
+param_scheduler = [
+    dict(
+        type=LinearLR,
+        start_factor=1e-5,
+        by_epoch=True,
+        begin=0,
+        end=warmup_ratio * max_epochs,
+        convert_to_iter_based=True),
+    dict(
+        type=CosineAnnealingLR,
+        eta_min=0.0,
+        by_epoch=True,
+        begin=warmup_ratio * max_epochs,
+        end=max_epochs,
+        convert_to_iter_based=True)
+]
+
+# train, val, test setting
+train_cfg = dict(type=TrainLoop, max_epochs=max_epochs)
+
+#######################################################################
+#                           PART 5  Runtime                           #
+#######################################################################
+# Log the dialogue periodically during training, optional
+tokenizer = dict(
+    type=AutoTokenizer.from_pretrained,
+    pretrained_model_name_or_path=path,
+    trust_remote_code=True)
+
+custom_hooks = [
+    dict(type=DatasetInfoHook, tokenizer=tokenizer),
+]
+
+# configure default hooks
+default_hooks = dict(
+    # record the time of every iteration.
+    timer=dict(type=IterTimerHook),
+    # print log every 10 iterations.
+    logger=dict(type=LoggerHook, log_metric_by_epoch=False, interval=10),
+    # enable the parameter scheduler.
+    param_scheduler=dict(type=ParamSchedulerHook),
+    # save checkpoint per `save_steps`.
+    checkpoint=dict(
+        type=CheckpointHook,
+        save_optimizer=False,
+        by_epoch=False,
+        interval=save_steps,
+        max_keep_ckpts=save_total_limit),
+    # set sampler seed in distributed environment.
+    sampler_seed=dict(type=DistSamplerSeedHook),
+)
+
+# configure environment
+env_cfg = dict(
+    # whether to enable cudnn benchmark
+    cudnn_benchmark=False,
+    # set multi process parameters
+    mp_cfg=dict(mp_start_method='fork', opencv_num_threads=0),
+    # set distributed parameters
+    dist_cfg=dict(backend='nccl'),
+)
+
+# set visualizer
+visualizer = None
+
+# set log level
+log_level = 'INFO'
+
+# load from which checkpoint
+load_from = None
+
+# whether to resume training from the loaded checkpoint
+resume = False
+
+# Defaults to use random seed and disable `deterministic`
+randomness = dict(seed=None, deterministic=False)
+
+# set log processor
+log_processor = dict(by_epoch=False)
diff --git a/xtuner/configs/internvl/v2/internvl_v2_internlm2_26b_qlora_finetune.py b/xtuner/configs/internvl/v2/internvl_v2_internlm2_26b_qlora_finetune.py
new file mode 100644
index 000000000..60717b312
--- /dev/null
+++ b/xtuner/configs/internvl/v2/internvl_v2_internlm2_26b_qlora_finetune.py
@@ -0,0 +1,185 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+from mmengine.hooks import (CheckpointHook, DistSamplerSeedHook, IterTimerHook,
+                            LoggerHook, ParamSchedulerHook)
+from mmengine.optim import AmpOptimWrapper, CosineAnnealingLR, LinearLR
+from peft import LoraConfig
+from torch.optim import AdamW
+from transformers import AutoTokenizer
+
+from xtuner.dataset import InternVL_V1_5_Dataset
+from xtuner.dataset.collate_fns import default_collate_fn
+from xtuner.dataset.samplers import LengthGroupedSampler
+from xtuner.engine.hooks import DatasetInfoHook
+from xtuner.engine.runner import TrainLoop
+from xtuner.model import InternVL_V1_5
+from xtuner.utils import PROMPT_TEMPLATE
+
+#######################################################################
+#                          PART 1  Settings                           #
+#######################################################################
+# Model
+path = 'OpenGVLab/InternVL2-26B'
+
+# Data
+data_root = './data/llava_data/'
+data_path = data_root + 'LLaVA-Instruct-150K/llava_v1_5_mix665k.json'
+image_folder = data_root + 'llava_images'
+prompt_template = PROMPT_TEMPLATE.internlm2_chat
+max_length = 8192
+
+# Scheduler & Optimizer
+batch_size = 2  # per_device
+accumulative_counts = 4
+dataloader_num_workers = 4
+max_epochs = 1
+optim_type = AdamW
+# official 1024 -> 4e-5
+lr = 1e-6
+betas = (0.9, 0.999)
+weight_decay = 0.05
+max_norm = 1  # grad clip
+warmup_ratio = 0.03
+
+# Save
+save_steps = 1000
+save_total_limit = 1  # Maximum checkpoints to keep (-1 means unlimited)
+
+#######################################################################
+#            PART 2  Model & Tokenizer & Image Processor              #
+#######################################################################
+model = dict(
+    type=InternVL_V1_5,
+    model_path=path,
+    freeze_llm=True,
+    freeze_visual_encoder=True,
+    quantization_llm=True,  # or False
+    quantization_vit=False,  # or True and uncomment visual_encoder_lora
+    # comment the following lines if you don't want to use Lora in llm
+    llm_lora=dict(
+        type=LoraConfig,
+        r=128,
+        lora_alpha=256,
+        lora_dropout=0.05,
+        target_modules=None,
+        task_type='CAUSAL_LM'),
+    # uncomment the following lines if you want to use Lora in visual encoder  # noqa
+    # visual_encoder_lora=dict(
+    #     type=LoraConfig, r=64, lora_alpha=16, lora_dropout=0.05,
+    #     target_modules=['attn.qkv', 'attn.proj', 'mlp.fc1', 'mlp.fc2'])
+)
+
+#######################################################################
+#                      PART 3  Dataset & Dataloader                   #
+#######################################################################
+llava_dataset = dict(
+    type=InternVL_V1_5_Dataset,
+    model_path=path,
+    data_paths=data_path,
+    image_folders=image_folder,
+    template=prompt_template,
+    max_length=max_length)
+
+train_dataloader = dict(
+    batch_size=batch_size,
+    num_workers=dataloader_num_workers,
+    dataset=llava_dataset,
+    sampler=dict(
+        type=LengthGroupedSampler,
+        length_property='modality_length',
+        per_device_batch_size=batch_size * accumulative_counts),
+    collate_fn=dict(type=default_collate_fn))
+
+#######################################################################
+#                    PART 4  Scheduler & Optimizer                    #
+#######################################################################
+# optimizer
+optim_wrapper = dict(
+    type=AmpOptimWrapper,
+    optimizer=dict(
+        type=optim_type, lr=lr, betas=betas, weight_decay=weight_decay),
+    clip_grad=dict(max_norm=max_norm, error_if_nonfinite=False),
+    accumulative_counts=accumulative_counts,
+    loss_scale='dynamic',
+    dtype='float16')
+
+# learning policy
+# More information: https://github.com/open-mmlab/mmengine/blob/main/docs/en/tutorials/param_scheduler.md  # noqa: E501
+param_scheduler = [
+    dict(
+        type=LinearLR,
+        start_factor=1e-5,
+        by_epoch=True,
+        begin=0,
+        end=warmup_ratio * max_epochs,
+        convert_to_iter_based=True),
+    dict(
+        type=CosineAnnealingLR,
+        eta_min=0.0,
+        by_epoch=True,
+        begin=warmup_ratio * max_epochs,
+        end=max_epochs,
+        convert_to_iter_based=True)
+]
+
+# train, val, test setting
+train_cfg = dict(type=TrainLoop, max_epochs=max_epochs)
+
+#######################################################################
+#                           PART 5  Runtime                           #
+#######################################################################
+# Log the dialogue periodically during training, optional
+tokenizer = dict(
+    type=AutoTokenizer.from_pretrained,
+    pretrained_model_name_or_path=path,
+    trust_remote_code=True)
+
+custom_hooks = [
+    dict(type=DatasetInfoHook, tokenizer=tokenizer),
+]
+
+# configure default hooks
+default_hooks = dict(
+    # record the time of every iteration.
+    timer=dict(type=IterTimerHook),
+    # print log every 10 iterations.
+    logger=dict(type=LoggerHook, log_metric_by_epoch=False, interval=10),
+    # enable the parameter scheduler.
+    param_scheduler=dict(type=ParamSchedulerHook),
+    # save checkpoint per `save_steps`.
+    checkpoint=dict(
+        type=CheckpointHook,
+        save_optimizer=False,
+        by_epoch=False,
+        interval=save_steps,
+        max_keep_ckpts=save_total_limit),
+    # set sampler seed in distributed environment.
+    sampler_seed=dict(type=DistSamplerSeedHook),
+)
+
+# configure environment
+env_cfg = dict(
+    # whether to enable cudnn benchmark
+    cudnn_benchmark=False,
+    # set multi process parameters
+    mp_cfg=dict(mp_start_method='fork', opencv_num_threads=0),
+    # set distributed parameters
+    dist_cfg=dict(backend='nccl'),
+)
+
+# set visualizer
+visualizer = None
+
+# set log level
+log_level = 'INFO'
+
+# load from which checkpoint
+load_from = None
+
+# whether to resume training from the loaded checkpoint
+resume = False
+
+# Defaults to use random seed and disable `deterministic`
+randomness = dict(seed=None, deterministic=False)
+
+# set log processor
+log_processor = dict(by_epoch=False)
diff --git a/xtuner/configs/internvl/v2/internvl_v2_internlm2_2b_finetune.py b/xtuner/configs/internvl/v2/internvl_v2_internlm2_2b_finetune.py
new file mode 100644
index 000000000..a921cf0c0
--- /dev/null
+++ b/xtuner/configs/internvl/v2/internvl_v2_internlm2_2b_finetune.py
@@ -0,0 +1,170 @@
+# Copyright (c) OpenMMLab. All rights reserved.
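+# Full finetune of InternVL2-2B: with batch_size=4 and accumulative_counts=4,
+# each rank contributes 16 samples per optimizer step.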
+from mmengine.hooks import (CheckpointHook, DistSamplerSeedHook, IterTimerHook, + LoggerHook, ParamSchedulerHook) +from mmengine.optim import AmpOptimWrapper, CosineAnnealingLR, LinearLR +from torch.optim import AdamW +from transformers import AutoTokenizer + +from xtuner.dataset import InternVL_V1_5_Dataset +from xtuner.dataset.collate_fns import default_collate_fn +from xtuner.dataset.samplers import LengthGroupedSampler +from xtuner.engine.hooks import DatasetInfoHook +from xtuner.engine.runner import TrainLoop +from xtuner.model import InternVL_V1_5 +from xtuner.utils import PROMPT_TEMPLATE + +####################################################################### +# PART 1 Settings # +####################################################################### +# Model +path = 'OpenGVLab/InternVL2-2B' + +# Data +data_root = './data/llava_data/' +data_path = data_root + 'LLaVA-Instruct-150K/llava_v1_5_mix665k.json' +image_folder = data_root + 'llava_images' +prompt_template = PROMPT_TEMPLATE.internlm2_chat +max_length = 8192 + +# Scheduler & Optimizer +batch_size = 4 # per_device +accumulative_counts = 4 +dataloader_num_workers = 4 +max_epochs = 1 +optim_type = AdamW +# official 1024 -> 4e-5 +lr = 1e-6 +betas = (0.9, 0.999) +weight_decay = 0.05 +max_norm = 1 # grad clip +warmup_ratio = 0.03 + +# Save +save_steps = 1000 +save_total_limit = 1 # Maximum checkpoints to keep (-1 means unlimited) + +####################################################################### +# PART 2 Model & Tokenizer & Image Processor # +####################################################################### +model = dict( + type=InternVL_V1_5, + model_path=path, + freeze_llm=False, + freeze_visual_encoder=True # or False +) + +####################################################################### +# PART 3 Dataset & Dataloader # +####################################################################### +llava_dataset = dict( + type=InternVL_V1_5_Dataset, + model_path=path, + data_paths=data_path, + image_folders=image_folder, + template=prompt_template, + max_length=max_length) + +train_dataloader = dict( + batch_size=batch_size, + num_workers=dataloader_num_workers, + dataset=llava_dataset, + sampler=dict( + type=LengthGroupedSampler, + length_property='modality_length', + per_device_batch_size=batch_size * accumulative_counts), + collate_fn=dict(type=default_collate_fn)) + +####################################################################### +# PART 4 Scheduler & Optimizer # +####################################################################### +# optimizer +optim_wrapper = dict( + type=AmpOptimWrapper, + optimizer=dict( + type=optim_type, lr=lr, betas=betas, weight_decay=weight_decay), + clip_grad=dict(max_norm=max_norm, error_if_nonfinite=False), + accumulative_counts=accumulative_counts, + loss_scale='dynamic', + dtype='float16') + +# learning policy +# More information: https://github.com/open-mmlab/mmengine/blob/main/docs/en/tutorials/param_scheduler.md # noqa: E501 +param_scheduler = [ + dict( + type=LinearLR, + start_factor=1e-5, + by_epoch=True, + begin=0, + end=warmup_ratio * max_epochs, + convert_to_iter_based=True), + dict( + type=CosineAnnealingLR, + eta_min=0.0, + by_epoch=True, + begin=warmup_ratio * max_epochs, + end=max_epochs, + convert_to_iter_based=True) +] + +# train, val, test setting +train_cfg = dict(type=TrainLoop, max_epochs=max_epochs) + +####################################################################### +# PART 5 Runtime # 
+####################################################################### +# Log the dialogue periodically during the training process, optional +tokenizer = dict( + type=AutoTokenizer.from_pretrained, + pretrained_model_name_or_path=path, + trust_remote_code=True) + +custom_hooks = [ + dict(type=DatasetInfoHook, tokenizer=tokenizer), +] + +# configure default hooks +default_hooks = dict( + # record the time of every iteration. + timer=dict(type=IterTimerHook), + # print log every 10 iterations. + logger=dict(type=LoggerHook, log_metric_by_epoch=False, interval=10), + # enable the parameter scheduler. + param_scheduler=dict(type=ParamSchedulerHook), + # save checkpoint per `save_steps`. + checkpoint=dict( + type=CheckpointHook, + save_optimizer=False, + by_epoch=False, + interval=save_steps, + max_keep_ckpts=save_total_limit), + # set sampler seed in distributed evrionment. + sampler_seed=dict(type=DistSamplerSeedHook), +) + +# configure environment +env_cfg = dict( + # whether to enable cudnn benchmark + cudnn_benchmark=False, + # set multi process parameters + mp_cfg=dict(mp_start_method='fork', opencv_num_threads=0), + # set distributed parameters + dist_cfg=dict(backend='nccl'), +) + +# set visualizer +visualizer = None + +# set log level +log_level = 'INFO' + +# load from which checkpoint +load_from = None + +# whether to resume training from the loaded checkpoint +resume = False + +# Defaults to use random seed and disable `deterministic` +randomness = dict(seed=None, deterministic=False) + +# set log processor +log_processor = dict(by_epoch=False) diff --git a/xtuner/configs/internvl/v2/internvl_v2_internlm2_2b_lora_finetune.py b/xtuner/configs/internvl/v2/internvl_v2_internlm2_2b_lora_finetune.py new file mode 100644 index 000000000..44b3c3944 --- /dev/null +++ b/xtuner/configs/internvl/v2/internvl_v2_internlm2_2b_lora_finetune.py @@ -0,0 +1,183 @@ +# Copyright (c) OpenMMLab. All rights reserved. 
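+#
+# LoRA variant: the LLM and visual encoder are frozen below and only the
+# adapter weights defined in `llm_lora` are trained. The effective global
+# batch size is batch_size * accumulative_counts * num_gpus, e.g.
+# 8 * 2 * 8 = 128 with the defaults below (the 8-GPU count is an assumption).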
+from mmengine.hooks import (CheckpointHook, DistSamplerSeedHook, IterTimerHook, + LoggerHook, ParamSchedulerHook) +from mmengine.optim import AmpOptimWrapper, CosineAnnealingLR, LinearLR +from peft import LoraConfig +from torch.optim import AdamW +from transformers import AutoTokenizer + +from xtuner.dataset import InternVL_V1_5_Dataset +from xtuner.dataset.collate_fns import default_collate_fn +from xtuner.dataset.samplers import LengthGroupedSampler +from xtuner.engine.hooks import DatasetInfoHook +from xtuner.engine.runner import TrainLoop +from xtuner.model import InternVL_V1_5 +from xtuner.utils import PROMPT_TEMPLATE + +####################################################################### +# PART 1 Settings # +####################################################################### +# Model +path = 'OpenGVLab/InternVL2-2B' + +# Data +data_root = './data/llava_data/' +data_path = data_root + 'LLaVA-Instruct-150K/llava_v1_5_mix665k.json' +image_folder = data_root + 'llava_images' +prompt_template = PROMPT_TEMPLATE.internlm2_chat +max_length = 8192 + +# Scheduler & Optimizer +batch_size = 8 # per_device +accumulative_counts = 2 +dataloader_num_workers = 4 +max_epochs = 1 +optim_type = AdamW +# official 1024 -> 4e-5 +lr = 1e-6 +betas = (0.9, 0.999) +weight_decay = 0.05 +max_norm = 1 # grad clip +warmup_ratio = 0.03 + +# Save +save_steps = 1000 +save_total_limit = 1 # Maximum checkpoints to keep (-1 means unlimited) + +####################################################################### +# PART 2 Model & Tokenizer & Image Processor # +####################################################################### +model = dict( + type=InternVL_V1_5, + model_path=path, + freeze_llm=True, + freeze_visual_encoder=True, + # comment the following lines if you don't want to use Lora in llm + llm_lora=dict( + type=LoraConfig, + r=128, + lora_alpha=256, + lora_dropout=0.05, + target_modules=None, + task_type='CAUSAL_LM'), + # uncomment the following lines if you don't want to use Lora in visual encoder # noqa + # visual_encoder_lora=dict( + # type=LoraConfig, r=64, lora_alpha=16, lora_dropout=0.05, + # target_modules=['attn.qkv', 'attn.proj', 'mlp.fc1', 'mlp.fc2']) +) + +####################################################################### +# PART 3 Dataset & Dataloader # +####################################################################### +llava_dataset = dict( + type=InternVL_V1_5_Dataset, + model_path=path, + data_paths=data_path, + image_folders=image_folder, + template=prompt_template, + max_length=max_length) + +train_dataloader = dict( + batch_size=batch_size, + num_workers=dataloader_num_workers, + dataset=llava_dataset, + sampler=dict( + type=LengthGroupedSampler, + length_property='modality_length', + per_device_batch_size=batch_size * accumulative_counts), + collate_fn=dict(type=default_collate_fn)) + +####################################################################### +# PART 4 Scheduler & Optimizer # +####################################################################### +# optimizer +optim_wrapper = dict( + type=AmpOptimWrapper, + optimizer=dict( + type=optim_type, lr=lr, betas=betas, weight_decay=weight_decay), + clip_grad=dict(max_norm=max_norm, error_if_nonfinite=False), + accumulative_counts=accumulative_counts, + loss_scale='dynamic', + dtype='float16') + +# learning policy +# More information: https://github.com/open-mmlab/mmengine/blob/main/docs/en/tutorials/param_scheduler.md # noqa: E501 +param_scheduler = [ + dict( + type=LinearLR, + start_factor=1e-5, + 
by_epoch=True, + begin=0, + end=warmup_ratio * max_epochs, + convert_to_iter_based=True), + dict( + type=CosineAnnealingLR, + eta_min=0.0, + by_epoch=True, + begin=warmup_ratio * max_epochs, + end=max_epochs, + convert_to_iter_based=True) +] + +# train, val, test setting +train_cfg = dict(type=TrainLoop, max_epochs=max_epochs) + +####################################################################### +# PART 5 Runtime # +####################################################################### +# Log the dialogue periodically during the training process, optional +tokenizer = dict( + type=AutoTokenizer.from_pretrained, + pretrained_model_name_or_path=path, + trust_remote_code=True) + +custom_hooks = [ + dict(type=DatasetInfoHook, tokenizer=tokenizer), +] + +# configure default hooks +default_hooks = dict( + # record the time of every iteration. + timer=dict(type=IterTimerHook), + # print log every 10 iterations. + logger=dict(type=LoggerHook, log_metric_by_epoch=False, interval=10), + # enable the parameter scheduler. + param_scheduler=dict(type=ParamSchedulerHook), + # save checkpoint per `save_steps`. + checkpoint=dict( + type=CheckpointHook, + save_optimizer=False, + by_epoch=False, + interval=save_steps, + max_keep_ckpts=save_total_limit), + # set sampler seed in distributed evrionment. + sampler_seed=dict(type=DistSamplerSeedHook), +) + +# configure environment +env_cfg = dict( + # whether to enable cudnn benchmark + cudnn_benchmark=False, + # set multi process parameters + mp_cfg=dict(mp_start_method='fork', opencv_num_threads=0), + # set distributed parameters + dist_cfg=dict(backend='nccl'), +) + +# set visualizer +visualizer = None + +# set log level +log_level = 'INFO' + +# load from which checkpoint +load_from = None + +# whether to resume training from the loaded checkpoint +resume = False + +# Defaults to use random seed and disable `deterministic` +randomness = dict(seed=None, deterministic=False) + +# set log processor +log_processor = dict(by_epoch=False) diff --git a/xtuner/configs/internvl/v2/internvl_v2_internlm2_2b_qlora_finetune.py b/xtuner/configs/internvl/v2/internvl_v2_internlm2_2b_qlora_finetune.py new file mode 100644 index 000000000..5840a593f --- /dev/null +++ b/xtuner/configs/internvl/v2/internvl_v2_internlm2_2b_qlora_finetune.py @@ -0,0 +1,185 @@ +# Copyright (c) OpenMMLab. All rights reserved. 
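+#
+# QLoRA variant: compared with the plain LoRA config, `quantization_llm=True`
+# below additionally loads the frozen LLM with weight quantization (4-bit in
+# typical QLoRA setups; an assumption about the wrapper's default, not
+# verified here), while the LoRA adapters still train in higher precision.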
+from mmengine.hooks import (CheckpointHook, DistSamplerSeedHook, IterTimerHook, + LoggerHook, ParamSchedulerHook) +from mmengine.optim import AmpOptimWrapper, CosineAnnealingLR, LinearLR +from peft import LoraConfig +from torch.optim import AdamW +from transformers import AutoTokenizer + +from xtuner.dataset import InternVL_V1_5_Dataset +from xtuner.dataset.collate_fns import default_collate_fn +from xtuner.dataset.samplers import LengthGroupedSampler +from xtuner.engine.hooks import DatasetInfoHook +from xtuner.engine.runner import TrainLoop +from xtuner.model import InternVL_V1_5 +from xtuner.utils import PROMPT_TEMPLATE + +####################################################################### +# PART 1 Settings # +####################################################################### +# Model +path = 'OpenGVLab/InternVL2-2B' + +# Data +data_root = './data/llava_data/' +data_path = data_root + 'LLaVA-Instruct-150K/llava_v1_5_mix665k.json' +image_folder = data_root + 'llava_images' +prompt_template = PROMPT_TEMPLATE.internlm2_chat +max_length = 8192 + +# Scheduler & Optimizer +batch_size = 8 # per_device +accumulative_counts = 2 +dataloader_num_workers = 4 +max_epochs = 1 +optim_type = AdamW +# official 1024 -> 4e-5 +lr = 1e-6 +betas = (0.9, 0.999) +weight_decay = 0.05 +max_norm = 1 # grad clip +warmup_ratio = 0.03 + +# Save +save_steps = 1000 +save_total_limit = 1 # Maximum checkpoints to keep (-1 means unlimited) + +####################################################################### +# PART 2 Model & Tokenizer & Image Processor # +####################################################################### +model = dict( + type=InternVL_V1_5, + model_path=path, + freeze_llm=True, + freeze_visual_encoder=True, + quantization_llm=True, # or False + quantization_vit=False, # or True and uncomment visual_encoder_lora + # comment the following lines if you don't want to use Lora in llm + llm_lora=dict( + type=LoraConfig, + r=128, + lora_alpha=256, + lora_dropout=0.05, + target_modules=None, + task_type='CAUSAL_LM'), + # uncomment the following lines if you don't want to use Lora in visual encoder # noqa + # visual_encoder_lora=dict( + # type=LoraConfig, r=64, lora_alpha=16, lora_dropout=0.05, + # target_modules=['attn.qkv', 'attn.proj', 'mlp.fc1', 'mlp.fc2']) +) + +####################################################################### +# PART 3 Dataset & Dataloader # +####################################################################### +llava_dataset = dict( + type=InternVL_V1_5_Dataset, + model_path=path, + data_paths=data_path, + image_folders=image_folder, + template=prompt_template, + max_length=max_length) + +train_dataloader = dict( + batch_size=batch_size, + num_workers=dataloader_num_workers, + dataset=llava_dataset, + sampler=dict( + type=LengthGroupedSampler, + length_property='modality_length', + per_device_batch_size=batch_size * accumulative_counts), + collate_fn=dict(type=default_collate_fn)) + +####################################################################### +# PART 4 Scheduler & Optimizer # +####################################################################### +# optimizer +optim_wrapper = dict( + type=AmpOptimWrapper, + optimizer=dict( + type=optim_type, lr=lr, betas=betas, weight_decay=weight_decay), + clip_grad=dict(max_norm=max_norm, error_if_nonfinite=False), + accumulative_counts=accumulative_counts, + loss_scale='dynamic', + dtype='float16') + +# learning policy +# More information: 
https://github.com/open-mmlab/mmengine/blob/main/docs/en/tutorials/param_scheduler.md # noqa: E501 +param_scheduler = [ + dict( + type=LinearLR, + start_factor=1e-5, + by_epoch=True, + begin=0, + end=warmup_ratio * max_epochs, + convert_to_iter_based=True), + dict( + type=CosineAnnealingLR, + eta_min=0.0, + by_epoch=True, + begin=warmup_ratio * max_epochs, + end=max_epochs, + convert_to_iter_based=True) +] + +# train, val, test setting +train_cfg = dict(type=TrainLoop, max_epochs=max_epochs) + +####################################################################### +# PART 5 Runtime # +####################################################################### +# Log the dialogue periodically during the training process, optional +tokenizer = dict( + type=AutoTokenizer.from_pretrained, + pretrained_model_name_or_path=path, + trust_remote_code=True) + +custom_hooks = [ + dict(type=DatasetInfoHook, tokenizer=tokenizer), +] + +# configure default hooks +default_hooks = dict( + # record the time of every iteration. + timer=dict(type=IterTimerHook), + # print log every 10 iterations. + logger=dict(type=LoggerHook, log_metric_by_epoch=False, interval=10), + # enable the parameter scheduler. + param_scheduler=dict(type=ParamSchedulerHook), + # save checkpoint per `save_steps`. + checkpoint=dict( + type=CheckpointHook, + save_optimizer=False, + by_epoch=False, + interval=save_steps, + max_keep_ckpts=save_total_limit), + # set sampler seed in distributed evrionment. + sampler_seed=dict(type=DistSamplerSeedHook), +) + +# configure environment +env_cfg = dict( + # whether to enable cudnn benchmark + cudnn_benchmark=False, + # set multi process parameters + mp_cfg=dict(mp_start_method='fork', opencv_num_threads=0), + # set distributed parameters + dist_cfg=dict(backend='nccl'), +) + +# set visualizer +visualizer = None + +# set log level +log_level = 'INFO' + +# load from which checkpoint +load_from = None + +# whether to resume training from the loaded checkpoint +resume = False + +# Defaults to use random seed and disable `deterministic` +randomness = dict(seed=None, deterministic=False) + +# set log processor +log_processor = dict(by_epoch=False) diff --git a/xtuner/configs/internvl/v2/internvl_v2_internlm2_5_8b_finetune.py b/xtuner/configs/internvl/v2/internvl_v2_internlm2_5_8b_finetune.py new file mode 100644 index 000000000..2a92c017f --- /dev/null +++ b/xtuner/configs/internvl/v2/internvl_v2_internlm2_5_8b_finetune.py @@ -0,0 +1,170 @@ +# Copyright (c) OpenMMLab. All rights reserved. 
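+#
+# After training, the saved .pth checkpoint can be converted to a
+# HuggingFace-format model with xtuner's converter (a sketch; the work_dirs
+# path and iteration number below are illustrative, not fixed outputs):
+#
+#   xtuner convert pth_to_hf \
+#       xtuner/configs/internvl/v2/internvl_v2_internlm2_5_8b_finetune.py \
+#       work_dirs/internvl_v2_internlm2_5_8b_finetune/iter_5000.pth \
+#       ./internvl_v2_internlm2_5_8b_hf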
+from mmengine.hooks import (CheckpointHook, DistSamplerSeedHook, IterTimerHook, + LoggerHook, ParamSchedulerHook) +from mmengine.optim import AmpOptimWrapper, CosineAnnealingLR, LinearLR +from torch.optim import AdamW +from transformers import AutoTokenizer + +from xtuner.dataset import InternVL_V1_5_Dataset +from xtuner.dataset.collate_fns import default_collate_fn +from xtuner.dataset.samplers import LengthGroupedSampler +from xtuner.engine.hooks import DatasetInfoHook +from xtuner.engine.runner import TrainLoop +from xtuner.model import InternVL_V1_5 +from xtuner.utils import PROMPT_TEMPLATE + +####################################################################### +# PART 1 Settings # +####################################################################### +# Model +path = 'OpenGVLab/InternVL2-8B' + +# Data +data_root = './data/llava_data/' +data_path = data_root + 'LLaVA-Instruct-150K/llava_v1_5_mix665k.json' +image_folder = data_root + 'llava_images' +prompt_template = PROMPT_TEMPLATE.internlm2_chat +max_length = 8192 + +# Scheduler & Optimizer +batch_size = 4 # per_device +accumulative_counts = 4 +dataloader_num_workers = 4 +max_epochs = 1 +optim_type = AdamW +# official 1024 -> 4e-5 +lr = 1e-6 +betas = (0.9, 0.999) +weight_decay = 0.05 +max_norm = 1 # grad clip +warmup_ratio = 0.03 + +# Save +save_steps = 1000 +save_total_limit = 1 # Maximum checkpoints to keep (-1 means unlimited) + +####################################################################### +# PART 2 Model & Tokenizer & Image Processor # +####################################################################### +model = dict( + type=InternVL_V1_5, + model_path=path, + freeze_llm=False, + freeze_visual_encoder=True # or False +) + +####################################################################### +# PART 3 Dataset & Dataloader # +####################################################################### +llava_dataset = dict( + type=InternVL_V1_5_Dataset, + model_path=path, + data_paths=data_path, + image_folders=image_folder, + template=prompt_template, + max_length=max_length) + +train_dataloader = dict( + batch_size=batch_size, + num_workers=dataloader_num_workers, + dataset=llava_dataset, + sampler=dict( + type=LengthGroupedSampler, + length_property='modality_length', + per_device_batch_size=batch_size * accumulative_counts), + collate_fn=dict(type=default_collate_fn)) + +####################################################################### +# PART 4 Scheduler & Optimizer # +####################################################################### +# optimizer +optim_wrapper = dict( + type=AmpOptimWrapper, + optimizer=dict( + type=optim_type, lr=lr, betas=betas, weight_decay=weight_decay), + clip_grad=dict(max_norm=max_norm, error_if_nonfinite=False), + accumulative_counts=accumulative_counts, + loss_scale='dynamic', + dtype='float16') + +# learning policy +# More information: https://github.com/open-mmlab/mmengine/blob/main/docs/en/tutorials/param_scheduler.md # noqa: E501 +param_scheduler = [ + dict( + type=LinearLR, + start_factor=1e-5, + by_epoch=True, + begin=0, + end=warmup_ratio * max_epochs, + convert_to_iter_based=True), + dict( + type=CosineAnnealingLR, + eta_min=0.0, + by_epoch=True, + begin=warmup_ratio * max_epochs, + end=max_epochs, + convert_to_iter_based=True) +] + +# train, val, test setting +train_cfg = dict(type=TrainLoop, max_epochs=max_epochs) + +####################################################################### +# PART 5 Runtime # 
+####################################################################### +# Log the dialogue periodically during the training process, optional +tokenizer = dict( + type=AutoTokenizer.from_pretrained, + pretrained_model_name_or_path=path, + trust_remote_code=True) + +custom_hooks = [ + dict(type=DatasetInfoHook, tokenizer=tokenizer), +] + +# configure default hooks +default_hooks = dict( + # record the time of every iteration. + timer=dict(type=IterTimerHook), + # print log every 10 iterations. + logger=dict(type=LoggerHook, log_metric_by_epoch=False, interval=10), + # enable the parameter scheduler. + param_scheduler=dict(type=ParamSchedulerHook), + # save checkpoint per `save_steps`. + checkpoint=dict( + type=CheckpointHook, + save_optimizer=False, + by_epoch=False, + interval=save_steps, + max_keep_ckpts=save_total_limit), + # set sampler seed in distributed evrionment. + sampler_seed=dict(type=DistSamplerSeedHook), +) + +# configure environment +env_cfg = dict( + # whether to enable cudnn benchmark + cudnn_benchmark=False, + # set multi process parameters + mp_cfg=dict(mp_start_method='fork', opencv_num_threads=0), + # set distributed parameters + dist_cfg=dict(backend='nccl'), +) + +# set visualizer +visualizer = None + +# set log level +log_level = 'INFO' + +# load from which checkpoint +load_from = None + +# whether to resume training from the loaded checkpoint +resume = False + +# Defaults to use random seed and disable `deterministic` +randomness = dict(seed=None, deterministic=False) + +# set log processor +log_processor = dict(by_epoch=False) diff --git a/xtuner/configs/internvl/v2/internvl_v2_internlm2_5_8b_lora_finetune.py b/xtuner/configs/internvl/v2/internvl_v2_internlm2_5_8b_lora_finetune.py new file mode 100644 index 000000000..d9fa7ab3a --- /dev/null +++ b/xtuner/configs/internvl/v2/internvl_v2_internlm2_5_8b_lora_finetune.py @@ -0,0 +1,183 @@ +# Copyright (c) OpenMMLab. All rights reserved. 
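+#
+# Scheduler note: with `convert_to_iter_based=True`, the LinearLR stage below
+# warms the learning rate up over the first warmup_ratio * max_epochs of the
+# schedule, i.e. roughly the first 3% of all iterations with the defaults,
+# after which CosineAnnealingLR decays it to eta_min=0.0 by the end.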
+from mmengine.hooks import (CheckpointHook, DistSamplerSeedHook, IterTimerHook, + LoggerHook, ParamSchedulerHook) +from mmengine.optim import AmpOptimWrapper, CosineAnnealingLR, LinearLR +from peft import LoraConfig +from torch.optim import AdamW +from transformers import AutoTokenizer + +from xtuner.dataset import InternVL_V1_5_Dataset +from xtuner.dataset.collate_fns import default_collate_fn +from xtuner.dataset.samplers import LengthGroupedSampler +from xtuner.engine.hooks import DatasetInfoHook +from xtuner.engine.runner import TrainLoop +from xtuner.model import InternVL_V1_5 +from xtuner.utils import PROMPT_TEMPLATE + +####################################################################### +# PART 1 Settings # +####################################################################### +# Model +path = 'OpenGVLab/InternVL2-8B' + +# Data +data_root = './data/llava_data/' +data_path = data_root + 'LLaVA-Instruct-150K/llava_v1_5_mix665k.json' +image_folder = data_root + 'llava_images' +prompt_template = PROMPT_TEMPLATE.internlm2_chat +max_length = 8192 + +# Scheduler & Optimizer +batch_size = 8 # per_device +accumulative_counts = 2 +dataloader_num_workers = 4 +max_epochs = 1 +optim_type = AdamW +# official 1024 -> 4e-5 +lr = 1e-6 +betas = (0.9, 0.999) +weight_decay = 0.05 +max_norm = 1 # grad clip +warmup_ratio = 0.03 + +# Save +save_steps = 1000 +save_total_limit = 1 # Maximum checkpoints to keep (-1 means unlimited) + +####################################################################### +# PART 2 Model & Tokenizer & Image Processor # +####################################################################### +model = dict( + type=InternVL_V1_5, + model_path=path, + freeze_llm=True, + freeze_visual_encoder=True, + # comment the following lines if you don't want to use Lora in llm + llm_lora=dict( + type=LoraConfig, + r=128, + lora_alpha=256, + lora_dropout=0.05, + target_modules=None, + task_type='CAUSAL_LM'), + # uncomment the following lines if you don't want to use Lora in visual encoder # noqa + # visual_encoder_lora=dict( + # type=LoraConfig, r=64, lora_alpha=16, lora_dropout=0.05, + # target_modules=['attn.qkv', 'attn.proj', 'mlp.fc1', 'mlp.fc2']) +) + +####################################################################### +# PART 3 Dataset & Dataloader # +####################################################################### +llava_dataset = dict( + type=InternVL_V1_5_Dataset, + model_path=path, + data_paths=data_path, + image_folders=image_folder, + template=prompt_template, + max_length=max_length) + +train_dataloader = dict( + batch_size=batch_size, + num_workers=dataloader_num_workers, + dataset=llava_dataset, + sampler=dict( + type=LengthGroupedSampler, + length_property='modality_length', + per_device_batch_size=batch_size * accumulative_counts), + collate_fn=dict(type=default_collate_fn)) + +####################################################################### +# PART 4 Scheduler & Optimizer # +####################################################################### +# optimizer +optim_wrapper = dict( + type=AmpOptimWrapper, + optimizer=dict( + type=optim_type, lr=lr, betas=betas, weight_decay=weight_decay), + clip_grad=dict(max_norm=max_norm, error_if_nonfinite=False), + accumulative_counts=accumulative_counts, + loss_scale='dynamic', + dtype='float16') + +# learning policy +# More information: https://github.com/open-mmlab/mmengine/blob/main/docs/en/tutorials/param_scheduler.md # noqa: E501 +param_scheduler = [ + dict( + type=LinearLR, + start_factor=1e-5, + 
by_epoch=True, + begin=0, + end=warmup_ratio * max_epochs, + convert_to_iter_based=True), + dict( + type=CosineAnnealingLR, + eta_min=0.0, + by_epoch=True, + begin=warmup_ratio * max_epochs, + end=max_epochs, + convert_to_iter_based=True) +] + +# train, val, test setting +train_cfg = dict(type=TrainLoop, max_epochs=max_epochs) + +####################################################################### +# PART 5 Runtime # +####################################################################### +# Log the dialogue periodically during the training process, optional +tokenizer = dict( + type=AutoTokenizer.from_pretrained, + pretrained_model_name_or_path=path, + trust_remote_code=True) + +custom_hooks = [ + dict(type=DatasetInfoHook, tokenizer=tokenizer), +] + +# configure default hooks +default_hooks = dict( + # record the time of every iteration. + timer=dict(type=IterTimerHook), + # print log every 10 iterations. + logger=dict(type=LoggerHook, log_metric_by_epoch=False, interval=10), + # enable the parameter scheduler. + param_scheduler=dict(type=ParamSchedulerHook), + # save checkpoint per `save_steps`. + checkpoint=dict( + type=CheckpointHook, + save_optimizer=False, + by_epoch=False, + interval=save_steps, + max_keep_ckpts=save_total_limit), + # set sampler seed in distributed evrionment. + sampler_seed=dict(type=DistSamplerSeedHook), +) + +# configure environment +env_cfg = dict( + # whether to enable cudnn benchmark + cudnn_benchmark=False, + # set multi process parameters + mp_cfg=dict(mp_start_method='fork', opencv_num_threads=0), + # set distributed parameters + dist_cfg=dict(backend='nccl'), +) + +# set visualizer +visualizer = None + +# set log level +log_level = 'INFO' + +# load from which checkpoint +load_from = None + +# whether to resume training from the loaded checkpoint +resume = False + +# Defaults to use random seed and disable `deterministic` +randomness = dict(seed=None, deterministic=False) + +# set log processor +log_processor = dict(by_epoch=False) diff --git a/xtuner/configs/internvl/v2/internvl_v2_internlm2_5_8b_qlora_finetune.py b/xtuner/configs/internvl/v2/internvl_v2_internlm2_5_8b_qlora_finetune.py new file mode 100644 index 000000000..b3d04bb43 --- /dev/null +++ b/xtuner/configs/internvl/v2/internvl_v2_internlm2_5_8b_qlora_finetune.py @@ -0,0 +1,185 @@ +# Copyright (c) OpenMMLab. All rights reserved. 
+from mmengine.hooks import (CheckpointHook, DistSamplerSeedHook, IterTimerHook, + LoggerHook, ParamSchedulerHook) +from mmengine.optim import AmpOptimWrapper, CosineAnnealingLR, LinearLR +from peft import LoraConfig +from torch.optim import AdamW +from transformers import AutoTokenizer + +from xtuner.dataset import InternVL_V1_5_Dataset +from xtuner.dataset.collate_fns import default_collate_fn +from xtuner.dataset.samplers import LengthGroupedSampler +from xtuner.engine.hooks import DatasetInfoHook +from xtuner.engine.runner import TrainLoop +from xtuner.model import InternVL_V1_5 +from xtuner.utils import PROMPT_TEMPLATE + +####################################################################### +# PART 1 Settings # +####################################################################### +# Model +path = 'OpenGVLab/InternVL2-8B' + +# Data +data_root = './data/llava_data/' +data_path = data_root + 'LLaVA-Instruct-150K/llava_v1_5_mix665k.json' +image_folder = data_root + 'llava_images' +prompt_template = PROMPT_TEMPLATE.internlm2_chat +max_length = 8192 + +# Scheduler & Optimizer +batch_size = 8 # per_device +accumulative_counts = 2 +dataloader_num_workers = 4 +max_epochs = 1 +optim_type = AdamW +# official 1024 -> 4e-5 +lr = 1e-6 +betas = (0.9, 0.999) +weight_decay = 0.05 +max_norm = 1 # grad clip +warmup_ratio = 0.03 + +# Save +save_steps = 1000 +save_total_limit = 1 # Maximum checkpoints to keep (-1 means unlimited) + +####################################################################### +# PART 2 Model & Tokenizer & Image Processor # +####################################################################### +model = dict( + type=InternVL_V1_5, + model_path=path, + freeze_llm=True, + freeze_visual_encoder=True, + quantization_llm=True, # or False + quantization_vit=False, # or True and uncomment visual_encoder_lora + # comment the following lines if you don't want to use Lora in llm + llm_lora=dict( + type=LoraConfig, + r=128, + lora_alpha=256, + lora_dropout=0.05, + target_modules=None, + task_type='CAUSAL_LM'), + # uncomment the following lines if you don't want to use Lora in visual encoder # noqa + # visual_encoder_lora=dict( + # type=LoraConfig, r=64, lora_alpha=16, lora_dropout=0.05, + # target_modules=['attn.qkv', 'attn.proj', 'mlp.fc1', 'mlp.fc2']) +) + +####################################################################### +# PART 3 Dataset & Dataloader # +####################################################################### +llava_dataset = dict( + type=InternVL_V1_5_Dataset, + model_path=path, + data_paths=data_path, + image_folders=image_folder, + template=prompt_template, + max_length=max_length) + +train_dataloader = dict( + batch_size=batch_size, + num_workers=dataloader_num_workers, + dataset=llava_dataset, + sampler=dict( + type=LengthGroupedSampler, + length_property='modality_length', + per_device_batch_size=batch_size * accumulative_counts), + collate_fn=dict(type=default_collate_fn)) + +####################################################################### +# PART 4 Scheduler & Optimizer # +####################################################################### +# optimizer +optim_wrapper = dict( + type=AmpOptimWrapper, + optimizer=dict( + type=optim_type, lr=lr, betas=betas, weight_decay=weight_decay), + clip_grad=dict(max_norm=max_norm, error_if_nonfinite=False), + accumulative_counts=accumulative_counts, + loss_scale='dynamic', + dtype='float16') + +# learning policy +# More information: 
https://github.com/open-mmlab/mmengine/blob/main/docs/en/tutorials/param_scheduler.md # noqa: E501 +param_scheduler = [ + dict( + type=LinearLR, + start_factor=1e-5, + by_epoch=True, + begin=0, + end=warmup_ratio * max_epochs, + convert_to_iter_based=True), + dict( + type=CosineAnnealingLR, + eta_min=0.0, + by_epoch=True, + begin=warmup_ratio * max_epochs, + end=max_epochs, + convert_to_iter_based=True) +] + +# train, val, test setting +train_cfg = dict(type=TrainLoop, max_epochs=max_epochs) + +####################################################################### +# PART 5 Runtime # +####################################################################### +# Log the dialogue periodically during the training process, optional +tokenizer = dict( + type=AutoTokenizer.from_pretrained, + pretrained_model_name_or_path=path, + trust_remote_code=True) + +custom_hooks = [ + dict(type=DatasetInfoHook, tokenizer=tokenizer), +] + +# configure default hooks +default_hooks = dict( + # record the time of every iteration. + timer=dict(type=IterTimerHook), + # print log every 10 iterations. + logger=dict(type=LoggerHook, log_metric_by_epoch=False, interval=10), + # enable the parameter scheduler. + param_scheduler=dict(type=ParamSchedulerHook), + # save checkpoint per `save_steps`. + checkpoint=dict( + type=CheckpointHook, + save_optimizer=False, + by_epoch=False, + interval=save_steps, + max_keep_ckpts=save_total_limit), + # set sampler seed in distributed evrionment. + sampler_seed=dict(type=DistSamplerSeedHook), +) + +# configure environment +env_cfg = dict( + # whether to enable cudnn benchmark + cudnn_benchmark=False, + # set multi process parameters + mp_cfg=dict(mp_start_method='fork', opencv_num_threads=0), + # set distributed parameters + dist_cfg=dict(backend='nccl'), +) + +# set visualizer +visualizer = None + +# set log level +log_level = 'INFO' + +# load from which checkpoint +load_from = None + +# whether to resume training from the loaded checkpoint +resume = False + +# Defaults to use random seed and disable `deterministic` +randomness = dict(seed=None, deterministic=False) + +# set log processor +log_processor = dict(by_epoch=False) diff --git a/xtuner/configs/internvl/v2/internvl_v2_phi3_4b_finetune.py b/xtuner/configs/internvl/v2/internvl_v2_phi3_4b_finetune.py new file mode 100644 index 000000000..41a712569 --- /dev/null +++ b/xtuner/configs/internvl/v2/internvl_v2_phi3_4b_finetune.py @@ -0,0 +1,170 @@ +# Copyright (c) OpenMMLab. All rights reserved. 
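+#
+# InternVL2-4B pairs its vision encoder with a Phi-3 LLM, so this config uses
+# PROMPT_TEMPLATE.phi3_chat instead of internlm2_chat; InternVL_V1_5_Dataset
+# added later in this patch likewise switches the system prompt when it
+# detects Phi3ForCausalLM in the model config.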
+from mmengine.hooks import (CheckpointHook, DistSamplerSeedHook, IterTimerHook, + LoggerHook, ParamSchedulerHook) +from mmengine.optim import AmpOptimWrapper, CosineAnnealingLR, LinearLR +from torch.optim import AdamW +from transformers import AutoTokenizer + +from xtuner.dataset import InternVL_V1_5_Dataset +from xtuner.dataset.collate_fns import default_collate_fn +from xtuner.dataset.samplers import LengthGroupedSampler +from xtuner.engine.hooks import DatasetInfoHook +from xtuner.engine.runner import TrainLoop +from xtuner.model import InternVL_V1_5 +from xtuner.utils import PROMPT_TEMPLATE + +####################################################################### +# PART 1 Settings # +####################################################################### +# Model +path = 'OpenGVLab/InternVL2-4B' + +# Data +data_root = './data/llava_data/' +data_path = data_root + 'LLaVA-Instruct-150K/llava_v1_5_mix665k.json' +image_folder = data_root + 'llava_images' +prompt_template = PROMPT_TEMPLATE.phi3_chat +max_length = 8192 + +# Scheduler & Optimizer +batch_size = 4 # per_device +accumulative_counts = 4 +dataloader_num_workers = 4 +max_epochs = 1 +optim_type = AdamW +# official 1024 -> 4e-5 +lr = 1e-6 +betas = (0.9, 0.999) +weight_decay = 0.05 +max_norm = 1 # grad clip +warmup_ratio = 0.03 + +# Save +save_steps = 1000 +save_total_limit = 1 # Maximum checkpoints to keep (-1 means unlimited) + +####################################################################### +# PART 2 Model & Tokenizer & Image Processor # +####################################################################### +model = dict( + type=InternVL_V1_5, + model_path=path, + freeze_llm=False, + freeze_visual_encoder=True # or False +) + +####################################################################### +# PART 3 Dataset & Dataloader # +####################################################################### +llava_dataset = dict( + type=InternVL_V1_5_Dataset, + model_path=path, + data_paths=data_path, + image_folders=image_folder, + template=prompt_template, + max_length=max_length) + +train_dataloader = dict( + batch_size=batch_size, + num_workers=dataloader_num_workers, + dataset=llava_dataset, + sampler=dict( + type=LengthGroupedSampler, + length_property='modality_length', + per_device_batch_size=batch_size * accumulative_counts), + collate_fn=dict(type=default_collate_fn)) + +####################################################################### +# PART 4 Scheduler & Optimizer # +####################################################################### +# optimizer +optim_wrapper = dict( + type=AmpOptimWrapper, + optimizer=dict( + type=optim_type, lr=lr, betas=betas, weight_decay=weight_decay), + clip_grad=dict(max_norm=max_norm, error_if_nonfinite=False), + accumulative_counts=accumulative_counts, + loss_scale='dynamic', + dtype='float16') + +# learning policy +# More information: https://github.com/open-mmlab/mmengine/blob/main/docs/en/tutorials/param_scheduler.md # noqa: E501 +param_scheduler = [ + dict( + type=LinearLR, + start_factor=1e-5, + by_epoch=True, + begin=0, + end=warmup_ratio * max_epochs, + convert_to_iter_based=True), + dict( + type=CosineAnnealingLR, + eta_min=0.0, + by_epoch=True, + begin=warmup_ratio * max_epochs, + end=max_epochs, + convert_to_iter_based=True) +] + +# train, val, test setting +train_cfg = dict(type=TrainLoop, max_epochs=max_epochs) + +####################################################################### +# PART 5 Runtime # 
+####################################################################### +# Log the dialogue periodically during the training process, optional +tokenizer = dict( + type=AutoTokenizer.from_pretrained, + pretrained_model_name_or_path=path, + trust_remote_code=True) + +custom_hooks = [ + dict(type=DatasetInfoHook, tokenizer=tokenizer), +] + +# configure default hooks +default_hooks = dict( + # record the time of every iteration. + timer=dict(type=IterTimerHook), + # print log every 10 iterations. + logger=dict(type=LoggerHook, log_metric_by_epoch=False, interval=10), + # enable the parameter scheduler. + param_scheduler=dict(type=ParamSchedulerHook), + # save checkpoint per `save_steps`. + checkpoint=dict( + type=CheckpointHook, + save_optimizer=False, + by_epoch=False, + interval=save_steps, + max_keep_ckpts=save_total_limit), + # set sampler seed in distributed evrionment. + sampler_seed=dict(type=DistSamplerSeedHook), +) + +# configure environment +env_cfg = dict( + # whether to enable cudnn benchmark + cudnn_benchmark=False, + # set multi process parameters + mp_cfg=dict(mp_start_method='fork', opencv_num_threads=0), + # set distributed parameters + dist_cfg=dict(backend='nccl'), +) + +# set visualizer +visualizer = None + +# set log level +log_level = 'INFO' + +# load from which checkpoint +load_from = None + +# whether to resume training from the loaded checkpoint +resume = False + +# Defaults to use random seed and disable `deterministic` +randomness = dict(seed=None, deterministic=False) + +# set log processor +log_processor = dict(by_epoch=False) diff --git a/xtuner/configs/internvl/v2/internvl_v2_phi3_4b_lora_finetune.py b/xtuner/configs/internvl/v2/internvl_v2_phi3_4b_lora_finetune.py new file mode 100644 index 000000000..64a20450f --- /dev/null +++ b/xtuner/configs/internvl/v2/internvl_v2_phi3_4b_lora_finetune.py @@ -0,0 +1,183 @@ +# Copyright (c) OpenMMLab. All rights reserved. 
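+#
+# Note on `target_modules=None` in the LoraConfig below: module selection is
+# left to the model wrapper, which is expected to apply LoRA across the
+# linear layers of the frozen LLM (an assumption about xtuner's default
+# behavior; pass an explicit module list to control it directly).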
+from mmengine.hooks import (CheckpointHook, DistSamplerSeedHook, IterTimerHook, + LoggerHook, ParamSchedulerHook) +from mmengine.optim import AmpOptimWrapper, CosineAnnealingLR, LinearLR +from peft import LoraConfig +from torch.optim import AdamW +from transformers import AutoTokenizer + +from xtuner.dataset import InternVL_V1_5_Dataset +from xtuner.dataset.collate_fns import default_collate_fn +from xtuner.dataset.samplers import LengthGroupedSampler +from xtuner.engine.hooks import DatasetInfoHook +from xtuner.engine.runner import TrainLoop +from xtuner.model import InternVL_V1_5 +from xtuner.utils import PROMPT_TEMPLATE + +####################################################################### +# PART 1 Settings # +####################################################################### +# Model +path = 'OpenGVLab/InternVL2-4B' + +# Data +data_root = './data/llava_data/' +data_path = data_root + 'LLaVA-Instruct-150K/llava_v1_5_mix665k.json' +image_folder = data_root + 'llava_images' +prompt_template = PROMPT_TEMPLATE.phi3_chat +max_length = 8192 + +# Scheduler & Optimizer +batch_size = 8 # per_device +accumulative_counts = 2 +dataloader_num_workers = 4 +max_epochs = 1 +optim_type = AdamW +# official 1024 -> 4e-5 +lr = 1e-6 +betas = (0.9, 0.999) +weight_decay = 0.05 +max_norm = 1 # grad clip +warmup_ratio = 0.03 + +# Save +save_steps = 1000 +save_total_limit = 1 # Maximum checkpoints to keep (-1 means unlimited) + +####################################################################### +# PART 2 Model & Tokenizer & Image Processor # +####################################################################### +model = dict( + type=InternVL_V1_5, + model_path=path, + freeze_llm=True, + freeze_visual_encoder=True, + # comment the following lines if you don't want to use Lora in llm + llm_lora=dict( + type=LoraConfig, + r=128, + lora_alpha=256, + lora_dropout=0.05, + target_modules=None, + task_type='CAUSAL_LM'), + # uncomment the following lines if you don't want to use Lora in visual encoder # noqa + # visual_encoder_lora=dict( + # type=LoraConfig, r=64, lora_alpha=16, lora_dropout=0.05, + # target_modules=['attn.qkv', 'attn.proj', 'mlp.fc1', 'mlp.fc2']) +) + +####################################################################### +# PART 3 Dataset & Dataloader # +####################################################################### +llava_dataset = dict( + type=InternVL_V1_5_Dataset, + model_path=path, + data_paths=data_path, + image_folders=image_folder, + template=prompt_template, + max_length=max_length) + +train_dataloader = dict( + batch_size=batch_size, + num_workers=dataloader_num_workers, + dataset=llava_dataset, + sampler=dict( + type=LengthGroupedSampler, + length_property='modality_length', + per_device_batch_size=batch_size * accumulative_counts), + collate_fn=dict(type=default_collate_fn)) + +####################################################################### +# PART 4 Scheduler & Optimizer # +####################################################################### +# optimizer +optim_wrapper = dict( + type=AmpOptimWrapper, + optimizer=dict( + type=optim_type, lr=lr, betas=betas, weight_decay=weight_decay), + clip_grad=dict(max_norm=max_norm, error_if_nonfinite=False), + accumulative_counts=accumulative_counts, + loss_scale='dynamic', + dtype='float16') + +# learning policy +# More information: https://github.com/open-mmlab/mmengine/blob/main/docs/en/tutorials/param_scheduler.md # noqa: E501 +param_scheduler = [ + dict( + type=LinearLR, + start_factor=1e-5, + by_epoch=True, 
+ begin=0, + end=warmup_ratio * max_epochs, + convert_to_iter_based=True), + dict( + type=CosineAnnealingLR, + eta_min=0.0, + by_epoch=True, + begin=warmup_ratio * max_epochs, + end=max_epochs, + convert_to_iter_based=True) +] + +# train, val, test setting +train_cfg = dict(type=TrainLoop, max_epochs=max_epochs) + +####################################################################### +# PART 5 Runtime # +####################################################################### +# Log the dialogue periodically during the training process, optional +tokenizer = dict( + type=AutoTokenizer.from_pretrained, + pretrained_model_name_or_path=path, + trust_remote_code=True) + +custom_hooks = [ + dict(type=DatasetInfoHook, tokenizer=tokenizer), +] + +# configure default hooks +default_hooks = dict( + # record the time of every iteration. + timer=dict(type=IterTimerHook), + # print log every 10 iterations. + logger=dict(type=LoggerHook, log_metric_by_epoch=False, interval=10), + # enable the parameter scheduler. + param_scheduler=dict(type=ParamSchedulerHook), + # save checkpoint per `save_steps`. + checkpoint=dict( + type=CheckpointHook, + save_optimizer=False, + by_epoch=False, + interval=save_steps, + max_keep_ckpts=save_total_limit), + # set sampler seed in distributed evrionment. + sampler_seed=dict(type=DistSamplerSeedHook), +) + +# configure environment +env_cfg = dict( + # whether to enable cudnn benchmark + cudnn_benchmark=False, + # set multi process parameters + mp_cfg=dict(mp_start_method='fork', opencv_num_threads=0), + # set distributed parameters + dist_cfg=dict(backend='nccl'), +) + +# set visualizer +visualizer = None + +# set log level +log_level = 'INFO' + +# load from which checkpoint +load_from = None + +# whether to resume training from the loaded checkpoint +resume = False + +# Defaults to use random seed and disable `deterministic` +randomness = dict(seed=None, deterministic=False) + +# set log processor +log_processor = dict(by_epoch=False) diff --git a/xtuner/configs/internvl/v2/internvl_v2_phi3_4b_qlora_finetune.py b/xtuner/configs/internvl/v2/internvl_v2_phi3_4b_qlora_finetune.py new file mode 100644 index 000000000..8302fa5cc --- /dev/null +++ b/xtuner/configs/internvl/v2/internvl_v2_phi3_4b_qlora_finetune.py @@ -0,0 +1,185 @@ +# Copyright (c) OpenMMLab. All rights reserved. 
+from mmengine.hooks import (CheckpointHook, DistSamplerSeedHook, IterTimerHook, + LoggerHook, ParamSchedulerHook) +from mmengine.optim import AmpOptimWrapper, CosineAnnealingLR, LinearLR +from peft import LoraConfig +from torch.optim import AdamW +from transformers import AutoTokenizer + +from xtuner.dataset import InternVL_V1_5_Dataset +from xtuner.dataset.collate_fns import default_collate_fn +from xtuner.dataset.samplers import LengthGroupedSampler +from xtuner.engine.hooks import DatasetInfoHook +from xtuner.engine.runner import TrainLoop +from xtuner.model import InternVL_V1_5 +from xtuner.utils import PROMPT_TEMPLATE + +####################################################################### +# PART 1 Settings # +####################################################################### +# Model +path = 'OpenGVLab/InternVL2-4B' + +# Data +data_root = './data/llava_data/' +data_path = data_root + 'LLaVA-Instruct-150K/llava_v1_5_mix665k.json' +image_folder = data_root + 'llava_images' +prompt_template = PROMPT_TEMPLATE.phi3_chat +max_length = 8192 + +# Scheduler & Optimizer +batch_size = 8 # per_device +accumulative_counts = 2 +dataloader_num_workers = 4 +max_epochs = 1 +optim_type = AdamW +# official 1024 -> 4e-5 +lr = 1e-6 +betas = (0.9, 0.999) +weight_decay = 0.05 +max_norm = 1 # grad clip +warmup_ratio = 0.03 + +# Save +save_steps = 1000 +save_total_limit = 1 # Maximum checkpoints to keep (-1 means unlimited) + +####################################################################### +# PART 2 Model & Tokenizer & Image Processor # +####################################################################### +model = dict( + type=InternVL_V1_5, + model_path=path, + freeze_llm=True, + freeze_visual_encoder=True, + quantization_llm=True, # or False + quantization_vit=False, # or True and uncomment visual_encoder_lora + # comment the following lines if you don't want to use Lora in llm + llm_lora=dict( + type=LoraConfig, + r=128, + lora_alpha=256, + lora_dropout=0.05, + target_modules=None, + task_type='CAUSAL_LM'), + # uncomment the following lines if you don't want to use Lora in visual encoder # noqa + # visual_encoder_lora=dict( + # type=LoraConfig, r=64, lora_alpha=16, lora_dropout=0.05, + # target_modules=['attn.qkv', 'attn.proj', 'mlp.fc1', 'mlp.fc2']) +) + +####################################################################### +# PART 3 Dataset & Dataloader # +####################################################################### +llava_dataset = dict( + type=InternVL_V1_5_Dataset, + model_path=path, + data_paths=data_path, + image_folders=image_folder, + template=prompt_template, + max_length=max_length) + +train_dataloader = dict( + batch_size=batch_size, + num_workers=dataloader_num_workers, + dataset=llava_dataset, + sampler=dict( + type=LengthGroupedSampler, + length_property='modality_length', + per_device_batch_size=batch_size * accumulative_counts), + collate_fn=dict(type=default_collate_fn)) + +####################################################################### +# PART 4 Scheduler & Optimizer # +####################################################################### +# optimizer +optim_wrapper = dict( + type=AmpOptimWrapper, + optimizer=dict( + type=optim_type, lr=lr, betas=betas, weight_decay=weight_decay), + clip_grad=dict(max_norm=max_norm, error_if_nonfinite=False), + accumulative_counts=accumulative_counts, + loss_scale='dynamic', + dtype='float16') + +# learning policy +# More information: 
https://github.com/open-mmlab/mmengine/blob/main/docs/en/tutorials/param_scheduler.md # noqa: E501 +param_scheduler = [ + dict( + type=LinearLR, + start_factor=1e-5, + by_epoch=True, + begin=0, + end=warmup_ratio * max_epochs, + convert_to_iter_based=True), + dict( + type=CosineAnnealingLR, + eta_min=0.0, + by_epoch=True, + begin=warmup_ratio * max_epochs, + end=max_epochs, + convert_to_iter_based=True) +] + +# train, val, test setting +train_cfg = dict(type=TrainLoop, max_epochs=max_epochs) + +####################################################################### +# PART 5 Runtime # +####################################################################### +# Log the dialogue periodically during the training process, optional +tokenizer = dict( + type=AutoTokenizer.from_pretrained, + pretrained_model_name_or_path=path, + trust_remote_code=True) + +custom_hooks = [ + dict(type=DatasetInfoHook, tokenizer=tokenizer), +] + +# configure default hooks +default_hooks = dict( + # record the time of every iteration. + timer=dict(type=IterTimerHook), + # print log every 10 iterations. + logger=dict(type=LoggerHook, log_metric_by_epoch=False, interval=10), + # enable the parameter scheduler. + param_scheduler=dict(type=ParamSchedulerHook), + # save checkpoint per `save_steps`. + checkpoint=dict( + type=CheckpointHook, + save_optimizer=False, + by_epoch=False, + interval=save_steps, + max_keep_ckpts=save_total_limit), + # set sampler seed in distributed evrionment. + sampler_seed=dict(type=DistSamplerSeedHook), +) + +# configure environment +env_cfg = dict( + # whether to enable cudnn benchmark + cudnn_benchmark=False, + # set multi process parameters + mp_cfg=dict(mp_start_method='fork', opencv_num_threads=0), + # set distributed parameters + dist_cfg=dict(backend='nccl'), +) + +# set visualizer +visualizer = None + +# set log level +log_level = 'INFO' + +# load from which checkpoint +load_from = None + +# whether to resume training from the loaded checkpoint +resume = False + +# Defaults to use random seed and disable `deterministic` +randomness = dict(seed=None, deterministic=False) + +# set log processor +log_processor = dict(by_epoch=False) diff --git a/xtuner/dataset/__init__.py b/xtuner/dataset/__init__.py index bcfe0dcc3..2ad3d7bd9 100644 --- a/xtuner/dataset/__init__.py +++ b/xtuner/dataset/__init__.py @@ -6,6 +6,7 @@ from .intern_repo import (build_packed_dataset, load_intern_repo_tokenized_dataset, load_intern_repo_untokenized_dataset) +from .internvl_dataset import InternVL_V1_5_Dataset from .json_dataset import load_json_file from .llava import LLaVADataset from .modelscope import process_ms_dataset @@ -24,5 +25,5 @@ 'load_intern_repo_tokenized_dataset', 'load_intern_repo_untokenized_dataset', 'build_packed_dataset', 'RefCOCOJsonDataset', 'RefCOCOJsonEvalDataset', 'InvRefCOCOJsonDataset', - 'load_json_file' + 'load_json_file', 'InternVL_V1_5_Dataset' ] diff --git a/xtuner/dataset/collate_fns/default_collate_fn.py b/xtuner/dataset/collate_fns/default_collate_fn.py index 0ca9264f0..3d9fe18fb 100644 --- a/xtuner/dataset/collate_fns/default_collate_fn.py +++ b/xtuner/dataset/collate_fns/default_collate_fn.py @@ -89,7 +89,8 @@ def default_collate_fn(instances: Sequence[Dict], } if has_image: - pixel_values = torch.stack(pixel_values) + if all(x.shape == pixel_values[0].shape for x in pixel_values): + pixel_values = torch.stack(pixel_values, dim=0) data_dict['pixel_values'] = pixel_values if return_hf_format: diff --git a/xtuner/dataset/internvl_dataset.py b/xtuner/dataset/internvl_dataset.py 
new file mode 100644 index 000000000..82904ae87 --- /dev/null +++ b/xtuner/dataset/internvl_dataset.py @@ -0,0 +1,409 @@ +import copy +import io +import json +import os +import random +import warnings + +import numpy as np +import torch +import torchvision.transforms as T +from mmengine import print_log +from mmengine.fileio import get +from PIL import Image +from torch.utils.data import Dataset +from torchvision.transforms.functional import InterpolationMode +from transformers import AutoConfig, AutoTokenizer + +from xtuner.utils import IGNORE_INDEX + + +# Referenced from InternVL +def find_closest_aspect_ratio(aspect_ratio, target_ratios, width, height, + image_size): + best_ratio_diff = float('inf') + best_ratio = (1, 1) + area = width * height + for ratio in target_ratios: + target_aspect_ratio = ratio[0] / ratio[1] + ratio_diff = abs(aspect_ratio - target_aspect_ratio) + if ratio_diff < best_ratio_diff: + best_ratio_diff = ratio_diff + best_ratio = ratio + elif ratio_diff == best_ratio_diff: + if area > 0.5 * image_size * image_size * ratio[0] * ratio[1]: + best_ratio = ratio + return best_ratio + + +def dynamic_preprocess(image, + min_num=1, + max_num=6, + image_size=448, + use_thumbnail=False): + orig_width, orig_height = image.size + aspect_ratio = orig_width / orig_height + + # calculate the existing image aspect ratio + target_ratios = {(i, j) + for n in range(min_num, max_num + 1) + for i in range(1, n + 1) for j in range(1, n + 1) + if i * j <= max_num and i * j >= min_num} + target_ratios = sorted(target_ratios, key=lambda x: x[0] * x[1]) + + # find the closest aspect ratio to the target + target_aspect_ratio = find_closest_aspect_ratio(aspect_ratio, + target_ratios, orig_width, + orig_height, image_size) + + # calculate the target width and height + target_width = image_size * target_aspect_ratio[0] + target_height = image_size * target_aspect_ratio[1] + blocks = target_aspect_ratio[0] * target_aspect_ratio[1] + + # resize the image + resized_img = image.resize((target_width, target_height)) + processed_images = [] + for i in range(blocks): + box = ((i % (target_width // image_size)) * image_size, + (i // (target_width // image_size)) * image_size, + ((i % (target_width // image_size)) + 1) * image_size, + ((i // (target_width // image_size)) + 1) * image_size) + # split the image + split_img = resized_img.crop(box) + processed_images.append(split_img) + assert len(processed_images) == blocks + if use_thumbnail and len(processed_images) != 1: + thumbnail_img = image.resize((image_size, image_size)) + processed_images.append(thumbnail_img) + return processed_images + + +def total_image_token(orig_size, + min_num=1, + max_num=12, + image_size=448, + use_thumbnail=True): + orig_width, orig_height = orig_size + + aspect_ratio = orig_width / orig_height + + # calculate the existing image aspect ratio + target_ratios = {(i, j) + for n in range(min_num, max_num + 1) + for i in range(1, n + 1) for j in range(1, n + 1) + if max_num >= i * j >= min_num} + target_ratios = sorted(target_ratios, key=lambda x: x[0] * x[1]) + + # find the closest aspect ratio to the target + target_aspect_ratio = find_closest_aspect_ratio(aspect_ratio, + target_ratios, orig_width, + orig_height, image_size) + blocks = target_aspect_ratio[0] * target_aspect_ratio[1] + + if use_thumbnail: + blocks += 1 + + return blocks + + +def load_json_or_jsonl(json_path): + if json_path.endswith('.json'): + with open(json_path) as f: + data = json.load(f) + elif json_path.endswith('.jsonl'): + with open(json_path) as f: + 
data = [json.loads(line) for line in f]
+    else:
+        raise ValueError(f'Unsupported file format: {json_path}, '
+                         f'only support .json and .jsonl.')
+    return data
+
+
+class InternVL_V1_5_Dataset(Dataset):
+    os.environ['TOKENIZERS_PARALLELISM'] = 'true'
+    IMG_CONTEXT_TOKEN = '<IMG_CONTEXT>'
+    IMG_START_TOKEN = '<img>'
+    IMG_END_TOKEN = '</img>'
+
+    IMAGENET_MEAN = (0.485, 0.456, 0.406)
+    IMAGENET_STD = (0.229, 0.224, 0.225)
+
+    def __init__(self,
+                 model_path,
+                 template,
+                 data_paths,
+                 image_folders=None,
+                 repeat_times=1,
+                 max_length=8192):
+        self.template = template
+        self.max_length = max_length
+
+        self.cfg = AutoConfig.from_pretrained(
+            model_path, trust_remote_code=True)
+
+        # The following modifications are only to ensure full
+        # consistency with the official template,
+        # without investigating the impact on performance.
+        if self.cfg.llm_config.architectures[0] == 'Phi3ForCausalLM':
+            self._system = 'You are an AI assistant whose name is Phi-3.'
+            self.template[
+                'INSTRUCTION'] = '<|user|>\n{input}<|end|><|assistant|>\n'
+        elif self.cfg.llm_config.architectures[0] == 'InternLM2ForCausalLM':
+            self._system = 'You are an AI assistant whose name ' \
+                           'is InternLM (书生·浦语).'
+            self.template['SYSTEM'] = '<|im_start|>system\n{system}<|im_end|>'
+            self.template[
+                'INSTRUCTION'] = '<|im_start|>user\n{input}' \
+                                 '<|im_end|><|im_start|>assistant\n'
+        else:
+            raise NotImplementedError
+
+        self.min_dynamic_patch = self.cfg.min_dynamic_patch
+        self.max_dynamic_patch = self.cfg.max_dynamic_patch
+        self.downsample_ratio = self.cfg.downsample_ratio
+        self.image_size = self.cfg.force_image_size
+        self.use_thumbnail = self.cfg.use_thumbnail
+        patch_size = self.cfg.vision_config.patch_size
+        self.patch_token = int(
+            (self.image_size // patch_size)**2 * (self.downsample_ratio**2))
+        self.tokenizer = AutoTokenizer.from_pretrained(
+            model_path, trust_remote_code=True)
+        self.transformer = T.Compose([
+            T.Lambda(lambda img: img.convert('RGB')
+                     if img.mode != 'RGB' else img),
+            T.Resize((self.image_size, self.image_size),
+                     interpolation=InterpolationMode.BICUBIC),
+            T.ToTensor(),
+            T.Normalize(mean=self.IMAGENET_MEAN, std=self.IMAGENET_STD)
+        ])
+
+        if not isinstance(data_paths, (list, tuple)):
+            data_paths = [data_paths]
+        if not isinstance(image_folders, (list, tuple)):
+            image_folders = [image_folders]
+        if not isinstance(repeat_times, (list, tuple)):
+            repeat_times = [repeat_times]
+        assert len(data_paths) == len(image_folders) == len(repeat_times)
+
+        print_log('Starting to load data and calculate lengths',
+                  logger='current')
+        self.data = []
+        self.image_folder = []
+        self.group_length = []
+        self.conv2length_text = {
+        }  # using dict to speed up the calculation of token length
+
+        for data_file, image_folder, repeat_time in zip(
+                data_paths, image_folders, repeat_times):
+            print_log(
+                f'=======Starting to process {data_file} =======',
+                logger='current')
+            assert repeat_time > 0
+            json_data = load_json_or_jsonl(data_file)
+            if repeat_time < 1:
+                json_data = random.sample(json_data,
+                                          int(len(json_data) * repeat_time))
+            elif repeat_time > 1:
+                int_repeat_time = int(repeat_time)
+                # keep the fractional part of repeat_time as a random subset
+                remaining_repeat_time = repeat_time - int_repeat_time
+                if remaining_repeat_time > 0:
+                    remaining_json_data = random.sample(
+                        json_data,
+                        int(len(json_data) * remaining_repeat_time))
+                    json_data = json_data * int_repeat_time
+                    json_data.extend(remaining_json_data)
+                else:
+                    json_data = json_data * int_repeat_time
+
+            self.data.extend(json_data)
+            self.image_folder.extend([image_folder] * len(json_data))
+
+            # TODO: multi process
+            for data_item 
+            # TODO: multi process
+            for data_item in json_data:
+                if 'length' in data_item:
+                    # provided length already includes image tokens
+                    token_length = data_item['length']
+                else:
+                    conversations = '\n'.join(
+                        [temp['value'] for temp in data_item['conversations']])
+                    str_length = len(conversations)
+
+                    if str_length not in self.conv2length_text:
+                        token_length = self.tokenizer(
+                            conversations,
+                            return_tensors='pt',
+                            padding=False,
+                            truncation=False,
+                        ).input_ids.size(1)
+                        self.conv2length_text[str_length] = token_length
+                    else:
+                        token_length = self.conv2length_text[str_length]
+
+                if 'image' in data_item and data_item['image'] is not None:
+                    if 'image_wh' in data_item and data_item[
+                            'image_wh'] is not None:
+                        # more accurate image token count derived from the
+                        # true image width and height
+                        image_wh = data_item['image_wh']
+                        if isinstance(image_wh[0], list):
+                            image_wh = image_wh[0]
+                        image_token = total_image_token(
+                            image_wh, self.min_dynamic_patch,
+                            self.max_dynamic_patch, self.image_size,
+                            self.use_thumbnail)
+                        image_token = self.patch_token * image_token
+                    else:
+                        # upper bound: max_dynamic_patch + use_thumbnail
+                        image_token = self.patch_token * (
+                            self.max_dynamic_patch + self.use_thumbnail)
+
+                    token_length = token_length + image_token
+                else:
+                    # text-only samples are marked with negative lengths
+                    token_length = -token_length
+
+                self.group_length.append(token_length)
+            print_log(
+                f'=======total {len(json_data)} samples of {data_file}=======',
+                logger='current')
+
+        assert len(self.group_length) == len(self.data)
+        print_log('Finished loading data and calculating lengths',
+                  logger='current')
+        print_log(
+            f'=======total {len(self.data)} samples=======', logger='current')
+        self._max_refetch = 1000
+
+    def __getitem__(self, index):
+        for _ in range(self._max_refetch + 1):
+            data = self.prepare_data(index)
+            # Broken images may cause the returned data to be None
+            if data is None:
+                index = self._rand_another()
+                continue
+            return data
+        raise RuntimeError(
+            f'Cannot find a valid sample after {self._max_refetch} retries; '
+            'please check the dataset for broken images.')
+
+    def __len__(self):
+        return len(self.data)
+
+    @property
+    def modality_length(self):
+        return self.group_length
+
+    @property
+    def length(self):
+        group_length = np.array(self.group_length)
+        group_length = np.abs(group_length).tolist()
+        return group_length
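+    # NOTE: `group_length` is signed on purpose: samples with images keep
+    # positive token lengths while text-only samples were negated above.
+    # `modality_length` exposes the signed values so a length-grouped
+    # sampler can separate the two modalities, while `length` recovers
+    # plain token counts via np.abs.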
+    def prepare_data(self, index):
+        data_dict: dict = self.data[index]
+        image_folder = self.image_folder[index]
+
+        out_data_dict = {}
+        if data_dict.get('image', None) is not None:
+            image_file = data_dict['image']
+            if isinstance(image_file, (list, tuple)):
+                assert len(image_file) == 1
+                image_file = image_file[0]
+
+            try:
+                image = self.get_image(os.path.join(image_folder, image_file))
+            except Exception as e:
+                print(f'Error: {e}', flush=True)
+                print_log(f'Error: {e}', logger='current')
+                return None
+
+            images = dynamic_preprocess(image, self.min_dynamic_patch,
+                                        self.max_dynamic_patch,
+                                        self.image_size, self.use_thumbnail)
+            pixel_values = [self.transformer(image) for image in images]
+            pixel_values = torch.stack(pixel_values)
+            out_data_dict['pixel_values'] = pixel_values
+
+            num_image_tokens = pixel_values.shape[0] * self.patch_token
+            image_token_str = f'{self.IMG_START_TOKEN}' \
+                              f'{self.IMG_CONTEXT_TOKEN * num_image_tokens}' \
+                              f'{self.IMG_END_TOKEN}'
+            token_dict = self.get_inputid_labels(data_dict['conversations'],
+                                                 image_token_str)
+            out_data_dict.update(token_dict)
+        else:
+            token_dict = self.get_inputid_labels(data_dict['conversations'],
+                                                 None)
+            out_data_dict.update(token_dict)
+            # text-only samples still carry a dummy all-zero image so that
+            # batch shapes stay consistent
+            out_data_dict['pixel_values'] = torch.zeros(
+                1, 3, self.image_size, self.image_size)
+        return out_data_dict
+
+    def _rand_another(self) -> int:
+        return np.random.randint(0, len(self.data))
+
+    def get_image(self, path):
+        if 's3://' in path:
+            img_bytes = get(path)
+            with io.BytesIO(img_bytes) as buff:
+                img = Image.open(buff).convert('RGB')
+            return img
+        else:
+            return Image.open(path).convert('RGB')
+
+    def get_inputid_labels(self, conversations, image_token_str) -> dict:
+        input = ''
+        out_conversation = []
+        while conversations and conversations[0]['from'] == 'gpt':
+            # Skip the first one if it is from gpt
+            conversations = conversations[1:]
+        for msg in conversations:
+            if msg['from'] == 'human':
+                if image_token_str is None and '<image>' in msg['value']:
+                    warnings.warn(
+                        f'The current data << {msg["value"]} >> is '
+                        f'in plain text mode, but '
+                        'there are <image> tags present in the data. '
+                        'We need to remove the <image> tags.')
+                    msg['value'] = msg['value'].replace('<image>', '')
+                if '<image>' in msg['value']:
+                    msg['value'] = msg['value'].replace('<image>', '').strip()
+                    msg['value'] = image_token_str + '\n' + msg['value']
+                    msg['value'] = msg['value'].strip()
+                input += msg['value'].strip()
+            elif msg['from'] == 'gpt':
+                out_conversation.append({
+                    'input': input,
+                    'output': msg['value'].strip()
+                })
+                input = ''
+            else:
+                raise NotImplementedError
+
+        input_ids, labels = [], []
+        for i, single_turn_conversation in enumerate(out_conversation):
+            input = single_turn_conversation.get('input', '')
+            if input is None:
+                input = ''
+            input_text = self.template.INSTRUCTION.format(
+                input=input, round=i + 1)
+
+            if i == 0:
+                system = self.template.SYSTEM.format(system=self._system)
+                input_text = system + input_text
+                input_encode = self.tokenizer.encode(
+                    input_text, add_special_tokens=True)
+            else:
+                input_encode = self.tokenizer.encode(
+                    input_text, add_special_tokens=False)
+            input_ids += input_encode
+            labels += [IGNORE_INDEX] * len(input_encode)
+
+            output_text = single_turn_conversation.get('output', '')
+            if self.template.get('SUFFIX', None):
+                output_text += self.template.SUFFIX
+            output_encode = self.tokenizer.encode(
+                output_text, add_special_tokens=False)
+            input_ids += output_encode
+            labels += copy.deepcopy(output_encode)
+
+        if len(input_ids) > self.max_length:
+            input_ids = input_ids[:self.max_length]
+            labels = labels[:self.max_length]
+            print_log(
+                f'Warning: input_ids length ({len(input_ids)}) '
+                f'exceeds max_length; truncated to {self.max_length}',
+                logger='current')
+        return {'input_ids': input_ids, 'labels': labels}
diff --git a/xtuner/dataset/samplers/length_grouped.py b/xtuner/dataset/samplers/length_grouped.py
index ad37957f2..184827837 100644
--- a/xtuner/dataset/samplers/length_grouped.py
+++ b/xtuner/dataset/samplers/length_grouped.py
@@ -4,6 +4,7 @@
 import torch
 from mmengine.dist import get_dist_info, sync_random_seed
+from mmengine.logging import print_log
 from torch.utils.data import ConcatDataset as TorchConcatDataset
 from torch.utils.data import Sampler
@@ -78,6 +79,7 @@ def __init__(self,
                  mega_batch_mult: Optional[int] = None,
                  seed: Optional[int] = None,
                  round_up: bool = True) -> None:
+        print_log('LengthGroupedSampler is used.', logger='current')
         rank, world_size = get_dist_info()
         self.rank = rank
         self.world_size = world_size
@@ -120,6 +122,10 @@ def __init__(self,
         assert isinstance(self.length, (list, tuple))
 
         self.total_batch_size = total_batch_size
+        print_log(
+            f'LengthGroupedSampler construction is complete; '
+            f'the selected length attribute is {length_property}',
+            logger='current')
 
     def __iter__(self) -> Iterator[int]:
         """Iterate the indices."""
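For context, LengthGroupedSampler batches samples of similar token length together so that padding inside a batch is minimized. A toy sketch of the idea in plain Python (not xtuner's exact algorithm; the mega-batch multiplier of 50 and the use of abs() over the signed modality lengths are illustrative assumptions):

import numpy as np


def toy_length_grouped_indices(lengths, batch_size, seed=0):
    """Toy sketch: shuffle once, then sort by |length| inside each
    mega-batch so every batch sees samples of similar length."""
    rng = np.random.default_rng(seed)
    indices = rng.permutation(len(lengths)).tolist()
    mega = batch_size * 50  # illustrative mega-batch multiplier
    grouped = []
    for start in range(0, len(indices), mega):
        chunk = sorted(indices[start:start + mega],
                       key=lambda i: abs(lengths[i]),
                       reverse=True)
        grouped.extend(chunk)
    return grouped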
diff --git a/xtuner/model/__init__.py b/xtuner/model/__init__.py
index 39547b2d7..1b3a501d4 100644
--- a/xtuner/model/__init__.py
+++ b/xtuner/model/__init__.py
@@ -1,5 +1,6 @@
 # Copyright (c) OpenMMLab. All rights reserved.
+from .internvl import InternVL_V1_5
 from .llava import LLaVAModel
 from .sft import SupervisedFinetune
 
-__all__ = ['SupervisedFinetune', 'LLaVAModel']
+__all__ = ['SupervisedFinetune', 'LLaVAModel', 'InternVL_V1_5']
diff --git a/xtuner/model/internvl.py b/xtuner/model/internvl.py
new file mode 100644
index 000000000..0358266a9
--- /dev/null
+++ b/xtuner/model/internvl.py
@@ -0,0 +1,320 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+from collections import OrderedDict
+from typing import List, Optional, Tuple, Union
+
+import torch
+from mmengine import print_log
+from mmengine.config import Config, ConfigDict
+from mmengine.model import BaseModel
+from peft import get_peft_model, prepare_model_for_kbit_training
+from torch.nn import CrossEntropyLoss
+from transformers import (AutoConfig, AutoModel, AutoTokenizer,
+                          BitsAndBytesConfig)
+from transformers.modeling_outputs import CausalLMOutputWithPast
+
+from xtuner.registry import BUILDER
+from .utils import (find_all_linear_names, get_peft_model_state_dict,
+                    guess_load_checkpoint, make_inputs_require_grad)
+
+
+class InternVL_V1_5(BaseModel):
+
+    def __init__(self,
+                 model_path,
+                 freeze_llm=False,
+                 freeze_visual_encoder=False,
+                 llm_lora=None,
+                 visual_encoder_lora=None,
+                 quantization_vit=False,
+                 quantization_llm=False,
+                 pretrained_pth=None):
+        print_log('Start to load InternVL_V1_5 model.', logger='current')
+        super().__init__()
+        self.freeze_llm = freeze_llm
+        self.freeze_visual_encoder = freeze_visual_encoder
+        self.use_llm_lora = llm_lora is not None
+        self.use_visual_encoder_lora = visual_encoder_lora is not None
+        self.quantization_vit = quantization_vit
+        self.quantization_llm = quantization_llm
+        if quantization_vit:
+            assert visual_encoder_lora is not None
+        if quantization_llm:
+            assert llm_lora is not None
+
+        config = AutoConfig.from_pretrained(model_path, trust_remote_code=True)
+        if config.llm_config.model_type == 'internlm2':
+            config.llm_config.attn_implementation = 'flash_attention_2'
+        else:
+            config.llm_config._attn_implementation = 'flash_attention_2'
+
+        if quantization_vit is False and quantization_llm is False:
+            quantization = None
+        else:
+            llm_int8_skip_modules = ['mlp1']
+            if quantization_llm and not quantization_vit:
+                llm_int8_skip_modules.append('vision_model')
+            if quantization_vit and not quantization_llm:
+                llm_int8_skip_modules.append('language_model')
+
+            quantization_config = dict(
+                type=BitsAndBytesConfig,
+                llm_int8_skip_modules=llm_int8_skip_modules,
+                load_in_4bit=True,
+                load_in_8bit=False,
+                llm_int8_threshold=6.0,
+                llm_int8_has_fp16_weight=False,
+                bnb_4bit_compute_dtype=torch.float16,
+                bnb_4bit_use_double_quant=True,
+                bnb_4bit_quant_type='nf4')
+            quantization_clazz = quantization_config.pop('type')
+            quantization = quantization_clazz(**quantization_config)
+
+        self.model = AutoModel.from_pretrained(
+            model_path,
+            torch_dtype=torch.bfloat16,
+            quantization_config=quantization,
+            config=config,
+            trust_remote_code=True)
+
+        tokenizer = AutoTokenizer.from_pretrained(
+            model_path, trust_remote_code=True)
+        img_context_token_id = tokenizer.convert_tokens_to_ids('<IMG_CONTEXT>')
+        self.model.img_context_token_id = img_context_token_id
+
+        if self.freeze_llm:
+            self.model.language_model.requires_grad_(False)
+        if self.freeze_visual_encoder:
+            self.model.vision_model.requires_grad_(False)
+
+        if hasattr(self.model.language_model, 'enable_input_require_grads'):
+            self.model.language_model.enable_input_require_grads()
+        else:
self.model.language_model.get_input_embeddings( + ).register_forward_hook(make_inputs_require_grad) + + self.gradient_checkpointing_enable() + + if self.use_llm_lora: + self._prepare_llm_for_lora(llm_lora) + + if self.use_visual_encoder_lora: + self._prepare_visual_encoder_for_lora(visual_encoder_lora) + + if pretrained_pth is not None: + pretrained_state_dict = guess_load_checkpoint(pretrained_pth) + + self.load_state_dict(pretrained_state_dict, strict=False) + print(f'Load pretrained weight from {pretrained_pth}') + + self._count = 0 + print_log(self, logger='current') + print_log('InternVL_V1_5 construction is complete', logger='current') + + def _parse_lora_config(self, lora_config): + if isinstance(lora_config, dict) or isinstance( + lora_config, Config) or isinstance(lora_config, ConfigDict): + lora_config = BUILDER.build(lora_config) + return lora_config + + def _prepare_llm_for_lora(self, + lora_config, + use_activation_checkpointing=True): + lora_config = self._parse_lora_config(lora_config) + self.model.language_model = prepare_model_for_kbit_training( + self.model.language_model, use_activation_checkpointing) + if lora_config.target_modules is None: + modules = find_all_linear_names(self.model.language_model) + lora_config.target_modules = modules + self.model.language_model = get_peft_model(self.model.language_model, + lora_config) + + def _prepare_visual_encoder_for_lora(self, lora_config): + lora_config = self._parse_lora_config(lora_config) + if lora_config.target_modules is None: + modules = find_all_linear_names(self.model.vision_model) + lora_config.target_modules = modules + self.model.vision_model = get_peft_model(self.model.vision_model, + lora_config) + + def gradient_checkpointing_enable(self): + self.activation_checkpointing_enable() + + def activation_checkpointing_enable(self): + self.model.language_model.gradient_checkpointing_enable() + + def gradient_checkpointing_disable(self): + self.activation_checkpointing_disable() + + def activation_checkpointing_disable(self): + self.model.language_model.gradient_checkpointing_disable() + + def state_dict(self, *args, **kwargs): + state_dict = super().state_dict(*args, **kwargs) + to_return = OrderedDict() + # Step 1. visual_encoder + if self.use_visual_encoder_lora: + to_return.update( + get_peft_model_state_dict( + self.model.vision_model, state_dict=state_dict)) + elif not self.freeze_visual_encoder: + to_return.update({ + k: v + for k, v in state_dict.items() if 'model.vision_model.' in k + }) + # Step 2. LLM + if self.use_llm_lora: + to_return.update( + get_peft_model_state_dict( + self.model.language_model, state_dict=state_dict)) + elif not self.freeze_llm: + to_return.update({ + k: v + for k, v in state_dict.items() if 'model.language_model.' in k + }) + # Step 3. Projector + to_return.update( + {k: v + for k, v in state_dict.items() if 'model.mlp1.' 
in k})
+        return to_return
+
+    def init_weights(self):
+        pass
+
+    def forward(self, data, data_samples=None, mode='loss'):
+        pixel_values = data['pixel_values']
+
+        if type(pixel_values) is list or pixel_values.ndim == 5:
+            if type(pixel_values) is list:
+                pixel_values = [
+                    x.unsqueeze(0) if x.ndim == 3 else x for x in pixel_values
+                ]
+            # b*n, c, h, w
+            concat_images = torch.cat([
+                image.to(self.model.vision_model.dtype)
+                for image in pixel_values
+            ],
+                                      dim=0)
+        else:
+            raise NotImplementedError()
+
+        input_ids = data['input_ids']
+        position_ids = data['position_ids']
+        attention_mask = data['attention_mask']
+        # pixel values that sum to 0 mark the dummy images of
+        # text-only samples
+        image_flags = torch.sum(concat_images, dim=(1, 2, 3)) != 0
+        image_flags = image_flags.long()
+
+        labels = data['labels']
+        use_cache = False
+
+        # Directly calling the official forward during LoRA fine-tuning
+        # will result in an error, so we must rewrite it.
+        # TODO: once the official implementation is fixed, remove this.
+        # outputs = self.model(input_ids=input_ids,
+        #                      position_ids=position_ids,
+        #                      attention_mask=attention_mask,
+        #                      image_flags=image_flags,
+        #                      pixel_values=concat_images,
+        #                      labels=labels,
+        #                      use_cache=use_cache)
+        outputs = self._llm_forward(
+            input_ids=input_ids,
+            position_ids=position_ids,
+            attention_mask=attention_mask,
+            image_flags=image_flags,
+            pixel_values=concat_images,
+            labels=labels,
+            use_cache=use_cache)
+        loss_dict = {'loss': outputs.loss}
+        return loss_dict
+
+    def _llm_forward(
+        self,
+        pixel_values: torch.FloatTensor,
+        input_ids: torch.LongTensor = None,
+        attention_mask: Optional[torch.Tensor] = None,
+        position_ids: Optional[torch.LongTensor] = None,
+        image_flags: Optional[torch.LongTensor] = None,
+        past_key_values: Optional[List[torch.FloatTensor]] = None,
+        labels: Optional[torch.LongTensor] = None,
+        use_cache: Optional[bool] = None,
+        output_attentions: Optional[bool] = None,
+        output_hidden_states: Optional[bool] = None,
+        return_dict: Optional[bool] = None,
+    ) -> Union[Tuple, CausalLMOutputWithPast]:
+        return_dict = return_dict if return_dict is not None \
+            else self.model.config.use_return_dict
+
+        image_flags = image_flags.squeeze(-1)
+        # We only added the clone code here to avoid the error.
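+        # (Without the clone, the in-place write into `input_embeds` further
+        # below would mutate the embedding layer's output tensor directly,
+        # which is presumably what breaks under LoRA fine-tuning.)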
+ input_embeds = self.model.language_model.get_input_embeddings()( + input_ids).clone() + + vit_embeds = self.model.extract_feature(pixel_values) + vit_embeds = vit_embeds[image_flags == 1] + vit_batch_size = pixel_values.shape[0] + + B, N, C = input_embeds.shape + input_embeds = input_embeds.reshape(B * N, C) + + if torch.distributed.get_rank() == 0 and self._count % 100 == 0: + print(f'dynamic ViT batch size: {vit_batch_size}, ' + f'images per sample: {vit_batch_size / B}, ' + f'dynamic token length: {N}') + self._count += 1 + + input_ids = input_ids.reshape(B * N) + selected = (input_ids == self.model.img_context_token_id) + try: + input_embeds[ + selected] = input_embeds[selected] * 0.0 + vit_embeds.reshape( + -1, C) + except Exception as e: + vit_embeds = vit_embeds.reshape(-1, C) + print(f'warning: {e}, input_embeds[selected].shape=' + f'{input_embeds[selected].shape}, ' + f'vit_embeds.shape={vit_embeds.shape}') + n_token = selected.sum() + input_embeds[ + selected] = input_embeds[selected] * 0.0 + vit_embeds[:n_token] + + input_embeds = input_embeds.reshape(B, N, C) + + outputs = self.model.language_model( + inputs_embeds=input_embeds, + attention_mask=attention_mask, + position_ids=position_ids, + past_key_values=past_key_values, + use_cache=use_cache, + output_attentions=output_attentions, + output_hidden_states=output_hidden_states, + return_dict=return_dict, + ) + logits = outputs.logits + + loss = None + if labels is not None: + # Shift so that tokens < n predict n + shift_logits = logits[..., :-1, :].contiguous() + shift_labels = labels[..., 1:].contiguous() + # Flatten the tokens + loss_fct = CrossEntropyLoss() + shift_logits = shift_logits.view( + -1, self.model.language_model.config.vocab_size) + shift_labels = shift_labels.view(-1) + # Enable model parallelism + shift_labels = shift_labels.to(shift_logits.device) + loss = loss_fct(shift_logits, shift_labels) + + if not return_dict: + output = (logits, ) + outputs[1:] + return (loss, ) + output if loss is not None else output + + return CausalLMOutputWithPast( + loss=loss, + logits=logits, + past_key_values=outputs.past_key_values, + hidden_states=outputs.hidden_states, + attentions=outputs.attentions, + ) From 9444b3423d3f9b0a409e879eb24d06a4a190a9b5 Mon Sep 17 00:00:00 2001 From: whcao <41630003+HIT-cwh@users.noreply.github.com> Date: Mon, 22 Jul 2024 14:46:06 +0800 Subject: [PATCH 23/29] [Bug] fix preference_collate_fn attn_mask (#859) fix preference_collate_fn attn_mask --- .../dataset/collate_fns/preference_collate_fn.py | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/xtuner/dataset/collate_fns/preference_collate_fn.py b/xtuner/dataset/collate_fns/preference_collate_fn.py index ca21613bb..4b6a7f5c3 100644 --- a/xtuner/dataset/collate_fns/preference_collate_fn.py +++ b/xtuner/dataset/collate_fns/preference_collate_fn.py @@ -58,14 +58,14 @@ def preference_collate_fn(instances: Sequence[Dict], labels = torch.stack(labels) if use_varlen_attn: - attention_mask = torch.ones_like(input_ids).bool() + attention_mask = None position_ids = torch.stack(position_ids, dim=0) else: # Some tokenizers have the same eos token and pad token, so input_ids # cannot be masked directly based on the pad token id. 
attention_mask = torch.zeros_like(input_ids).bool() - for i in ori_length: - attention_mask[:i] = True + for i, length in enumerate(ori_length): + attention_mask[i, :length] = True bs, seq_len = input_ids.shape position_ids = torch.arange(seq_len).unsqueeze(0).long().repeat(bs, 1) @@ -74,11 +74,12 @@ def preference_collate_fn(instances: Sequence[Dict], input_ids = pad_for_sequence_parallel(input_ids, pad_index) labels = pad_for_sequence_parallel(labels, IGNORE_INDEX) position_ids = pad_for_sequence_parallel(position_ids, 0) - # We use attention_mask to distinguish `input_ids` from - # (sequence parallel) pad tokens in `get_var_len_atten_logps` method of - # class `DPO` and `ORPO` - attention_mask = pad_for_sequence_parallel(attention_mask, 0) + if attention_mask is not None: + attention_mask = pad_for_sequence_parallel(attention_mask, 0) if use_varlen_attn: + # We use attention_mask to distinguish `input_ids` from + # (sequence parallel) pad tokens in `get_var_len_atten_logps` + # method of class `DPO` and `ORPO` (cumulative_len, attention_mask ) = pad_cumulative_len_for_sequence_parallel(cumulative_len) From 94a4fcb0f53d63ceb85334bb456c073251669ccb Mon Sep 17 00:00:00 2001 From: whcao <41630003+HIT-cwh@users.noreply.github.com> Date: Mon, 22 Jul 2024 20:17:44 +0800 Subject: [PATCH 24/29] bump version to 0.1.23 (#862) --- xtuner/version.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/xtuner/version.py b/xtuner/version.py index c77f4bf8f..e4669c188 100644 --- a/xtuner/version.py +++ b/xtuner/version.py @@ -1,5 +1,5 @@ # Copyright (c) OpenMMLab. All rights reserved. -__version__ = '0.1.22' +__version__ = '0.1.23' short_version = __version__ From d2a173a284d5969e80669c6e59dffd055d2570f3 Mon Sep 17 00:00:00 2001 From: LDLINGLINGLING <47373076+LDLINGLINGLING@users.noreply.github.com> Date: Mon, 29 Jul 2024 16:29:27 +0800 Subject: [PATCH 25/29] =?UTF-8?q?readme=E4=B8=AD=E5=A2=9E=E5=8A=A0?= =?UTF-8?q?=E4=BA=86MiniCPM=E7=9A=84=E6=94=AF=E6=8C=81=20(#869)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: liudan --- README.md | 3 ++- README_zh-CN.md | 3 ++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 2a8eb2879..263d300c7 100644 --- a/README.md +++ b/README.md @@ -38,7 +38,7 @@ English | [简体中文](README_zh-CN.md) ## 🎉 News - +- **\[2024/07\]** Support [MiniCPM](xtuner/configs/minicpm/) models! - **\[2024/07\]** Support [DPO](https://github.com/InternLM/xtuner/tree/main/xtuner/configs/dpo), [ORPO](https://github.com/InternLM/xtuner/tree/main/xtuner/configs/orpo) and [Reward Model](https://github.com/InternLM/xtuner/tree/main/xtuner/configs/reward_model) training with packed data and sequence parallel! See [documents](https://xtuner.readthedocs.io/en/latest/dpo/overview.html) for more details. - **\[2024/07\]** Support [InternLM 2.5](xtuner/configs/internlm/internlm2_5_chat_7b/) models! - **\[2024/06\]** Support [DeepSeek V2](xtuner/configs/deepseek/deepseek_v2_chat/) models! **2x faster!** @@ -113,6 +113,7 @@ XTuner is an efficient, flexible and full-featured toolkit for fine-tuning large
  • Mixtral
  • DeepSeek V2
  • Gemma
  • +MiniCPM
  • ...
diff --git a/README_zh-CN.md b/README_zh-CN.md
index 58076210f..f4f0b4b48 100644
--- a/README_zh-CN.md
+++ b/README_zh-CN.md
@@ -38,7 +38,7 @@
 ## 🎉 更新
-
+- **\[2024/07\]** 支持 [MiniCPM](xtuner/configs/minicpm/) 模型!
 - **\[2024/07\]** 支持训练 [DPO](https://github.com/InternLM/xtuner/tree/main/xtuner/configs/dpo), [ORPO](https://github.com/InternLM/xtuner/tree/main/xtuner/configs/orpo) 还有 [Reward Model](https://github.com/InternLM/xtuner/tree/main/xtuner/configs/reward_model) ! 并且能够支持打包数据以及序列并行功能! 请参考 [文档](https://xtuner.readthedocs.io/zh-cn/latest/dpo/overview.html) 了解更多信息。
 - **\[2024/07\]** 支持 [InternLM 2.5](xtuner/configs/internlm/internlm2_5_chat_7b/) 模型!
 - **\[2024/06\]** 支持 [DeepSeek V2](xtuner/configs/deepseek/deepseek_v2_chat/) models! **训练速度提升一倍!**
@@ -113,6 +113,7 @@
 XTuner 是一个高效、灵活、全能的轻量化大模型微调工具库。
  • Mixtral
  • DeepSeek V2
  • Gemma
  • +MiniCPM
  • ...
From 01640b00790453de39a63b5019fdd14f7cc6e4d8 Mon Sep 17 00:00:00 2001
From: whcao <41630003+HIT-cwh@users.noreply.github.com>
Date: Wed, 31 Jul 2024 18:54:56 +0800
Subject: [PATCH 26/29] [Bug] fix dsv2 attn dispatch (softmax_scale) (#873)

fix dsv2 attn dispatch (softmax_scale)
---
 xtuner/model/modules/dispatch/attention.py   | 4 ++++
 xtuner/model/modules/dispatch/deepseek_v2.py | 2 ++
 2 files changed, 6 insertions(+)

diff --git a/xtuner/model/modules/dispatch/attention.py b/xtuner/model/modules/dispatch/attention.py
index 2a225795d..e89bb511c 100644
--- a/xtuner/model/modules/dispatch/attention.py
+++ b/xtuner/model/modules/dispatch/attention.py
@@ -38,6 +38,7 @@ def flash_attn_w_mask(
         key_states,
         value_states,
         attention_mask,
+        softmax_scale=None,
         causal=True,
         dropout_p=0.0,
         window_size=(-1, -1),  # -1 means infinite context window
@@ -57,6 +58,7 @@
         cu_seqlens_k=cu_seqlens_k,
         max_seqlen_q=max_seqlen_in_batch_q,
         max_seqlen_k=max_seqlen_in_batch_k,
+        softmax_scale=softmax_scale,
         dropout_p=dropout_p,
         causal=causal,
         window_size=window_size)
@@ -71,6 +73,7 @@ def varlen_flash_attn(
         value_states,
         cumulative_len,
         max_seqlen,
+        softmax_scale=None,
         dropout_p=0.,
         causal=True,
         window_size=(-1, -1),  # -1 means infinite context window
@@ -85,6 +88,7 @@
         cumulative_len,
         max_seqlen,
         max_seqlen,
+        softmax_scale=softmax_scale,
         dropout_p=dropout_p,
         return_attn_probs=False,
         causal=causal,
diff --git a/xtuner/model/modules/dispatch/deepseek_v2.py b/xtuner/model/modules/dispatch/deepseek_v2.py
index 667d2227c..bfa3ebb6d 100644
--- a/xtuner/model/modules/dispatch/deepseek_v2.py
+++ b/xtuner/model/modules/dispatch/deepseek_v2.py
@@ -279,6 +279,7 @@ def deepseek_varlen_attn_forward(
             value_states,
             cumulative_len,
             max_seqlen,
+            softmax_scale=self.softmax_scale,
             causal=causal,
             dropout_p=dropout_rate,
             training=True)
@@ -287,6 +288,7 @@ def deepseek_varlen_attn_forward(
             query_states,
             key_states,
             value_states,
+            softmax_scale=self.softmax_scale,
             causal=causal,
             dropout_p=dropout_rate,
             training=False)

From 7dd779b3b4c5a7fb8e00f7511c1635b8ff33979b Mon Sep 17 00:00:00 2001
From: whcao <41630003+HIT-cwh@users.noreply.github.com>
Date: Fri, 9 Aug 2024 17:59:54 +0800
Subject: [PATCH 27/29] [Bug] fix openai_map_fn bugs (#885)

fix openai_map_fn bugs
---
 xtuner/dataset/map_fns/dataset_map_fns/openai_map_fn.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/xtuner/dataset/map_fns/dataset_map_fns/openai_map_fn.py b/xtuner/dataset/map_fns/dataset_map_fns/openai_map_fn.py
index c1798dc45..468e738f7 100644
--- a/xtuner/dataset/map_fns/dataset_map_fns/openai_map_fn.py
+++ b/xtuner/dataset/map_fns/dataset_map_fns/openai_map_fn.py
@@ -32,7 +32,8 @@ def openai_map_fn(example):
         elif msg['role'] == 'user':
             input += msg['content']
         elif msg['role'] == 'assistant':
-            output_with_loss = msg.get('loss', True)
+            output_with_loss = msg.get('loss', 'True')
+            output_with_loss = str(output_with_loss)
             output_with_loss = output_with_loss.lower() == 'true'
             conversation.append({
                 'system': system,

From d81b366d5bd5795d5892b0c9ae42ef229ac719f4 Mon Sep 17 00:00:00 2001
From: whcao <41630003+HIT-cwh@users.noreply.github.com>
Date: Fri, 9 Aug 2024 18:00:34 +0800
Subject: [PATCH 28/29] support transformers >= 4.43 (#878)

---
 xtuner/model/modules/dispatch/cohere.py  | 36 ++++++++++++++----
 xtuner/model/modules/dispatch/mistral.py | 40 +++++++++++++++-----
 xtuner/model/modules/dispatch/phi3.py    | 41 ++++++++++++++++-----
 xtuner/model/modules/dispatch/qwen2.py   | 47 +++++++++++++++++++-----
 4 files 
changed, 129 insertions(+), 35 deletions(-) diff --git a/xtuner/model/modules/dispatch/cohere.py b/xtuner/model/modules/dispatch/cohere.py index d3529f570..8acf06747 100644 --- a/xtuner/model/modules/dispatch/cohere.py +++ b/xtuner/model/modules/dispatch/cohere.py @@ -3,6 +3,8 @@ import torch import torch.distributed as dist +import transformers +from mmengine.utils import digit_version from transformers.models.cohere.modeling_cohere import apply_rotary_pos_emb from xtuner.parallel.sequence import get_sequence_parallel_world_size @@ -18,6 +20,14 @@ class Cache: pass +TRANSFORMERS_VERSION = digit_version(transformers.__version__) +IS_LOW_VERSION_TRANSFORMERS = TRANSFORMERS_VERSION < digit_version('4.43') + +if not IS_LOW_VERSION_TRANSFORMERS: + from transformers.modeling_flash_attention_utils import \ + _flash_attention_forward + + def cohere_attn_forward( self, hidden_states: torch.Tensor, @@ -110,13 +120,25 @@ def cohere_attn_forward( ori_num_head = self.num_heads self.num_heads = query_states.shape[-2] - attn_output = self._flash_attention_forward( - query_states, - key_states, - value_states, - attention_mask, - query_states.shape[1], - dropout=dropout_rate) + if IS_LOW_VERSION_TRANSFORMERS: + attn_output = self._flash_attention_forward( + query_states, + key_states, + value_states, + attention_mask, + query_states.shape[1], + dropout=dropout_rate) + else: + attn_output = _flash_attention_forward( + query_states, + key_states, + value_states, + attention_mask, + query_states.shape[1], + dropout=dropout_rate, + use_top_left_mask=self._flash_attn_uses_top_left_mask, + is_causal=self.is_causal, + ) if enable_sequence_parallel: attn_output = post_process_for_sequence_parallel_attn(attn_output) diff --git a/xtuner/model/modules/dispatch/mistral.py b/xtuner/model/modules/dispatch/mistral.py index d08b0f00e..dc6c7fed8 100644 --- a/xtuner/model/modules/dispatch/mistral.py +++ b/xtuner/model/modules/dispatch/mistral.py @@ -6,7 +6,9 @@ import torch import torch.distributed as dist import torch.nn as nn +import transformers from mmengine import MessageHub +from mmengine.utils import digit_version from transformers.cache_utils import Cache from transformers.models.mistral.modeling_mistral import (apply_rotary_pos_emb, repeat_kv) @@ -28,6 +30,13 @@ except ImportError: pass +TRANSFORMERS_VERSION = digit_version(transformers.__version__) +IS_LOW_VERSION_TRANSFORMERS = TRANSFORMERS_VERSION < digit_version('4.43') + +if not IS_LOW_VERSION_TRANSFORMERS: + from transformers.modeling_flash_attention_utils import \ + _flash_attention_forward + class MistralRotaryEmbedding(nn.Module): @@ -220,15 +229,28 @@ def mistral_attn_forward( ori_num_head = self.num_heads self.num_heads = query_states.shape[-2] - attn_output = self._flash_attention_forward( - query_states, - key_states, - value_states, - attention_mask, - query_length=query_states.shape[1], - dropout=dropout_rate, - use_sliding_windows=use_sliding_windows, - ) + if IS_LOW_VERSION_TRANSFORMERS: + attn_output = self._flash_attention_forward( + query_states, + key_states, + value_states, + attention_mask, + query_length=query_states.shape[1], + dropout=dropout_rate, + use_sliding_windows=use_sliding_windows, + ) + else: + attn_output = _flash_attention_forward( + query_states, + key_states, + value_states, + attention_mask, + query_states.shape[1], + dropout=dropout_rate, + sliding_window=getattr(self.config, 'sliding_window', None), + use_top_left_mask=self._flash_attn_uses_top_left_mask, + is_causal=self.is_causal, + ) if enable_sequence_parallel: 
attn_output = post_process_for_sequence_parallel_attn(attn_output) diff --git a/xtuner/model/modules/dispatch/phi3.py b/xtuner/model/modules/dispatch/phi3.py index 97ebc8d33..10f60f939 100644 --- a/xtuner/model/modules/dispatch/phi3.py +++ b/xtuner/model/modules/dispatch/phi3.py @@ -1,10 +1,13 @@ # Copyright (c) OpenMMLab. All rights reserved. +import inspect import warnings from typing import Optional, Tuple import torch import torch.distributed as dist +import transformers from mmengine import MessageHub +from mmengine.utils import digit_version from xtuner.parallel.sequence import (get_sequence_parallel_world_size, post_process_for_sequence_parallel_attn, @@ -19,7 +22,12 @@ class Cache: pass -import inspect +TRANSFORMERS_VERSION = digit_version(transformers.__version__) +IS_LOW_VERSION_TRANSFORMERS = TRANSFORMERS_VERSION < digit_version('4.43') + +if not IS_LOW_VERSION_TRANSFORMERS: + from transformers.modeling_flash_attention_utils import \ + _flash_attention_forward _flash_supports_window_size = False try: @@ -239,15 +247,28 @@ def phi3_attn_forward( ori_num_head = self.num_heads self.num_heads = query_states.shape[-2] - attn_output = self._flash_attention_forward( - query_states, - key_states, - value_states, - attention_mask, - query_states.shape[1], - dropout=attn_dropout, - use_sliding_windows=use_sliding_windows, - ) + if IS_LOW_VERSION_TRANSFORMERS: + attn_output = self._flash_attention_forward( + query_states, + key_states, + value_states, + attention_mask, + query_states.shape[1], + dropout=attn_dropout, + use_sliding_windows=use_sliding_windows, + ) + else: + attn_output = _flash_attention_forward( + query_states, + key_states, + value_states, + attention_mask, + query_states.shape[1], + dropout=attn_dropout, + sliding_window=getattr(self.config, 'sliding_window', None), + use_top_left_mask=self._flash_attn_uses_top_left_mask, + is_causal=self.is_causal, + ) if enable_sequence_parallel: # (b, s, nd // sp_world_size, dim) -> (b, s // sp_world_size, nd, dim) diff --git a/xtuner/model/modules/dispatch/qwen2.py b/xtuner/model/modules/dispatch/qwen2.py index 1c8c5a8d0..20f2f40f3 100644 --- a/xtuner/model/modules/dispatch/qwen2.py +++ b/xtuner/model/modules/dispatch/qwen2.py @@ -5,7 +5,9 @@ import torch import torch.distributed as dist +import transformers from mmengine import MessageHub +from mmengine.utils import digit_version from transformers.cache_utils import Cache from transformers.models.qwen2.modeling_qwen2 import (apply_rotary_pos_emb, repeat_kv) @@ -26,6 +28,13 @@ except ImportError: pass +TRANSFORMERS_VERSION = digit_version(transformers.__version__) +IS_LOW_VERSION_TRANSFORMERS = TRANSFORMERS_VERSION < digit_version('4.43') + +if not IS_LOW_VERSION_TRANSFORMERS: + from transformers.modeling_flash_attention_utils import \ + _flash_attention_forward + def qwen2_attn_forward( self, @@ -157,15 +166,35 @@ def qwen2_attn_forward( ori_num_head = self.num_heads self.num_heads = query_states.shape[-2] - attn_output = self._flash_attention_forward( - query_states, - key_states, - value_states, - attention_mask, - query_length=query_states.shape[1], - dropout=dropout_rate, - use_sliding_windows=use_sliding_windows, - ) + if IS_LOW_VERSION_TRANSFORMERS: + attn_output = self._flash_attention_forward( + query_states, + key_states, + value_states, + attention_mask, + query_length=query_states.shape[1], + dropout=dropout_rate, + use_sliding_windows=use_sliding_windows, + ) + else: + if (self.config.use_sliding_window + and getattr(self.config, 'sliding_window', None) is not None + and 
self.layer_idx >= self.config.max_window_layers):
+                # There may be bugs here, but we are aligned with Transformers
+                sliding_window = self.config.sliding_window
+            else:
+                sliding_window = None
+            attn_output = _flash_attention_forward(
+                query_states,
+                key_states,
+                value_states,
+                attention_mask,
+                query_states.shape[1],
+                dropout=dropout_rate,
+                sliding_window=sliding_window,
+                is_causal=self.is_causal,
+                use_top_left_mask=self._flash_attn_uses_top_left_mask,
+            )
 
     if enable_sequence_parallel:
         attn_output = post_process_for_sequence_parallel_attn(attn_output)

From 081c8ca874bdbf7a7f8cd0a9e4cba503eaaa7bba Mon Sep 17 00:00:00 2001
From: whcao <41630003+HIT-cwh@users.noreply.github.com>
Date: Fri, 9 Aug 2024 18:01:01 +0800
Subject: [PATCH 29/29] Add internlm2 5 cfgs (#872)

* add internlm2.5 configs
* limit transformers <= 4.42.4
---
 requirements/runtime.txt                    |   4 +-
 .../internlm2_5_chat_20b_alpaca_e3.py       | 202 ++++++++++++++++
 .../internlm2_5_chat_20b_qlora_alpaca_e3.py | 219 ++++++++++++++++++
 3 files changed, 424 insertions(+), 1 deletion(-)
 create mode 100644 xtuner/configs/internlm/internlm2_5_chat_20b/internlm2_5_chat_20b_alpaca_e3.py
 create mode 100644 xtuner/configs/internlm/internlm2_5_chat_20b/internlm2_5_chat_20b_qlora_alpaca_e3.py

diff --git a/requirements/runtime.txt b/requirements/runtime.txt
index c93ea248b..3a4d2f84e 100644
--- a/requirements/runtime.txt
+++ b/requirements/runtime.txt
@@ -21,5 +21,7 @@ torchvision
 # Registering a causal mask in `LlamaModel` is not friendly for very large
 # `max_position_embeddings`. Refer to
 # https://github.com/huggingface/transformers/blob/v4.38.0/src/transformers/models/llama/modeling_llama.py#L921-L923
-transformers>=4.36.0,!=4.38.0,!=4.38.1,!=4.38.2
+# transformers >= 4.43.0 uses the function `_flash_attention_forward` rather
+# than the method `self._flash_attention_forward`, which is a BC break.
+transformers>=4.36.0,!=4.38.0,!=4.38.1,!=4.38.2,<=4.42.4
 transformers_stream_generator
diff --git a/xtuner/configs/internlm/internlm2_5_chat_20b/internlm2_5_chat_20b_alpaca_e3.py b/xtuner/configs/internlm/internlm2_5_chat_20b/internlm2_5_chat_20b_alpaca_e3.py
new file mode 100644
index 000000000..f67fc1a22
--- /dev/null
+++ b/xtuner/configs/internlm/internlm2_5_chat_20b/internlm2_5_chat_20b_alpaca_e3.py
@@ -0,0 +1,202 @@
+# Copyright (c) OpenMMLab. All rights reserved.
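+# Full-parameter SFT config: InternLM2.5-20B-Chat fine-tuned on Alpaca for
+# 3 epochs; a QLoRA variant of the same recipe follows below.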
+import torch +from datasets import load_dataset +from mmengine.dataset import DefaultSampler +from mmengine.hooks import (CheckpointHook, DistSamplerSeedHook, IterTimerHook, + LoggerHook, ParamSchedulerHook) +from mmengine.optim import AmpOptimWrapper, CosineAnnealingLR, LinearLR +from peft import LoraConfig +from torch.optim import AdamW +from transformers import (AutoModelForCausalLM, AutoTokenizer, + BitsAndBytesConfig) + +from xtuner.dataset import process_hf_dataset +from xtuner.dataset.collate_fns import default_collate_fn +from xtuner.dataset.map_fns import alpaca_map_fn, template_map_fn_factory +from xtuner.engine.hooks import (DatasetInfoHook, EvaluateChatHook, + VarlenAttnArgsToMessageHubHook) +from xtuner.engine.runner import TrainLoop +from xtuner.model import SupervisedFinetune +from xtuner.parallel.sequence import SequenceParallelSampler +from xtuner.utils import PROMPT_TEMPLATE, SYSTEM_TEMPLATE + +####################################################################### +# PART 1 Settings # +####################################################################### +# Model +pretrained_model_name_or_path = 'internlm/internlm2_5-20b-chat' +use_varlen_attn = False + +# Data +alpaca_en_path = 'tatsu-lab/alpaca' +prompt_template = PROMPT_TEMPLATE.internlm2_chat +max_length = 2048 +pack_to_max_length = True + +# parallel +sequence_parallel_size = 1 + +# Scheduler & Optimizer +batch_size = 1 # per_device +accumulative_counts = 1 +accumulative_counts *= sequence_parallel_size +dataloader_num_workers = 0 +max_epochs = 3 +optim_type = AdamW +lr = 2e-5 +betas = (0.9, 0.999) +weight_decay = 0 +max_norm = 1 # grad clip +warmup_ratio = 0.03 + +# Save +save_steps = 500 +save_total_limit = 2 # Maximum checkpoints to keep (-1 means unlimited) + +# Evaluate the generation performance during the training +evaluation_freq = 500 +SYSTEM = SYSTEM_TEMPLATE.alpaca +evaluation_inputs = [ + '请给我介绍五个上海的景点', 'Please tell me five scenic spots in Shanghai' +] + +####################################################################### +# PART 2 Model & Tokenizer # +####################################################################### +tokenizer = dict( + type=AutoTokenizer.from_pretrained, + pretrained_model_name_or_path=pretrained_model_name_or_path, + trust_remote_code=True, + padding_side='right') + +model = dict( + type=SupervisedFinetune, + use_varlen_attn=use_varlen_attn, + llm=dict( + type=AutoModelForCausalLM.from_pretrained, + pretrained_model_name_or_path=pretrained_model_name_or_path, + trust_remote_code=True)) + +####################################################################### +# PART 3 Dataset & Dataloader # +####################################################################### +alpaca_en = dict( + type=process_hf_dataset, + dataset=dict(type=load_dataset, path=alpaca_en_path), + tokenizer=tokenizer, + max_length=max_length, + dataset_map_fn=alpaca_map_fn, + template_map_fn=dict( + type=template_map_fn_factory, template=prompt_template), + remove_unused_columns=True, + shuffle_before_pack=True, + pack_to_max_length=pack_to_max_length, + use_varlen_attn=use_varlen_attn) + +sampler = SequenceParallelSampler \ + if sequence_parallel_size > 1 else DefaultSampler +train_dataloader = dict( + batch_size=batch_size, + num_workers=dataloader_num_workers, + dataset=alpaca_en, + sampler=dict(type=sampler, shuffle=True), + collate_fn=dict(type=default_collate_fn, use_varlen_attn=use_varlen_attn)) + +####################################################################### +# PART 4 Scheduler & 
Optimizer                    #
+#######################################################################
+# optimizer
+optim_wrapper = dict(
+    type=AmpOptimWrapper,
+    optimizer=dict(
+        type=optim_type, lr=lr, betas=betas, weight_decay=weight_decay),
+    clip_grad=dict(max_norm=max_norm, error_if_nonfinite=False),
+    accumulative_counts=accumulative_counts,
+    loss_scale='dynamic',
+    dtype='float16')
+
+# learning policy
+# More information: https://github.com/open-mmlab/mmengine/blob/main/docs/en/tutorials/param_scheduler.md  # noqa: E501
+param_scheduler = [
+    dict(
+        type=LinearLR,
+        start_factor=1e-5,
+        by_epoch=True,
+        begin=0,
+        end=warmup_ratio * max_epochs,
+        convert_to_iter_based=True),
+    dict(
+        type=CosineAnnealingLR,
+        eta_min=0.0,
+        by_epoch=True,
+        begin=warmup_ratio * max_epochs,
+        end=max_epochs,
+        convert_to_iter_based=True)
+]
+
+# train, val, test setting
+train_cfg = dict(type=TrainLoop, max_epochs=max_epochs)
+
+#######################################################################
+#                           PART 5  Runtime                           #
+#######################################################################
+# Log the dialogue periodically during the training process, optional
+custom_hooks = [
+    dict(type=DatasetInfoHook, tokenizer=tokenizer),
+    dict(
+        type=EvaluateChatHook,
+        tokenizer=tokenizer,
+        every_n_iters=evaluation_freq,
+        evaluation_inputs=evaluation_inputs,
+        system=SYSTEM,
+        prompt_template=prompt_template)
+]
+
+if use_varlen_attn:
+    custom_hooks += [dict(type=VarlenAttnArgsToMessageHubHook)]
+
+# configure default hooks
+default_hooks = dict(
+    # record the time of every iteration.
+    timer=dict(type=IterTimerHook),
+    # print log every 10 iterations.
+    logger=dict(type=LoggerHook, log_metric_by_epoch=False, interval=10),
+    # enable the parameter scheduler.
+    param_scheduler=dict(type=ParamSchedulerHook),
+    # save checkpoint per `save_steps`.
+    checkpoint=dict(
+        type=CheckpointHook,
+        by_epoch=False,
+        interval=save_steps,
+        max_keep_ckpts=save_total_limit),
+    # set sampler seed in distributed environment.
+    sampler_seed=dict(type=DistSamplerSeedHook),
+)
+
+# configure environment
+env_cfg = dict(
+    # whether to enable cudnn benchmark
+    cudnn_benchmark=False,
+    # set multi process parameters
+    mp_cfg=dict(mp_start_method='fork', opencv_num_threads=0),
+    # set distributed parameters
+    dist_cfg=dict(backend='nccl'),
+)
+
+# set visualizer
+visualizer = None
+
+# set log level
+log_level = 'INFO'
+
+# load from which checkpoint
+load_from = None
+
+# whether to resume training from the loaded checkpoint
+resume = False
+
+# Use a random seed by default and disable `deterministic`
+randomness = dict(seed=None, deterministic=False)
+
+# set log processor
+log_processor = dict(by_epoch=False)
diff --git a/xtuner/configs/internlm/internlm2_5_chat_20b/internlm2_5_chat_20b_qlora_alpaca_e3.py b/xtuner/configs/internlm/internlm2_5_chat_20b/internlm2_5_chat_20b_qlora_alpaca_e3.py
new file mode 100644
index 000000000..f695e7922
--- /dev/null
+++ b/xtuner/configs/internlm/internlm2_5_chat_20b/internlm2_5_chat_20b_qlora_alpaca_e3.py
@@ -0,0 +1,219 @@
+# Copyright (c) OpenMMLab. All rights reserved.
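+# Compared with the full-parameter config above, this QLoRA variant adds
+# 4-bit BitsAndBytes quantization of the LLM plus a LoraConfig adapter,
+# and raises the learning rate from 2e-5 to 2e-4.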
+import torch +from datasets import load_dataset +from mmengine.dataset import DefaultSampler +from mmengine.hooks import (CheckpointHook, DistSamplerSeedHook, IterTimerHook, + LoggerHook, ParamSchedulerHook) +from mmengine.optim import AmpOptimWrapper, CosineAnnealingLR, LinearLR +from peft import LoraConfig +from torch.optim import AdamW +from transformers import (AutoModelForCausalLM, AutoTokenizer, + BitsAndBytesConfig) + +from xtuner.dataset import process_hf_dataset +from xtuner.dataset.collate_fns import default_collate_fn +from xtuner.dataset.map_fns import alpaca_map_fn, template_map_fn_factory +from xtuner.engine.hooks import (DatasetInfoHook, EvaluateChatHook, + VarlenAttnArgsToMessageHubHook) +from xtuner.engine.runner import TrainLoop +from xtuner.model import SupervisedFinetune +from xtuner.parallel.sequence import SequenceParallelSampler +from xtuner.utils import PROMPT_TEMPLATE, SYSTEM_TEMPLATE + +####################################################################### +# PART 1 Settings # +####################################################################### +# Model +pretrained_model_name_or_path = 'internlm/internlm2_5-20b-chat' +use_varlen_attn = False + +# Data +alpaca_en_path = 'tatsu-lab/alpaca' +prompt_template = PROMPT_TEMPLATE.internlm2_chat +max_length = 2048 +pack_to_max_length = True + +# parallel +sequence_parallel_size = 1 + +# Scheduler & Optimizer +batch_size = 1 # per_device +accumulative_counts = 1 +accumulative_counts *= sequence_parallel_size +dataloader_num_workers = 0 +max_epochs = 3 +optim_type = AdamW +lr = 2e-4 +betas = (0.9, 0.999) +weight_decay = 0 +max_norm = 1 # grad clip +warmup_ratio = 0.03 + +# Save +save_steps = 500 +save_total_limit = 2 # Maximum checkpoints to keep (-1 means unlimited) + +# Evaluate the generation performance during the training +evaluation_freq = 500 +SYSTEM = SYSTEM_TEMPLATE.alpaca +evaluation_inputs = [ + '请给我介绍五个上海的景点', 'Please tell me five scenic spots in Shanghai' +] + +####################################################################### +# PART 2 Model & Tokenizer # +####################################################################### +tokenizer = dict( + type=AutoTokenizer.from_pretrained, + pretrained_model_name_or_path=pretrained_model_name_or_path, + trust_remote_code=True, + padding_side='right') + +model = dict( + type=SupervisedFinetune, + use_varlen_attn=use_varlen_attn, + llm=dict( + type=AutoModelForCausalLM.from_pretrained, + pretrained_model_name_or_path=pretrained_model_name_or_path, + trust_remote_code=True, + torch_dtype=torch.float16, + quantization_config=dict( + type=BitsAndBytesConfig, + load_in_4bit=True, + load_in_8bit=False, + llm_int8_threshold=6.0, + llm_int8_has_fp16_weight=False, + bnb_4bit_compute_dtype=torch.float16, + bnb_4bit_use_double_quant=True, + bnb_4bit_quant_type='nf4')), + lora=dict( + type=LoraConfig, + r=64, + lora_alpha=16, + lora_dropout=0.1, + bias='none', + task_type='CAUSAL_LM')) + +####################################################################### +# PART 3 Dataset & Dataloader # +####################################################################### +alpaca_en = dict( + type=process_hf_dataset, + dataset=dict(type=load_dataset, path=alpaca_en_path), + tokenizer=tokenizer, + max_length=max_length, + dataset_map_fn=alpaca_map_fn, + template_map_fn=dict( + type=template_map_fn_factory, template=prompt_template), + remove_unused_columns=True, + shuffle_before_pack=True, + pack_to_max_length=pack_to_max_length, + use_varlen_attn=use_varlen_attn) + +sampler = 
SequenceParallelSampler \
+    if sequence_parallel_size > 1 else DefaultSampler
+train_dataloader = dict(
+    batch_size=batch_size,
+    num_workers=dataloader_num_workers,
+    dataset=alpaca_en,
+    sampler=dict(type=sampler, shuffle=True),
+    collate_fn=dict(type=default_collate_fn, use_varlen_attn=use_varlen_attn))
+
+#######################################################################
+#                    PART 4  Scheduler & Optimizer                    #
+#######################################################################
+# optimizer
+optim_wrapper = dict(
+    type=AmpOptimWrapper,
+    optimizer=dict(
+        type=optim_type, lr=lr, betas=betas, weight_decay=weight_decay),
+    clip_grad=dict(max_norm=max_norm, error_if_nonfinite=False),
+    accumulative_counts=accumulative_counts,
+    loss_scale='dynamic',
+    dtype='float16')
+
+# learning policy
+# More information: https://github.com/open-mmlab/mmengine/blob/main/docs/en/tutorials/param_scheduler.md  # noqa: E501
+param_scheduler = [
+    dict(
+        type=LinearLR,
+        start_factor=1e-5,
+        by_epoch=True,
+        begin=0,
+        end=warmup_ratio * max_epochs,
+        convert_to_iter_based=True),
+    dict(
+        type=CosineAnnealingLR,
+        eta_min=0.0,
+        by_epoch=True,
+        begin=warmup_ratio * max_epochs,
+        end=max_epochs,
+        convert_to_iter_based=True)
+]
+
+# train, val, test setting
+train_cfg = dict(type=TrainLoop, max_epochs=max_epochs)
+
+#######################################################################
+#                           PART 5  Runtime                           #
+#######################################################################
+# Log the dialogue periodically during the training process, optional
+custom_hooks = [
+    dict(type=DatasetInfoHook, tokenizer=tokenizer),
+    dict(
+        type=EvaluateChatHook,
+        tokenizer=tokenizer,
+        every_n_iters=evaluation_freq,
+        evaluation_inputs=evaluation_inputs,
+        system=SYSTEM,
+        prompt_template=prompt_template)
+]
+
+if use_varlen_attn:
+    custom_hooks += [dict(type=VarlenAttnArgsToMessageHubHook)]
+
+# configure default hooks
+default_hooks = dict(
+    # record the time of every iteration.
+    timer=dict(type=IterTimerHook),
+    # print log every 10 iterations.
+    logger=dict(type=LoggerHook, log_metric_by_epoch=False, interval=10),
+    # enable the parameter scheduler.
+    param_scheduler=dict(type=ParamSchedulerHook),
+    # save checkpoint per `save_steps`.
+    checkpoint=dict(
+        type=CheckpointHook,
+        by_epoch=False,
+        interval=save_steps,
+        max_keep_ckpts=save_total_limit),
+    # set sampler seed in distributed environment.
+    sampler_seed=dict(type=DistSamplerSeedHook),
+)
+
+# configure environment
+env_cfg = dict(
+    # whether to enable cudnn benchmark
+    cudnn_benchmark=False,
+    # set multi process parameters
+    mp_cfg=dict(mp_start_method='fork', opencv_num_threads=0),
+    # set distributed parameters
+    dist_cfg=dict(backend='nccl'),
+)
+
+# set visualizer
+visualizer = None
+
+# set log level
+log_level = 'INFO'
+
+# load from which checkpoint
+load_from = None
+
+# whether to resume training from the loaded checkpoint
+resume = False
+
+# Use a random seed by default and disable `deterministic`
+randomness = dict(seed=None, deterministic=False)
+
+# set log processor
+log_processor = dict(by_epoch=False)
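The two configs added above are plain Python modules that mmengine parses into a Config object, so they can be sanity-checked before launching a run. A minimal sketch, assuming xtuner and its dependencies are installed and the repository root is the working directory:

from mmengine.config import Config

# Load the new QLoRA config and inspect a few of its module-level fields.
cfg = Config.fromfile(
    'xtuner/configs/internlm/internlm2_5_chat_20b/'
    'internlm2_5_chat_20b_qlora_alpaca_e3.py')
print(cfg.model.llm.pretrained_model_name_or_path)  # internlm/internlm2_5-20b-chat
print(cfg.lr, cfg.max_length)                       # 0.0002 2048

In practice such a config is launched through xtuner's CLI, e.g. `xtuner train internlm2_5_chat_20b_qlora_alpaca_e3 --deepspeed deepspeed_zero2`.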