Skip to content

Commit

Permalink
Merge pull request #97 from honglu2875/honglu/dev
Browse files Browse the repository at this point in the history
Allow YaRN finetuning; Allow reading safetensors
  • Loading branch information
honglu2875 committed Feb 4, 2024
2 parents 4ed0a5b + 2233495 commit 571a797
Show file tree
Hide file tree
Showing 6 changed files with 45 additions and 18 deletions.
7 changes: 4 additions & 3 deletions aria/data/midi.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
"""Utils for data/MIDI processing."""

import hashlib
import json
import re
Expand Down Expand Up @@ -319,9 +320,9 @@ def _extract_track_data(track: mido.MidiTrack):
if len(notes_to_close) > 0 and len(notes_to_keep) > 0:
# Note-on on the same tick but we already closed
# some previous notes -> it will continue, keep it.
last_note_on[
(message.note, message.channel)
] = notes_to_keep
last_note_on[(message.note, message.channel)] = (
notes_to_keep
)
else:
# Remove the last note on for this instrument
del last_note_on[(message.note, message.channel)]
Expand Down
1 change: 1 addition & 0 deletions aria/model/model.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
"""Includes (PyTorch) transformer model and config classes."""

from dataclasses import dataclass
from typing import Optional, Union

Expand Down
4 changes: 2 additions & 2 deletions aria/run.py
Original file line number Diff line number Diff line change
Expand Up @@ -137,7 +137,7 @@ def sample(args):
from aria.tokenizer import RelTokenizer, AbsTokenizer
from aria.sample import greedy_sample
from aria.data.midi import MidiDict
from aria.utils import midi_to_audio
from aria.utils import midi_to_audio, _load_weight

if not cuda_is_available():
print("CUDA device is not available. Using CPU instead.")
Expand All @@ -150,7 +150,7 @@ def sample(args):
)

ckpt_path = _get_ckpt_path(args.c) # let user input path if not provided
model_state = torch.load(ckpt_path, map_location=device)
model_state = _load_weight(ckpt_path, device=device.type)
model_name = _get_model_name(
args.m, model_state
) # infer model name if not provided
Expand Down
1 change: 1 addition & 0 deletions aria/sample.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
"""Contains generation/sampling code"""

# This file contains code from https://github.com/facebookresearch/llama which
# is available under the following license:

Expand Down
35 changes: 22 additions & 13 deletions aria/train.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@
PretrainingDataset,
FinetuningDataset,
)
from aria.utils import _load_weight


# ----- USAGE -----
Expand Down Expand Up @@ -669,12 +670,16 @@ def resume_train(
else:
raise Exception

assert (
train_dataloader.dataset.max_seq_len == model_config.max_seq_len
), "max_seq_len differs between datasets and model config"
assert (
val_dataloader.dataset.max_seq_len == model_config.max_seq_len
), "max_seq_len differs between datasets and model config"
if (
model_config.yarn_config is None
or model_config.yarn_config.scale <= 1.0
):
assert (
train_dataloader.dataset.max_seq_len == model_config.max_seq_len
), "max_seq_len differs between datasets and model config"
assert (
val_dataloader.dataset.max_seq_len == model_config.max_seq_len
), "max_seq_len differs between datasets and model config"

(
model,
Expand Down Expand Up @@ -781,7 +786,7 @@ def train(
logger.info(f"Loaded model with config: {load_model_config(model_name)}")
if mode == "finetune":
try:
model.load_state_dict(torch.load(finetune_cp_path))
model.load_state_dict(_load_weight(finetune_cp_path))
except Exception as e:
raise Exception(
f"Failed to load checkpoint: {e}\n"
Expand Down Expand Up @@ -823,12 +828,16 @@ def train(
else:
raise Exception

assert (
train_dataloader.dataset.max_seq_len == model_config.max_seq_len
), "max_seq_len differs between datasets and model config"
assert (
val_dataloader.dataset.max_seq_len == model_config.max_seq_len
), "max_seq_len differs between datasets and model config"
if (
model_config.yarn_config is None
or model_config.yarn_config.scale <= 1.0
):
assert (
train_dataloader.dataset.max_seq_len == model_config.max_seq_len
), "max_seq_len differs between datasets and model config"
assert (
val_dataloader.dataset.max_seq_len == model_config.max_seq_len
), "max_seq_len differs between datasets and model config"

(
model,
Expand Down
15 changes: 15 additions & 0 deletions aria/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -59,3 +59,18 @@ def midi_to_audio(mid_path: str, soundfont_path: str | None = None):
print(e)

print(f"Saved files: \n{wav_path}\n{mp3_path}")


def _load_weight(ckpt_path: str, device="cpu"):
if ckpt_path.endswith("safetensors"):
try:
from safetensors.torch import load_file
except ImportError as e:
raise ImportError(
f"Please install safetensors in order to read from the checkpoint: {ckpt_path}"
) from e
return load_file(ckpt_path, device=device)
else:
import torch

return torch.load(ckpt_path, map_location=device)

0 comments on commit 571a797

Please sign in to comment.