remove landmark attn and xpos rope implementations (OpenAccess-AI-Collective#1010)
jinwonkim93 · Dec 28, 2023 · 70b46ca · 70b46ca
1 parent 85dd4d5
commit 70b46ca
Show file tree

Hide file tree

Showing 6 changed files with 1 addition and 1,404 deletions.
diff --git a/README.md b/README.md
@@ -798,11 +798,6 @@ flash_attn_fuse_mlp: # Whether to fuse part of the MLP into a single operation
 # Whether to use scaled-dot-product attention
 # https://pytorch.org/docs/stable/generated/torch.nn.functional.scaled_dot_product_attention.html
 sdp_attention:
-# Landmark attention (only llama)
-landmark_attention:
-# xpos RoPE see https://github.com/kaiokendev/cutoff-len-is-context-len/blob/main/util/xpos_rope_llama_monkey_patch.py
-# LLaMA only
-xpos_rope:
 
 # Resume from a specific checkpoint dir
 resume_from_checkpoint:

diff --git a/src/axolotl/cli/__init__.py b/src/axolotl/cli/__init__.py
@@ -103,14 +103,6 @@ def do_inference(
  importlib.import_module("axolotl.prompters"), prompter
  )
 
- if cfg.landmark_attention:
- from axolotl.monkeypatch.llama_landmark_attn import set_model_mem_id
-
- set_model_mem_id(model, tokenizer)
- model.set_mem_cache_args(
- max_seq_len=255, mem_freq=50, top_k=5, max_cache_size=None
- )
-
  model = model.to(cfg.device)
 
  while True:
@@ -176,14 +168,6 @@ def do_inference_gradio(
  importlib.import_module("axolotl.prompters"), prompter
  )
 
- if cfg.landmark_attention:
- from axolotl.monkeypatch.llama_landmark_attn import set_model_mem_id
-
- set_model_mem_id(model, tokenizer)
- model.set_mem_cache_args(
- max_seq_len=255, mem_freq=50, top_k=5, max_cache_size=None
- )
-
  model = model.to(cfg.device)
 
  def generate(instruction):

diff --git a/src/axolotl/core/trainer_builder.py b/src/axolotl/core/trainer_builder.py
@@ -9,7 +9,7 @@
 import sys
 from abc import abstractmethod
 from dataclasses import dataclass, field
-from functools import partial, wraps
+from functools import wraps
 from pathlib import Path
 from typing import Optional
 
@@ -780,26 +780,6 @@ def build(self, total_num_steps):
  # https://docs.nvidia.com/deeplearning/performance/dl-performance-matrix-multiplication/index.html
  data_collator_kwargs["pad_to_multiple_of"] = 64
 
- if self.cfg.is_llama_derived_model and self.cfg.landmark_attention:
- from axolotl.monkeypatch.llama_landmark_attn import (
- add_mem_tokens,
- get_mem_id,
- set_model_mem_id,
- )
-
- set_model_mem_id(self.model, self.tokenizer)
-
- LOG.info("Adding landmark attention tokens to dataset")
-
- for dataset in [self.train_dataset, self.eval_dataset]:
- dataset = dataset.map(
- partial(
- add_mem_tokens, mem_freq=50, mem_id=get_mem_id(self.tokenizer)
- ),
- batched=False,
- num_proc=32,
- )
-
  trainer_cls = self._get_trainer_cls()
  trainer_kwargs, trainer_cls = self.hook_pre_create_trainer(
  trainer_kwargs, trainer_cls