This repository has been archived by the owner on Jun 24, 2024. It is now read-only.

Ported quantize.cpp #84

Merged · 22 commits · Apr 25, 2023
Changes from 1 commit
feat(ggml-format): implement writer
philpax committed Apr 25, 2023
commit 196d4f380c5d2f8e26b3a0fcc67062848132137f
1 change: 1 addition & 0 deletions Cargo.lock

Some generated files are not rendered by default.

3 changes: 3 additions & 0 deletions ggml-format/Cargo.toml
@@ -8,3 +8,6 @@ edition = "2021"
[dependencies]
ggml = { path = "../ggml" }
thiserror = "1.0"

+ [dev-dependencies]
+ rand = "0.8"
6 changes: 5 additions & 1 deletion ggml-format/src/lib.rs
@@ -11,10 +11,14 @@
pub mod util;

mod loader;
+ mod saver;
+ #[cfg(test)]
+ mod tests;

pub use loader::{
- load_model_from_reader, LoadError, LoadHandler, PartialHyperparameters, TensorInfo,
+ data_size, load_model, LoadError, LoadHandler, PartialHyperparameters, TensorInfo,
};
+ pub use saver::{save_model, SaveError, SaveHandler, TensorData};

/// The type of a tensor element.
pub type ElementType = ggml::Type;
24 changes: 14 additions & 10 deletions ggml-format/src/loader.rs
@@ -66,14 +66,15 @@ impl TensorInfo {

/// Calculate the size of the tensor's values in bytes.
pub fn calc_size(&self) -> usize {
- let mut size = ggml::type_size(self.element_type);
- for &dim in self.dims() {
- size *= dim;
- }
- size / ggml::blck_size(self.element_type)
+ data_size(self.element_type, self.dims().iter().product())
}
}

+ /// Returns the size occupied by a tensor's data in bytes given the element type and number of elements.
+ pub fn data_size(element_type: ElementType, n_elements: usize) -> usize {
+ (ggml::type_size(element_type) * n_elements) / ggml::blck_size(element_type)
+ }

#[derive(Debug, Clone)]
/// Information present within the hyperparameters that is required to continue loading the model.
pub struct PartialHyperparameters {
@@ -82,22 +83,25 @@ pub struct PartialHyperparameters {
}

/// A handler for loading a model.
- pub trait LoadHandler<E: Error, R: BufRead + Seek> {
+ pub trait LoadHandler<E: Error> {
/// Called when the container type is read.
fn container_type(&mut self, container_type: ContainerType) -> Result<(), E>;
/// Called when a vocabulary token is read.
fn vocabulary_token(&mut self, i: usize, token: Vec<u8>, score: f32) -> Result<(), E>;
/// Called when the hyperparameters need to be read.
/// You must read the hyperparameters for your model here.
- fn read_hyperparameters(&mut self, reader: &mut R) -> Result<PartialHyperparameters, E>;
+ fn read_hyperparameters(
+ &mut self,
+ reader: &mut dyn BufRead,
+ ) -> Result<PartialHyperparameters, E>;
/// Called when a new tensor is found.
fn tensor_buffer(&mut self, info: TensorInfo) -> Result<(), E>;
}

/// Load a model from a `reader` with the `handler`, which will be called when certain events occur.
- pub fn load_model_from_reader<E: Error, R: BufRead + Seek>(
+ pub fn load_model<E: Error, R: BufRead + Seek>(
reader: &mut R,
- handler: &mut impl LoadHandler<E, R>,
+ handler: &mut impl LoadHandler<E>,
) -> Result<(), LoadError<E>> {
// Verify magic
let container_type: ContainerType = match read_u32(reader)? {
@@ -156,7 +160,7 @@ pub fn load_model_from_reader<E: Error, R: BufRead + Seek>(
/// align to 4 bytes before reading tensor weights
fn load_weights<E: Error, R: BufRead + Seek>(
reader: &mut R,
- handler: &mut impl LoadHandler<E, R>,
+ handler: &mut impl LoadHandler<E>,
align: bool,
) -> Result<(), LoadError<E>> {
while has_data_left(reader)? {
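A quick illustration of the new `data_size` helper above (not part of the diff): the on-disk size is the ggml type size times the element count, divided by the type's block size. A minimal standalone sketch with hard-coded sizes, assuming F16 takes 2 bytes per element with block size 1, and that a 4-bit quantized type packs roughly 32 elements into a 20-byte block; the real helper queries `ggml::type_size` and `ggml::blck_size` instead:

```rust
// Standalone sketch of the data_size arithmetic with hard-coded sizes.
// The helper in this PR calls ggml::type_size / ggml::blck_size instead.
fn data_size(type_size: usize, block_size: usize, n_elements: usize) -> usize {
    (type_size * n_elements) / block_size
}

fn main() {
    // F16: 2 bytes per element, block size 1 -> 24 bytes for a 3x4 tensor.
    assert_eq!(data_size(2, 1, 12), 24);
    // An assumed 4-bit quantized type (20-byte blocks of 32 elements)
    // stores 64 elements in 40 bytes.
    assert_eq!(data_size(20, 32, 64), 40);
}
```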
119 changes: 119 additions & 0 deletions ggml-format/src/saver.rs
@@ -0,0 +1,119 @@
use std::{
error::Error,
io::{Seek, Write},
};

use crate::{util, ElementType};

#[derive(Debug, thiserror::Error)]
/// Errors that can occur while writing a model.
pub enum SaveError<E: Error> {
#[error("non-specific I/O error")]
/// A non-specific IO error.
Io(#[from] std::io::Error),
#[error("invalid integer conversion")]
/// One of the integers encountered could not be converted to a more appropriate type.
InvalidIntegerConversion(#[from] std::num::TryFromIntError),
#[error("implementation error")]
/// An error `E` was returned by the implementation of the loader.
ImplementationError(#[source] E),
#[error("invariant broken: {0}")]
/// An invariant was broken.
InvariantBroken(String),
}

/// A handler for saving a model.
pub trait SaveHandler<E: Error> {
/// Called when the hyperparameters are to be written.
/// You must write the hyperparameters to the given writer.
fn write_hyperparameters(&mut self, writer: &mut dyn Write) -> Result<(), E>;

/// Called when a tensor is to be written.
/// You must return data for the tensor to be saved.
fn tensor_data(&mut self, tensor_name: &str) -> Result<TensorData, E>;
}

/// Information about a tensor that is to be saved.
#[derive(Clone, PartialEq, Debug)]
pub struct TensorData {
/// The number of dimensions in the tensor.
pub n_dims: usize,
/// The dimensions of the tensor.
pub dims: [usize; 2],
/// The type of the elements in the tensor.
pub element_type: ElementType,
/// The data to save to disk.
// TODO: This can be done more efficiently by borrowing the data, but
// I wanted to avoid the lifetime parameter for now, especially as
// the naive solution would borrow `TensorData` for the lifetime of the
// handler, which is obviously not ideal if you're trying to transcode
// an existing file tensor-by-tensor.
pub data: Vec<u8>,
}

/// Saves a model to the given writer.
///
/// Only GGJT is supported.
pub fn save_model<E: Error, W: Write + Seek>(
writer: &mut W,
handler: &mut dyn SaveHandler<E>,
vocabulary: &[(Vec<u8>, f32)],
tensor_names: &[String],
) -> Result<(), SaveError<E>> {
// Write header and hyperparameters
util::write_u32(writer, ggml::FILE_MAGIC_GGJT)?;
util::write_u32(writer, ggml::FORMAT_VERSION)?;
handler
.write_hyperparameters(writer)
.map_err(SaveError::ImplementationError)?;

// Write vocabulary
for (token, score) in vocabulary {
util::write_u32(writer, token.len().try_into()?)?;
writer.write_all(token)?;
util::write_f32(writer, *score)?;
}

// Write tensors
for name in tensor_names {
let TensorData {
n_dims,
dims,
element_type,
data,
} = handler
.tensor_data(name)
.map_err(SaveError::ImplementationError)?;

match element_type {
ElementType::Q4_0 | ElementType::Q4_1 => {
if dims[0] % 64 != 0 {
return Err(SaveError::InvariantBroken(format!("{dims:?}[0] % 64 == 0")));
}
}
_ => {}
}

// Write tensor header
util::write_i32(writer, n_dims.try_into()?)?;
util::write_i32(writer, name.len().try_into()?)?;
util::write_i32(writer, element_type.into())?;
for &dim in &dims[0..n_dims] {
util::write_i32(writer, dim.try_into()?)?;
}

// Write tensor name
writer.write_all(name.as_bytes())?;

// Align to nearest 32 bytes
let offset_curr = writer.stream_position()?;
let offset_aligned = (offset_curr + 31) & !31;
let padding = usize::try_from(offset_aligned - offset_curr)?;
writer.write_all(&vec![0; padding])?;

// Write tensor data
writer.write_all(&data)?;
}

Ok(())
}
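One note on the alignment step above (an illustration, not part of the diff): `(offset + 31) & !31` is the usual round-up-to-a-power-of-two bit trick, so each tensor's data starts on a 32-byte boundary. A small sketch of the arithmetic:

```rust
// Round an offset up to the next multiple of 32 (a power of two), mirroring
// the zero padding save_model writes before each tensor's data.
fn align_to_32(offset: u64) -> u64 {
    (offset + 31) & !31
}

fn main() {
    assert_eq!(align_to_32(0), 0);    // already aligned
    assert_eq!(align_to_32(1), 32);   // rounds up
    assert_eq!(align_to_32(32), 32);  // exact multiples are unchanged
    assert_eq!(align_to_32(45), 64);  // 64 - 45 = 19 bytes of padding written
}
```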
184 changes: 184 additions & 0 deletions ggml-format/src/tests.rs
@@ -0,0 +1,184 @@
use std::{
collections::BTreeMap,
error::Error,
io::{BufRead, Write},
};

use crate::*;
use rand::{distributions::Uniform, prelude::*};

#[derive(Debug)]
struct DummyError;
impl std::fmt::Display for DummyError {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
std::fmt::Debug::fmt(&self, f)
}
}
impl Error for DummyError {}

#[test]
fn can_roundtrip_loader_and_saver() {
let vocabulary = vec![
("blazingly".as_bytes().to_vec(), 0.1),
("fast".as_bytes().to_vec(), 0.2),
("memory".as_bytes().to_vec(), 0.3),
("efficient".as_bytes().to_vec(), 0.4),
];

let mut rng = rand::thread_rng();
let element_type = ggml::Type::F16;
let model = Model {
hyperparameters: Hyperparameters {
some_hyperparameter: random(),
some_other_hyperparameter: random(),
vocabulary_size: vocabulary.len().try_into().unwrap(),
},
vocabulary,
tensors: (0..10)
.map(|i| {
let n_dims = Uniform::from(1..3).sample(&mut rng);
let dims = (0..n_dims)
.map(|_| Uniform::from(1..10).sample(&mut rng))
.chain(std::iter::repeat(1).take(2 - n_dims))
.collect::<Vec<_>>();

let n_elements = dims.iter().product::<usize>();
let data = (0..data_size(element_type, n_elements))
.map(|_| random())
.collect::<Vec<_>>();

(
format!("tensor_{}", i),
TensorData {
n_dims,
dims: dims.try_into().unwrap(),
element_type,
data,
},
)
})
.collect(),
};

// Save the model.
let mut buffer = Vec::new();
let mut cursor = std::io::Cursor::new(&mut buffer);
let mut save_handler = MockSaveHandler { model: &model };
save_model(
&mut cursor,
&mut save_handler,
&model.vocabulary,
&model.tensors.keys().cloned().collect::<Vec<String>>(),
)
.unwrap();

// Load the model and confirm that it is the same as the original.
let mut cursor = std::io::Cursor::new(&buffer);
let mut load_handler = MockLoadHandler {
data: &buffer,
loaded_model: Model::default(),
};
load_model(&mut cursor, &mut load_handler).unwrap();
assert_eq!(load_handler.loaded_model, model);
}

#[derive(Default, PartialEq, Debug)]
struct Hyperparameters {
some_hyperparameter: u32,
some_other_hyperparameter: u32,
vocabulary_size: u32,
}
impl Hyperparameters {
fn read(reader: &mut dyn BufRead) -> Result<Self, std::io::Error> {
Ok(Self {
some_hyperparameter: util::read_u32(reader)?,
some_other_hyperparameter: util::read_u32(reader)? as u32,
vocabulary_size: util::read_u32(reader)?,
})
}

fn write(&self, writer: &mut dyn Write) -> Result<(), std::io::Error> {
util::write_u32(writer, self.some_hyperparameter)?;
util::write_u32(writer, self.some_other_hyperparameter as u32)?;
util::write_u32(writer, self.vocabulary_size)?;
Ok(())
}
}

#[derive(Default, PartialEq, Debug)]
struct Model {
hyperparameters: Hyperparameters,
vocabulary: Vec<(Vec<u8>, f32)>,
tensors: BTreeMap<String, TensorData>,
}

struct MockSaveHandler<'a> {
model: &'a Model,
}
impl SaveHandler<DummyError> for MockSaveHandler<'_> {
fn write_hyperparameters(&mut self, writer: &mut dyn Write) -> Result<(), DummyError> {
self.model.hyperparameters.write(writer).unwrap();
Ok(())
}

fn tensor_data(&mut self, tensor_name: &str) -> Result<TensorData, DummyError> {
self.model
.tensors
.get(tensor_name)
.cloned()
.ok_or(DummyError)
}
}

struct MockLoadHandler<'a> {
data: &'a [u8],
loaded_model: Model,
}
impl LoadHandler<DummyError> for MockLoadHandler<'_> {
fn container_type(&mut self, container_type: ContainerType) -> Result<(), DummyError> {
assert_eq!(container_type, ContainerType::Ggjt);
Ok(())
}

fn vocabulary_token(&mut self, i: usize, token: Vec<u8>, score: f32) -> Result<(), DummyError> {
assert_eq!(i, self.loaded_model.vocabulary.len());
self.loaded_model.vocabulary.push((token, score));
Ok(())
}

fn read_hyperparameters(
&mut self,
reader: &mut dyn BufRead,
) -> Result<PartialHyperparameters, DummyError> {
self.loaded_model.hyperparameters = Hyperparameters::read(reader).unwrap();
Ok(PartialHyperparameters {
n_vocab: self
.loaded_model
.hyperparameters
.vocabulary_size
.try_into()
.unwrap(),
})
}

fn tensor_buffer(&mut self, info: TensorInfo) -> Result<(), DummyError> {
self.loaded_model.tensors.insert(
info.name,
TensorData {
n_dims: info.n_dims,
dims: info.dims,
element_type: info.element_type,
data: {
let n_bytes = info.n_elements * ggml::type_size(info.element_type);
let mut data = vec![0; n_bytes];
data.copy_from_slice(
&self.data
[info.start_offset as usize..info.start_offset as usize + n_bytes],
);
data
},
},
);
Ok(())
}
}