Skip to content
This repository has been archived by the owner on Jun 24, 2024. It is now read-only.

Ported quantize.cpp #84

Merged
merged 22 commits into from
Apr 25, 2023
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
Show all changes
22 commits
Select commit Hold shift + click to select a range
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
Next Next commit
Merge branch 'main' of github.com:rustformers/llama-rs into quantize
  • Loading branch information
philpax committed Apr 23, 2023
commit b3a932ee730aaeb8caeffc1ac3eefcbf1457a79f
39 changes: 17 additions & 22 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

6 changes: 3 additions & 3 deletions ggml-loader/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -37,8 +37,8 @@ pub enum LoadError<T> {
#[error("invalid file magic number: {0}")]
InvalidMagic(u32),

#[error("invalid ggml format: version={0}")]
InvalidFormatVersion(u32),
#[error("invalid ggml format: format={0:?} version={1}")]
InvalidFormatVersion(ContainerType, u32),

#[error("{0}")]
Io(#[from] std::io::Error),
Expand Down Expand Up @@ -129,7 +129,7 @@ pub fn load_model_from_reader<T, R: BufRead + Seek>(
ContainerType::GGMF | ContainerType::GGJT => {
let _version: u32 = match read_u32(reader)? {
ggml::FORMAT_VERSION => ggml::FORMAT_VERSION,
version => return Err(LoadError::InvalidFormatVersion(version)),
version => return Err(LoadError::InvalidFormatVersion(container_type, version)),
};
}
ContainerType::GGML => {}
Expand Down
2 changes: 1 addition & 1 deletion llama-rs/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -29,4 +29,4 @@ half = { version = "2.2.1", optional = true }

[features]
convert = ["dep:serde_json", "dep:protobuf", "dep:rust_tokenizers"]
quantize = ["dep:half"]
quantize = ["dep:half"]
41 changes: 1 addition & 40 deletions llama-rs/src/file.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,56 +2,18 @@ use crate::LoadError;
pub use std::fs::File;
pub use std::io::{BufRead, BufReader, BufWriter, Read, Write};

/// Reads exactly `N` bytes from `reader` into a fixed-size array.
///
/// # Errors
/// Returns whatever error the `read` helper reports for the underlying read.
pub fn read_bytes<const N: usize>(reader: &mut impl BufRead) -> Result<[u8; N], LoadError> {
    let mut buffer = [0u8; N];
    // The mutable borrow of `buffer` ends with the call, so it can be handed back on success.
    read(reader, &mut buffer).map(|()| buffer)
}

/// Reads exactly `len` bytes from `reader` into a freshly allocated vector.
///
/// # Errors
/// Returns whatever error the `read` helper reports for the underlying read.
pub fn read_bytes_with_len(reader: &mut impl BufRead, len: usize) -> Result<Vec<u8>, LoadError> {
    // Zero-filled up front so the helper has a slice of the requested length to fill.
    let mut buffer = vec![0u8; len];
    read(reader, &mut buffer).map(|()| buffer)
}

/// Reads four bytes from `reader` and decodes them as a little-endian `i32`.
pub fn read_i32(reader: &mut impl BufRead) -> Result<i32, LoadError> {
    read_bytes::<4>(reader).map(i32::from_le_bytes)
}

/// Obtains four bytes through `rw` (which receives both `reader` and `writer`)
/// and decodes them as a little-endian `i32`.
pub fn rw_i32(reader: &mut impl BufRead, writer: &mut impl Write) -> Result<i32, LoadError> {
    rw::<4>(reader, writer).map(i32::from_le_bytes)
}

/// Reads four bytes from `reader` and decodes them as a little-endian `u32`.
pub fn read_u32(reader: &mut impl BufRead) -> Result<u32, LoadError> {
    read_bytes::<4>(reader).map(u32::from_le_bytes)
}

/// Obtains four bytes through `rw` (which receives both `reader` and `writer`)
/// and decodes them as a little-endian `u32`.
pub fn rw_u32(reader: &mut impl BufRead, writer: &mut impl Write) -> Result<u32, LoadError> {
    rw::<4>(reader, writer).map(u32::from_le_bytes)
}

/// Reads four bytes from `reader` and decodes them as a little-endian `f32`.
pub fn read_f32(reader: &mut impl BufRead) -> Result<f32, LoadError> {
    read_bytes::<4>(reader).map(f32::from_le_bytes)
}

/// Obtains four bytes through `rw` (which receives both `reader` and `writer`)
/// and decodes them as a little-endian `f32`.
pub fn rw_f32(reader: &mut impl BufRead, writer: &mut impl Write) -> Result<f32, LoadError> {
    rw::<4>(reader, writer).map(f32::from_le_bytes)
}

/// Helper function. Reads a string from the buffer and returns it.
pub fn read_string(reader: &mut BufReader<File>, len: usize) -> Result<String, LoadError> {
Ok(String::from_utf8(read_bytes_with_len(reader, len)?)?)
}

/// Fills `bytes` completely from `reader`.
///
/// # Errors
/// A failing `read_exact` is reported as `LoadError::ReadExactFailed`, carrying
/// the underlying error as `source` and the length of `bytes`.
fn read(reader: &mut impl BufRead, bytes: &mut [u8]) -> Result<(), LoadError> {
    // Capture the requested length up front for the error report.
    let wanted = bytes.len();
    match reader.read_exact(bytes) {
        Ok(()) => Ok(()),
        Err(source) => Err(LoadError::ReadExactFailed {
            source,
            bytes: wanted,
        }),
    }
}

pub fn rw_bytes_with_len(
reader: &mut impl BufRead,
writer: &mut impl Write,
Expand All @@ -72,8 +34,7 @@ fn rw<const N: usize>(
reader: &mut impl BufRead,
writer: &mut impl Write,
) -> Result<[u8; N], LoadError> {
let mut bytes = [0u8; N];
read(reader, &mut bytes)?;
let bytes: [u8; N] = ggml_loader::util::read_bytes(reader)?;
writer.write_all(&bytes)?;
Ok(bytes)
}
Loading
You are viewing a condensed version of this merge commit. You can view the full changes here.