Initial release
ggerganov committed Sep 18, 2022
0 parents commit fb558f7
Showing 34 changed files with 11,488 additions and 0 deletions.
10 changes: 10 additions & 0 deletions .gitignore
@@ -0,0 +1,10 @@
build/
build-debug/
build-*/

compile_commands.json

.exrc
.cache

src/arm_neon.h
71 changes: 71 additions & 0 deletions CMakeLists.txt
@@ -0,0 +1,71 @@
cmake_minimum_required (VERSION 3.0)
project(ggml VERSION 0.1.0)

set(CMAKE_EXPORT_COMPILE_COMMANDS "on")
set(CMAKE_RUNTIME_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/bin)
set(CMAKE_INSTALL_RPATH "${CMAKE_INSTALL_PREFIX}/lib")

if(CMAKE_SOURCE_DIR STREQUAL CMAKE_CURRENT_SOURCE_DIR)
    set(GGML_STANDALONE ON)
    include(cmake/GitVars.cmake)
    include(cmake/BuildTypes.cmake)
else()
    set(GGML_STANDALONE OFF)
endif()

# options

option(GGML_ALL_WARNINGS "ggml: enable all compiler warnings" ON)
option(GGML_ALL_WARNINGS_3RD_PARTY "ggml: enable all compiler warnings in 3rd party libs" OFF)

option(GGML_SANITIZE_THREAD "ggml: enable thread sanitizer" OFF)
option(GGML_SANITIZE_ADDRESS "ggml: enable address sanitizer" OFF)
option(GGML_SANITIZE_UNDEFINED "ggml: enable undefined sanitizer" OFF)

option(GGML_BUILD_TESTS "ggml: build tests" ${GGML_STANDALONE})
option(GGML_BUILD_EXAMPLES "ggml: build examples" ${GGML_STANDALONE})

# sanitizers

if (GGML_SANITIZE_THREAD)
    set(CMAKE_C_FLAGS   "${CMAKE_C_FLAGS}   -fsanitize=thread")
    set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fsanitize=thread")
endif()

if (GGML_SANITIZE_ADDRESS)
    set(CMAKE_C_FLAGS   "${CMAKE_C_FLAGS}   -fsanitize=address -fno-omit-frame-pointer")
    set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fsanitize=address -fno-omit-frame-pointer")
endif()

if (GGML_SANITIZE_UNDEFINED)
    set(CMAKE_C_FLAGS   "${CMAKE_C_FLAGS}   -fsanitize=undefined")
    set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fsanitize=undefined")
endif()

#set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -ffast-math")
#set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -march=native")

# dependencies

set(CMAKE_C_STANDARD 11)
set(CMAKE_CXX_STANDARD 11)

find_package(Threads REQUIRED)

# main

if (NOT CMAKE_BUILD_TYPE AND NOT CMAKE_CONFIGURATION_TYPES)
    set(CMAKE_BUILD_TYPE Release CACHE STRING "Build type" FORCE)
    set_property(CACHE CMAKE_BUILD_TYPE PROPERTY STRINGS "Debug" "Release" "RelWithDebInfo")
endif ()

add_subdirectory(src)

if (GGML_BUILD_TESTS)
    enable_testing()
    add_subdirectory(tests)
endif ()

if (GGML_BUILD_EXAMPLES)
    add_subdirectory(examples)
endif ()
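
A usage note (not part of the file above): the GGML_* options are ordinary CMake cache variables, so they are toggled at configure time. A minimal sketch of a standalone build with, for example, the address sanitizer enabled (assuming a Unix-like setup with make):

```bash
mkdir build && cd build
cmake -DCMAKE_BUILD_TYPE=Debug -DGGML_SANITIZE_ADDRESS=ON ..
make -j
```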
21 changes: 21 additions & 0 deletions LICENSE
@@ -0,0 +1,21 @@
MIT License

Copyright (c) 2022 Georgi Gerganov

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
51 changes: 51 additions & 0 deletions README.md
@@ -0,0 +1,51 @@
# ggml

Tensor library in C for machine learning

## Features

- Automatic differentiation (WIP)
- 16-bit float support
- ADAM and L-BFGS optimizers
- Optimized for Arm64 architectures (e.g. MacBook M1) via NEON intrinsics
- On x86 architectures, utilizes AVX intrinsics
- No third-party dependencies
- Zero memory allocations at runtime (see the sketch below)
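
To make the zero-allocations point concrete, here is a minimal sketch of the library workflow, based on the API declared in this commit's `include/ggml/ggml.h` (names such as `ggml_init`, `ggml_mul` and `ggml_graph_compute` are taken from that header; treat the exact signatures as assumptions and check the header):

```c
// minimal ggml usage sketch - verify signatures against include/ggml/ggml.h
#include <stdio.h>

#include "ggml/ggml.h"

int main(void) {
    // one up-front arena; all tensors and graph data live inside it,
    // so no further allocations happen while computing
    struct ggml_init_params params = {
        /*.mem_size   =*/ 16*1024*1024, // 16 MB
        /*.mem_buffer =*/ NULL,         // let ggml allocate the arena once
    };
    struct ggml_context * ctx = ggml_init(params);

    struct ggml_tensor * x = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, 4);
    struct ggml_tensor * y = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, 4);

    // operators only record nodes in the context; nothing is computed yet
    struct ggml_tensor * f = ggml_mul(ctx, x, y);

    struct ggml_cgraph gf = ggml_build_forward(f);

    ggml_set_f32(x, 2.0f);
    ggml_set_f32(y, 3.0f);

    ggml_graph_compute(ctx, &gf);

    printf("f[0] = %.1f\n", ggml_get_f32_1d(f, 0)); // expect 6.0

    ggml_free(ctx);
    return 0;
}
```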

## Local GPT inference

Using ggml, you can run [GPT-2](examples/gpt-2) and [GPT-J](examples/gpt-j) inference locally on your computer, with no additional software or special hardware. You don't even need to install Python or any other third-party library.

The example programs are implemented in C++. They run entirely on the CPU.

Here is how to use them:

```bash
# Build ggml + examples
git clone https://github.com/ggerganov/ggml
cd ggml
mkdir build && cd build
cmake ..
make -j4 gpt-2 gpt-j

# Run the GPT-2 small 117M model
../examples/gpt-2/download-ggml-model.sh 117M
./bin/gpt-2 -m models/gpt-2-117M/ggml-model.bin -p "This is an example"

# Run the GPT-J 6B model (requires 12GB disk space and 16GB CPU RAM)
../examples/gpt-j/download-ggml-model.sh 6B
./bin/gpt-j -m models/gpt-j-6B/ggml-model.bin -p "This is an example"
```

This is the inference speed for the different models on my MacBook M1 Pro:

| Model | Size | Time / Token |
| --- | --- | --- |
| GPT-2 | 117M | 5 ms |
| GPT-2 | 345M | 12 ms |
| GPT-2 | 774M | 23 ms |
| GPT-2 | 1558M | 42 ms |
| GPT-J | 6B | 125 ms |

For more information, check out the corresponding programs in the [examples](examples) folder.
54 changes: 54 additions & 0 deletions cmake/BuildTypes.cmake
@@ -0,0 +1,54 @@
# Add new build types

# ReleaseGG - Release with enabled asserts

SET(CMAKE_CXX_FLAGS_RELEASEGG
    "-O3"
    CACHE STRING "Flags used by the c++ compiler during release builds with enabled asserts."
    FORCE )
SET(CMAKE_C_FLAGS_RELEASEGG
    "-O3"
    CACHE STRING "Flags used by the compiler during release builds with enabled asserts."
    FORCE )
SET(CMAKE_EXE_LINKER_FLAGS_RELEASEGG
    ""
    CACHE STRING "Flags used for linking binaries during release builds with enabled asserts."
    FORCE )
SET(CMAKE_SHARED_LINKER_FLAGS_RELEASEGG
    ""
    CACHE STRING "Flags used by the shared libraries linker during release builds with enabled asserts."
    FORCE )
MARK_AS_ADVANCED(
    CMAKE_CXX_FLAGS_RELEASEGG
    CMAKE_C_FLAGS_RELEASEGG
    CMAKE_EXE_LINKER_FLAGS_RELEASEGG
    CMAKE_SHARED_LINKER_FLAGS_RELEASEGG )

# RelWithDebInfoGG - RelWithDebInfo with enabled asserts

SET(CMAKE_CXX_FLAGS_RELWITHDEBINFOGG
    "-O2 -g"
    CACHE STRING "Flags used by the c++ compiler during release builds with debug symbols and enabled asserts."
    FORCE )
SET(CMAKE_C_FLAGS_RELWITHDEBINFOGG
    "-O2 -g"
    CACHE STRING "Flags used by the compiler during release builds with debug symbols and enabled asserts."
    FORCE )
SET(CMAKE_EXE_LINKER_FLAGS_RELWITHDEBINFOGG
    ""
    CACHE STRING "Flags used for linking binaries during release builds with debug symbols and enabled asserts."
    FORCE )
SET(CMAKE_SHARED_LINKER_FLAGS_RELWITHDEBINFOGG
    ""
    CACHE STRING "Flags used by the shared libraries linker during release builds with debug symbols and enabled asserts."
    FORCE )
MARK_AS_ADVANCED(
    CMAKE_CXX_FLAGS_RELWITHDEBINFOGG
    CMAKE_C_FLAGS_RELWITHDEBINFOGG
    CMAKE_EXE_LINKER_FLAGS_RELWITHDEBINFOGG
    CMAKE_SHARED_LINKER_FLAGS_RELWITHDEBINFOGG )

if (NOT XCODE AND NOT MSVC AND NOT CMAKE_BUILD_TYPE)
    set(CMAKE_BUILD_TYPE Release CACHE STRING "Build type" FORCE)
    set_property(CACHE CMAKE_BUILD_TYPE PROPERTY STRINGS "Debug" "Release" "MinSizeRel" "RelWithDebInfo" "ReleaseGG" "RelWithDebInfoGG")
endif()
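
A usage note (not part of the file above): the custom build types are selected at configure time the same way as the built-in ones, e.g.:

```bash
cmake -DCMAKE_BUILD_TYPE=ReleaseGG ..
```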
22 changes: 22 additions & 0 deletions cmake/GitVars.cmake
@@ -0,0 +1,22 @@
find_package(Git)

# the commit's SHA1
execute_process(COMMAND
    "${GIT_EXECUTABLE}" describe --match=NeVeRmAtCh --always --abbrev=8
    WORKING_DIRECTORY "${CMAKE_SOURCE_DIR}"
    OUTPUT_VARIABLE GIT_SHA1
    ERROR_QUIET OUTPUT_STRIP_TRAILING_WHITESPACE)

# the date of the commit
execute_process(COMMAND
    "${GIT_EXECUTABLE}" log -1 --format=%ad --date=local
    WORKING_DIRECTORY "${CMAKE_SOURCE_DIR}"
    OUTPUT_VARIABLE GIT_DATE
    ERROR_QUIET OUTPUT_STRIP_TRAILING_WHITESPACE)

# the subject of the commit
execute_process(COMMAND
    "${GIT_EXECUTABLE}" log -1 --format=%s
    WORKING_DIRECTORY "${CMAKE_SOURCE_DIR}"
    OUTPUT_VARIABLE GIT_COMMIT_SUBJECT
    ERROR_QUIET OUTPUT_STRIP_TRAILING_WHITESPACE)
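
A usage note (not part of the file above): the script only populates the GIT_SHA1, GIT_DATE and GIT_COMMIT_SUBJECT variables; a hypothetical consumer could report them like this:

```cmake
# hypothetical usage - print the values captured by GitVars.cmake
message(STATUS "git SHA1:    ${GIT_SHA1}")
message(STATUS "git date:    ${GIT_DATE}")
message(STATUS "git subject: ${GIT_COMMIT_SUBJECT}")
```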
5 changes: 5 additions & 0 deletions examples/CMakeLists.txt
@@ -0,0 +1,5 @@
add_library(ggml_utils STATIC utils.cpp)
target_include_directories(ggml_utils PUBLIC ${CMAKE_CURRENT_SOURCE_DIR})

add_subdirectory(gpt-2)
add_subdirectory(gpt-j)
6 changes: 6 additions & 0 deletions examples/gpt-2/CMakeLists.txt
@@ -0,0 +1,6 @@
#
# gpt-2
#

set(TEST_TARGET gpt-2)
add_executable(${TEST_TARGET} main.cpp)
target_link_libraries(${TEST_TARGET} PRIVATE ggml ggml_utils)
126 changes: 126 additions & 0 deletions examples/gpt-2/README.md
@@ -0,0 +1,126 @@
# gpt-2

This is a C++ example running GPT-2 inference using the [ggml](https://github.com/ggerganov/ggml) library.
The entire code of the example is in [main.cpp](main.cpp).

The program runs entirely on the CPU - no GPU is required.

The example supports the following models:

| Model | Description | Disk Size |
| --- | --- | --- |
| 117M | Small model | 240 MB |
| 345M | Medium model | 680 MB |
| 774M | Large model | 1.5 GB |
| 1558M | XL model | 3.0 GB |

Sample performance on MacBook M1 Pro:

| Model | Size | Time / Token |
| --- | --- | --- |
| GPT-2 | 117M | 5 ms |
| GPT-2 | 345M | 12 ms |
| GPT-2 | 774M | 23 ms |
| GPT-2 | 1558M | 42 ms |

Sample output:

```
$ ./bin/gpt-2 -h
usage: ./bin/gpt-2 [options]
options:
-h, --help show this help message and exit
-s SEED, --seed SEED RNG seed (default: -1)
-t N, --threads N number of threads to use during computation (default: 8)
-p PROMPT, --prompt PROMPT
prompt to start generation with (default: random)
-n N, --n_predict N number of tokens to predict (default: 200)
--top_k N top-k sampling (default: 40)
--top_p N top-p sampling (default: 0.9)
--temp N temperature (default: 1.0)
-b N, --batch_size N batch size for prompt processing (default: 8)
-m FNAME, --model FNAME
model path (default: models/gpt-2-117M/ggml-model.bin)
$ ./bin/gpt-2
gpt2_model_load: loading model from 'models/gpt-2-117M/ggml-model.bin'
gpt2_model_load: n_vocab = 50257
gpt2_model_load: n_ctx = 1024
gpt2_model_load: n_embd = 768
gpt2_model_load: n_head = 12
gpt2_model_load: n_layer = 12
gpt2_model_load: f16 = 1
gpt2_model_load: ggml ctx size = 311.12 MB
gpt2_model_load: memory size = 72.00 MB, n_mem = 12288
gpt2_model_load: model size = 239.08 MB
main: number of tokens in prompt = 1
So this is going to be the end of the line for us.
If the Dolphins continue to do their business, it's possible that the team could make a bid to bring in new defensive coordinator Scott Linehan.
Linehan's job is a little daunting, but he's a great coach and an excellent coach. I don't believe we're going to make the playoffs.
We're going to have to work hard to keep our heads down and get ready to go.<|endoftext|>
main: mem per token = 2048612 bytes
main: load time = 106.32 ms
main: sample time = 7.10 ms
main: predict time = 506.40 ms / 5.06 ms per token
main: total time = 629.84 ms
```

## Downloading and converting the original models

You can download the original model files using the [download-model.sh](download-model.sh) Bash script.
The model is in TensorFlow format, so before using it with ggml, we need to convert it to the appropriate format.
This is done via the [convert-ckpt-to-ggml.py](convert-ckpt-to-ggml.py) Python script.

Here is the entire process for the GPT-2 117M model:

```
cd ggml/build
../examples/gpt-2/download-model.sh 117M
Downloading model 117M ...
models/gpt-2-117M/checkpoint 100%[=============================>] 77 --.-KB/s in 0s
models/gpt-2-117M/encoder.json 100%[=============================>] 1018K 1.20MB/s in 0.8s
models/gpt-2-117M/hparams.json 100%[=============================>] 90 --.-KB/s in 0s
models/gpt-2-117M/model.ckpt.data-00000-of-00001 100%[=============================>] 474.70M 1.21MB/s in 8m 39s
models/gpt-2-117M/model.ckpt.index 100%[=============================>] 5.09K --.-KB/s in 0s
models/gpt-2-117M/model.ckpt.meta 100%[=============================>] 460.11K 806KB/s in 0.6s
models/gpt-2-117M/vocab.bpe 100%[=============================>] 445.62K 799KB/s in 0.6s
Done! Model '117M' saved in 'models/gpt-2-117M/'
Run the convert-ckpt-to-ggml.py script to convert the model to ggml format.
python /Users/john/ggml/examples/gpt-2/convert-ckpt-to-ggml.py models/gpt-2-117M/
```

This conversion requires that you have Python and TensorFlow installed on your computer.
If you want to avoid this, you can download the already-converted ggml models as
described below.
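
If you do run the conversion yourself, the setup could look roughly like this (a sketch; the exact Python dependencies and versions are an assumption, so check the script):

```bash
# assumption: the conversion script needs TensorFlow (and NumPy)
python3 -m pip install tensorflow numpy
cd ggml/build
../examples/gpt-2/download-model.sh 117M
python3 ../examples/gpt-2/convert-ckpt-to-ggml.py models/gpt-2-117M/
```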

## Downloading the ggml model directly

For convenience, I will be hosting the converted ggml model files to make it easier to run the examples.
This way, you can directly download a single binary file and start using it. No Python or TensorFlow is required.

Here is how to get the 117M ggml model:

```
cd ggml/build
../examples/gpt-2/download-ggml-model.sh 117M
Downloading ggml model 117M ...
models/gpt-2-117M/ggml-model.bin 100%[===============================>] 239.58M 8.52MB/s in 28s
Done! Model '117M' saved in 'models/gpt-2-117M/ggml-model.bin'
You can now use it like this:
$ ./bin/gpt-2 -m models/gpt-2-117M/ggml-model.bin -p "This is an example"
```

At some point, I might stop hosting these models. If that happens, simply revert to the manual conversion process above.