Initial release
ggerganov committed Sep 18, 2022
0 parents commit fb558f7
Showing 34 changed files with 11,488 additions and 0 deletions.
10 changes: 10 additions & 0 deletions .gitignore
@@ -0,0 +1,10 @@
build/
build-debug/
build-*/

compile_commands.json

.exrc
.cache

src/arm_neon.h
71 changes: 71 additions & 0 deletions CMakeLists.txt
@@ -0,0 +1,71 @@
cmake_minimum_required (VERSION 3.0)
project(ggml VERSION 0.1.0)

set(CMAKE_EXPORT_COMPILE_COMMANDS "on")
set(CMAKE_RUNTIME_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/bin)
set(CMAKE_INSTALL_RPATH "${CMAKE_INSTALL_PREFIX}/lib")

if(CMAKE_SOURCE_DIR STREQUAL CMAKE_CURRENT_SOURCE_DIR)
    set(GGML_STANDALONE ON)
    include(cmake/GitVars.cmake)
    include(cmake/BuildTypes.cmake)
else()
    set(GGML_STANDALONE OFF)
endif()

# options

option(GGML_ALL_WARNINGS "ggml: enable all compiler warnings" ON)
option(GGML_ALL_WARNINGS_3RD_PARTY "ggml: enable all compiler warnings in 3rd party libs" OFF)

option(GGML_SANITIZE_THREAD "ggml: enable thread sanitizer" OFF)
option(GGML_SANITIZE_ADDRESS "ggml: enable address sanitizer" OFF)
option(GGML_SANITIZE_UNDEFINED "ggml: enable undefined sanitizer" OFF)

option(GGML_BUILD_TESTS "ggml: build tests" ${GGML_STANDALONE})
option(GGML_BUILD_EXAMPLES "ggml: build examples" ${GGML_STANDALONE})

# sanitizers

if (GGML_SANITIZE_THREAD)
    set(CMAKE_C_FLAGS   "${CMAKE_C_FLAGS}   -fsanitize=thread")
    set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fsanitize=thread")
endif()

if (GGML_SANITIZE_ADDRESS)
    set(CMAKE_C_FLAGS   "${CMAKE_C_FLAGS}   -fsanitize=address -fno-omit-frame-pointer")
    set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fsanitize=address -fno-omit-frame-pointer")
endif()

if (GGML_SANITIZE_UNDEFINED)
    set(CMAKE_C_FLAGS   "${CMAKE_C_FLAGS}   -fsanitize=undefined")
    set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fsanitize=undefined")
endif()

#set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -ffast-math")
#set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -march=native")

# dependencies

set(CMAKE_C_STANDARD 11)
set(CMAKE_CXX_STANDARD 11)

find_package(Threads REQUIRED)

# main

if (NOT CMAKE_BUILD_TYPE AND NOT CMAKE_CONFIGURATION_TYPES)
    set(CMAKE_BUILD_TYPE Release CACHE STRING "Build type" FORCE)
    set_property(CACHE CMAKE_BUILD_TYPE PROPERTY STRINGS "Debug" "Release" "RelWithDebInfo")
endif ()

add_subdirectory(src)

if (GGML_BUILD_TESTS)
    enable_testing()
    add_subdirectory(tests)
endif ()

if (GGML_BUILD_EXAMPLES)
    add_subdirectory(examples)
endif ()
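
A usage note (not part of the file above): the GGML_* options are ordinary CMake cache variables, so they are toggled at configure time. A minimal sketch of a standalone build with, for example, the address sanitizer enabled (assuming a Unix-like setup with make):

```bash
mkdir build && cd build
cmake -DCMAKE_BUILD_TYPE=Debug -DGGML_SANITIZE_ADDRESS=ON ..
make -j
```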
21 changes: 21 additions & 0 deletions LICENSE
@@ -0,0 +1,21 @@
MIT License

Copyright (c) 2022 Georgi Gerganov

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
51 changes: 51 additions & 0 deletions README.md
@@ -0,0 +1,51 @@
# ggml

Tensor library in C for machine learning

## Features

- Automatic differentiation (WIP)
- 16-bit float support
- ADAM and L-BFGS optimizers
- Optimized for Arm64 architectures (e.g. MacBook M1) via NEON intrinsics
- On x86 architectures, utilizes AVX intrinsics
- No third-party dependencies
- Zero memory allocations at runtime (see the sketch below)
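
To make the zero-allocations point concrete, here is a minimal sketch of the library workflow, based on the API declared in this commit's `include/ggml/ggml.h` (names such as `ggml_init`, `ggml_mul` and `ggml_graph_compute` are taken from that header; treat the exact signatures as assumptions and check the header):

```c
// minimal ggml usage sketch - verify signatures against include/ggml/ggml.h
#include <stdio.h>

#include "ggml/ggml.h"

int main(void) {
    // one up-front arena; all tensors and graph data live inside it,
    // so no further allocations happen while computing
    struct ggml_init_params params = {
        /*.mem_size   =*/ 16*1024*1024, // 16 MB
        /*.mem_buffer =*/ NULL,         // let ggml allocate the arena once
    };
    struct ggml_context * ctx = ggml_init(params);

    struct ggml_tensor * x = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, 4);
    struct ggml_tensor * y = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, 4);

    // operators only record nodes in the context; nothing is computed yet
    struct ggml_tensor * f = ggml_mul(ctx, x, y);

    struct ggml_cgraph gf = ggml_build_forward(f);

    ggml_set_f32(x, 2.0f);
    ggml_set_f32(y, 3.0f);

    ggml_graph_compute(ctx, &gf);

    printf("f[0] = %.1f\n", ggml_get_f32_1d(f, 0)); // expect 6.0

    ggml_free(ctx);
    return 0;
}
```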

## Local GPT inference

Using ggml, you can run [GPT-2](examples/gpt-2) and [GPT-J](examples/gpt-j) inference locally on your computer, with no additional software or special hardware. You don't even need to install Python or any other third-party library.

The example programs are implemented in C++. They run entirely on the CPU.

Here is how to use them:

```bash
# Build ggml + examples
git clone https://github.com/ggerganov/ggml
cd ggml
mkdir build && cd build
cmake ..
make -j4 gpt-2 gpt-j

# Run the GPT-2 small 117M model
../examples/gpt-2/download-ggml-model.sh 117M
./bin/gpt-2 -m models/gpt-2-117M/ggml-model.bin -p "This is an example"

# Run the GPT-J 6B model (requires 12GB disk space and 16GB CPU RAM)
../examples/gpt-j/download-ggml-model.sh 6B
./bin/gpt-j -m models/gpt-j-6B/ggml-model.bin -p "This is an example"
```

This is the inference speed for the different models on my MacBook M1 Pro:

| Model | Size | Time / Token |
| --- | --- | --- |
| GPT-2 | 117M | 5 ms |
| GPT-2 | 345M | 12 ms |
| GPT-2 | 774M | 23 ms |
| GPT-2 | 1558M | 42 ms |
| GPT-J | 6B | 125 ms |

For more information, check out the corresponding programs in the [examples](examples) folder.
54 changes: 54 additions & 0 deletions cmake/BuildTypes.cmake
@@ -0,0 +1,54 @@
# Add new build types

# ReleaseGG - Release with enabled asserts

SET(CMAKE_CXX_FLAGS_RELEASEGG
    "-O3"
    CACHE STRING "Flags used by the c++ compiler during release builds with enabled asserts."
    FORCE )
SET(CMAKE_C_FLAGS_RELEASEGG
    "-O3"
    CACHE STRING "Flags used by the compiler during release builds with enabled asserts."
    FORCE )
SET(CMAKE_EXE_LINKER_FLAGS_RELEASEGG
    ""
    CACHE STRING "Flags used for linking binaries during release builds with enabled asserts."
    FORCE )
SET(CMAKE_SHARED_LINKER_FLAGS_RELEASEGG
    ""
    CACHE STRING "Flags used by the shared libraries linker during release builds with enabled asserts."
    FORCE )
MARK_AS_ADVANCED(
    CMAKE_CXX_FLAGS_RELEASEGG
    CMAKE_C_FLAGS_RELEASEGG
    CMAKE_EXE_LINKER_FLAGS_RELEASEGG
    CMAKE_SHARED_LINKER_FLAGS_RELEASEGG )

# RelWithDebInfoGG - RelWithDebInfo with enabled asserts

SET(CMAKE_CXX_FLAGS_RELWITHDEBINFOGG
    "-O2 -g"
    CACHE STRING "Flags used by the c++ compiler during release builds with debug symbols and enabled asserts."
    FORCE )
SET(CMAKE_C_FLAGS_RELWITHDEBINFOGG
    "-O2 -g"
    CACHE STRING "Flags used by the compiler during release builds with debug symbols and enabled asserts."
    FORCE )
SET(CMAKE_EXE_LINKER_FLAGS_RELWITHDEBINFOGG
    ""
    CACHE STRING "Flags used for linking binaries during release builds with debug symbols and enabled asserts."
    FORCE )
SET(CMAKE_SHARED_LINKER_FLAGS_RELWITHDEBINFOGG
    ""
    CACHE STRING "Flags used by the shared libraries linker during release builds with debug symbols and enabled asserts."
    FORCE )
MARK_AS_ADVANCED(
    CMAKE_CXX_FLAGS_RELWITHDEBINFOGG
    CMAKE_C_FLAGS_RELWITHDEBINFOGG
    CMAKE_EXE_LINKER_FLAGS_RELWITHDEBINFOGG
    CMAKE_SHARED_LINKER_FLAGS_RELWITHDEBINFOGG )

if (NOT XCODE AND NOT MSVC AND NOT CMAKE_BUILD_TYPE)
    set(CMAKE_BUILD_TYPE Release CACHE STRING "Build type" FORCE)
    set_property(CACHE CMAKE_BUILD_TYPE PROPERTY STRINGS "Debug" "Release" "MinSizeRel" "RelWithDebInfo" "ReleaseGG" "RelWithDebInfoGG")
endif()
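
A usage note (not part of the file above): the custom build types are selected at configure time the same way as the built-in ones, e.g.:

```bash
cmake -DCMAKE_BUILD_TYPE=ReleaseGG ..
```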
22 changes: 22 additions & 0 deletions cmake/GitVars.cmake
@@ -0,0 +1,22 @@
find_package(Git)

# the commit's SHA1
execute_process(COMMAND
    "${GIT_EXECUTABLE}" describe --match=NeVeRmAtCh --always --abbrev=8
    WORKING_DIRECTORY "${CMAKE_SOURCE_DIR}"
    OUTPUT_VARIABLE GIT_SHA1
    ERROR_QUIET OUTPUT_STRIP_TRAILING_WHITESPACE)

# the date of the commit
execute_process(COMMAND
    "${GIT_EXECUTABLE}" log -1 --format=%ad --date=local
    WORKING_DIRECTORY "${CMAKE_SOURCE_DIR}"
    OUTPUT_VARIABLE GIT_DATE
    ERROR_QUIET OUTPUT_STRIP_TRAILING_WHITESPACE)

# the subject of the commit
execute_process(COMMAND
    "${GIT_EXECUTABLE}" log -1 --format=%s
    WORKING_DIRECTORY "${CMAKE_SOURCE_DIR}"
    OUTPUT_VARIABLE GIT_COMMIT_SUBJECT
    ERROR_QUIET OUTPUT_STRIP_TRAILING_WHITESPACE)
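
A usage note (not part of the file above): the script only populates the GIT_SHA1, GIT_DATE and GIT_COMMIT_SUBJECT variables; a hypothetical consumer could report them like this:

```cmake
# hypothetical usage - print the values captured by GitVars.cmake
message(STATUS "git SHA1:    ${GIT_SHA1}")
message(STATUS "git date:    ${GIT_DATE}")
message(STATUS "git subject: ${GIT_COMMIT_SUBJECT}")
```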
5 changes: 5 additions & 0 deletions examples/CMakeLists.txt
@@ -0,0 +1,5 @@
add_library(ggml_utils STATIC utils.cpp)
target_include_directories(ggml_utils PUBLIC ${CMAKE_CURRENT_SOURCE_DIR})

add_subdirectory(gpt-2)
add_subdirectory(gpt-j)
6 changes: 6 additions & 0 deletions examples/gpt-2/CMakeLists.txt
@@ -0,0 +1,6 @@
#
# gpt-2
#

set(TEST_TARGET gpt-2)
add_executable(${TEST_TARGET} main.cpp)
target_link_libraries(${TEST_TARGET} PRIVATE ggml ggml_utils)
126 changes: 126 additions & 0 deletions examples/gpt-2/README.md
@@ -0,0 +1,126 @@
# gpt-2

This is a C++ example running GPT-2 inference using the [ggml](https://github.com/ggerganov/ggml) library.
The entire code of the example is in [main.cpp](main.cpp).

The program runs entirely on the CPU - no GPU is required.

The example supports the following models:

| Model | Description | Disk Size |
| --- | --- | --- |
| 117M | Small model | 240 MB |
| 345M | Medium model | 680 MB |
| 774M | Large model | 1.5 GB |
| 1558M | XL model | 3.0 GB |

Sample performance on MacBook M1 Pro:

| Model | Size | Time / Token |
| --- | --- | --- |
| GPT-2 | 117M | 5 ms |
| GPT-2 | 345M | 12 ms |
| GPT-2 | 774M | 23 ms |
| GPT-2 | 1558M | 42 ms |

Sample output:

```
$ ./bin/gpt-2 -h
usage: ./bin/gpt-2 [options]
options:
-h, --help show this help message and exit
-s SEED, --seed SEED RNG seed (default: -1)
-t N, --threads N number of threads to use during computation (default: 8)
-p PROMPT, --prompt PROMPT
prompt to start generation with (default: random)
-n N, --n_predict N number of tokens to predict (default: 200)
--top_k N top-k sampling (default: 40)
--top_p N top-p sampling (default: 0.9)
--temp N temperature (default: 1.0)
-b N, --batch_size N batch size for prompt processing (default: 8)
-m FNAME, --model FNAME
model path (default: models/gpt-2-117M/ggml-model.bin)
$ ./bin/gpt-2
gpt2_model_load: loading model from 'models/gpt-2-117M/ggml-model.bin'
gpt2_model_load: n_vocab = 50257
gpt2_model_load: n_ctx = 1024
gpt2_model_load: n_embd = 768
gpt2_model_load: n_head = 12
gpt2_model_load: n_layer = 12
gpt2_model_load: f16 = 1
gpt2_model_load: ggml ctx size = 311.12 MB
gpt2_model_load: memory size = 72.00 MB, n_mem = 12288
gpt2_model_load: model size = 239.08 MB
main: number of tokens in prompt = 1
So this is going to be the end of the line for us.
If the Dolphins continue to do their business, it's possible that the team could make a bid to bring in new defensive coordinator Scott Linehan.
Linehan's job is a little daunting, but he's a great coach and an excellent coach. I don't believe we're going to make the playoffs.
We're going to have to work hard to keep our heads down and get ready to go.<|endoftext|>
main: mem per token = 2048612 bytes
main: load time = 106.32 ms
main: sample time = 7.10 ms
main: predict time = 506.40 ms / 5.06 ms per token
main: total time = 629.84 ms
```

## Downloading and converting the original models

You can download the original model files using the [download-model.sh](download-model.sh) Bash script.
The model is in TensorFlow format, so before using it with ggml, we need to convert it to the appropriate format.
This is done via the [convert-ckpt-to-ggml.py](convert-ckpt-to-ggml.py) Python script.

Here is the entire process for the GPT-2 117M model:

```
cd ggml/build
../examples/gpt-2/download-model.sh 117M
Downloading model 117M ...
models/gpt-2-117M/checkpoint 100%[=============================>] 77 --.-KB/s in 0s
models/gpt-2-117M/encoder.json 100%[=============================>] 1018K 1.20MB/s in 0.8s
models/gpt-2-117M/hparams.json 100%[=============================>] 90 --.-KB/s in 0s
models/gpt-2-117M/model.ckpt.data-00000-of-00001 100%[=============================>] 474.70M 1.21MB/s in 8m 39s
models/gpt-2-117M/model.ckpt.index 100%[=============================>] 5.09K --.-KB/s in 0s
models/gpt-2-117M/model.ckpt.meta 100%[=============================>] 460.11K 806KB/s in 0.6s
models/gpt-2-117M/vocab.bpe 100%[=============================>] 445.62K 799KB/s in 0.6s
Done! Model '117M' saved in 'models/gpt-2-117M/'
Run the convert-ckpt-to-ggml.py script to convert the model to ggml format.
python /Users/john/ggml/examples/gpt-2/convert-ckpt-to-ggml.py models/gpt-2-117M/
```

This conversion requires that you have Python and TensorFlow installed on your computer.
If you want to avoid this, you can download the already-converted ggml models as
described below.
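
If you do run the conversion yourself, the setup could look roughly like this (a sketch; the exact Python dependencies and versions are an assumption, so check the script):

```bash
# assumption: the conversion script needs TensorFlow (and NumPy)
python3 -m pip install tensorflow numpy
cd ggml/build
../examples/gpt-2/download-model.sh 117M
python3 ../examples/gpt-2/convert-ckpt-to-ggml.py models/gpt-2-117M/
```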

## Downloading the ggml model directly

For convenience, I will be hosting the converted ggml model files to make it easier to run the examples.
This way, you can directly download a single binary file and start using it. No Python or TensorFlow is required.

Here is how to get the 117M ggml model:

```
cd ggml/build
../examples/gpt-2/download-ggml-model.sh 117M
Downloading ggml model 117M ...
models/gpt-2-117M/ggml-model.bin 100%[===============================>] 239.58M 8.52MB/s in 28s
Done! Model '117M' saved in 'models/gpt-2-117M/ggml-model.bin'
You can now use it like this:
$ ./bin/gpt-2 -m models/gpt-2-117M/ggml-model.bin -p "This is an example"
```

At some point, I might stop hosting these models. If that happens, simply revert to the manual conversion process above.