Merge remote-tracking branch 'origin/master' into agent-example

ggerganov · Apr 30, 2024 · 7675ac6 · 7675ac6
2 parents b4a00ce + a68a1e7
commit 7675ac6
Show file tree

Hide file tree

Showing 99 changed files with 6,606 additions and 1,633 deletions.
diff --git a/.devops/main-intel.Dockerfile b/.devops/main-intel.Dockerfile
@@ -10,14 +10,12 @@ WORKDIR /app
 
 COPY . .
 
-RUN mkdir build && \
- cd build && \
- if [ "${LLAMA_SYCL_F16}" = "ON" ]; then \
+RUN if [ "${LLAMA_SYCL_F16}" = "ON" ]; then \
  echo "LLAMA_SYCL_F16 is set" && \
  export OPT_SYCL_F16="-DLLAMA_SYCL_F16=ON"; \
  fi && \
- cmake .. -DLLAMA_SYCL=ON -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx ${OPT_SYCL_F16} && \
- cmake --build . --config Release --target main
+ cmake -B build -DLLAMA_SYCL=ON -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx ${OPT_SYCL_F16} && \
+ cmake --build build --config Release --target main
 
 FROM intel/oneapi-basekit:$ONEAPI_VERSION as runtime
 

diff --git a/.devops/main-vulkan.Dockerfile b/.devops/main-vulkan.Dockerfile
@@ -14,10 +14,8 @@ RUN wget -qO - https://packages.lunarg.com/lunarg-signing-key-pub.asc | apt-key
 # Build it
 WORKDIR /app
 COPY . .
-RUN mkdir build && \
- cd build && \
- cmake .. -DLLAMA_VULKAN=1 && \
- cmake --build . --config Release --target main
+RUN cmake -B build -DLLAMA_VULKAN=1 && \
+ cmake --build build --config Release --target main
 
 # Clean up
 WORKDIR /

diff --git a/.devops/server-intel.Dockerfile b/.devops/server-intel.Dockerfile
@@ -10,14 +10,12 @@ WORKDIR /app
 
 COPY . .
 
-RUN mkdir build && \
- cd build && \
- if [ "${LLAMA_SYCL_F16}" = "ON" ]; then \
+RUN if [ "${LLAMA_SYCL_F16}" = "ON" ]; then \
  echo "LLAMA_SYCL_F16 is set" && \
  export OPT_SYCL_F16="-DLLAMA_SYCL_F16=ON"; \
  fi && \
- cmake .. -DLLAMA_SYCL=ON -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx -DLLAMA_CURL=ON ${OPT_SYCL_F16} && \
- cmake --build . --config Release --target server
+ cmake -B build -DLLAMA_SYCL=ON -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx -DLLAMA_CURL=ON ${OPT_SYCL_F16} && \
+ cmake --build build --config Release --target server
 
 FROM intel/oneapi-basekit:$ONEAPI_VERSION as runtime
 

diff --git a/.devops/server-vulkan.Dockerfile b/.devops/server-vulkan.Dockerfile
@@ -18,10 +18,8 @@ RUN apt-get update && \
 # Build it
 WORKDIR /app
 COPY . .
-RUN mkdir build && \
- cd build && \
- cmake .. -DLLAMA_VULKAN=1 -DLLAMA_CURL=1 && \
- cmake --build . --config Release --target server
+RUN cmake -B build -DLLAMA_VULKAN=1 -DLLAMA_CURL=1 && \
+ cmake --build build --config Release --target server
 
 # Clean up
 WORKDIR /

diff --git a/.github/workflows/bench.yml b/.github/workflows/bench.yml
@@ -96,9 +96,7 @@ jobs:
  id: cmake_build
  run: |
  set -eux
- mkdir build
- cd build
- cmake .. \
+ cmake -B build \
  -DLLAMA_NATIVE=OFF \
  -DLLAMA_BUILD_SERVER=ON \
  -DLLAMA_CURL=ON \
@@ -109,7 +107,7 @@ jobs:
  -DLLAMA_FATAL_WARNINGS=OFF \
  -DLLAMA_ALL_WARNINGS=OFF \
  -DCMAKE_BUILD_TYPE=Release;
- cmake --build . --config Release -j $(nproc) --target server
+ cmake --build build --config Release -j $(nproc) --target server
 
  - name: Download the dataset
  id: download_dataset

diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml
@@ -593,6 +593,63 @@ jobs:
  run: |
  make swift
 
+ windows-msys2:
+ runs-on: windows-latest
+
+ strategy:
+ fail-fast: false
+ matrix:
+ include:
+ - { sys: UCRT64, env: ucrt-x86_64, build: Release }
+ - { sys: CLANG64, env: clang-x86_64, build: Release }
+
+ steps:
+ - name: Clone
+ uses: actions/checkout@v4
+
+ - name: Setup ${{ matrix.sys }}
+ uses: msys2/setup-msys2@v2
+ with:
+ update: true
+ msystem: ${{matrix.sys}}
+ install: >-
+ base-devel
+ mingw-w64-${{matrix.env}}-toolchain
+ mingw-w64-${{matrix.env}}-cmake
+ mingw-w64-${{matrix.env}}-openblas
+
+ - name: Build using make
+ shell: msys2 {0}
+ run: |
+ make -j $(nproc)
+
+ - name: Clean after building using make
+ shell: msys2 {0}
+ run: |
+ make clean
+
+ - name: Build using make w/ OpenBLAS
+ shell: msys2 {0}
+ run: |
+ make LLAMA_OPENBLAS=1 -j $(nproc)
+
+ - name: Build using CMake
+ shell: msys2 {0}
+ run: |
+ cmake -B build
+ cmake --build build --config ${{ matrix.build }} -j $(nproc)
+
+ - name: Clean after building using CMake
+ shell: msys2 {0}
+ run: |
+ rm -rf build
+
+ - name: Build using CMake w/ OpenBLAS
+ shell: msys2 {0}
+ run: |
+ cmake -B build -DLLAMA_BLAS=ON -DLLAMA_BLAS_VENDOR=OpenBLAS
+ cmake --build build --config ${{ matrix.build }} -j $(nproc)
+
  windows-latest-cmake:
  runs-on: windows-latest
 

diff --git a/.github/workflows/python-lint.yml b/.github/workflows/python-lint.yml
@@ -21,4 +21,4 @@ jobs:
  uses: py-actions/flake8@v2
  with:
  ignore: "E203,E211,E221,E225,E231,E241,E251,E261,E266,E501,E701,E704,W503"
- exclude: "examples/*,examples/*/**,*/**/__init__.py"
+ exclude: "examples/*,examples/*/**,*/**/__init__.py,convert-hf-to-gguf-update.py"
diff --git a/.github/workflows/server.yml b/.github/workflows/server.yml
@@ -41,24 +41,16 @@ jobs:
  sanitizer: ""
  fail-fast: false # While -DLLAMA_SANITIZE_THREAD=ON is broken
 
- container:
- image: ubuntu:latest
- ports:
- - 8888
- options: --cpus 4
-
  steps:
  - name: Dependencies
  id: depends
  run: |
- apt-get update
- apt-get -y install \
+ sudo apt-get update
+ sudo apt-get -y install \
  build-essential \
  xxd \
  git \
  cmake \
- python3-pip \
- python3-venv \
  curl \
  wget \
  language-pack-en \
@@ -71,6 +63,17 @@ jobs:
  fetch-depth: 0
  ref: ${{ github.event.inputs.sha || github.event.pull_request.head.sha || github.sha || github.head_ref || github.ref_name }}
 
+ - name: Python setup
+ id: setup_python
+ uses: actions/setup-python@v5
+ with:
+ python-version: '3.11'
+
+ - name: Tests dependencies
+ id: test_dependencies
+ run: |
+ pip install -r examples/server/tests/requirements.txt
+
  - name: Verify server deps
  id: verify_server_deps
  run: |
@@ -91,23 +94,14 @@ jobs:
  - name: Build
  id: cmake_build
  run: |
- mkdir build
- cd build
- cmake .. \
+ cmake -B build \
  -DLLAMA_NATIVE=OFF \
  -DLLAMA_BUILD_SERVER=ON \
  -DLLAMA_CURL=ON \
  -DCMAKE_BUILD_TYPE=${{ matrix.build_type }} \
  -DLLAMA_SANITIZE_${{ matrix.sanitizer }}=ON ;
- cmake --build . --config ${{ matrix.build_type }} -j $(nproc) --target server
+ cmake --build build --config ${{ matrix.build_type }} -j $(nproc) --target server
 
- - name: Setup python env
- id: pipenv
- run: |
- cd examples/server/tests
- python3 -m venv venv
- . venv/bin/activate
- pip install -r requirements.txt
 
  - name: Tests
  id: server_integration_tests
@@ -133,6 +127,7 @@ jobs:
  uses: actions/checkout@v4
  with:
  fetch-depth: 0
+ ref: ${{ github.event.inputs.sha || github.event.pull_request.head.sha || github.sha || github.head_ref || github.ref_name }}
 
  - name: libCURL
  id: get_libcurl
@@ -146,10 +141,8 @@ jobs:
  - name: Build
  id: cmake_build
  run: |
- mkdir build
- cd build
- cmake .. -DLLAMA_CURL=ON -DCURL_LIBRARY="$env:RUNNER_TEMP/libcurl/lib/libcurl.dll.a" -DCURL_INCLUDE_DIR="$env:RUNNER_TEMP/libcurl/include"
- cmake --build . --config Release -j ${env:NUMBER_OF_PROCESSORS} --target server
+ cmake -B build -DLLAMA_CURL=ON -DCURL_LIBRARY="$env:RUNNER_TEMP/libcurl/lib/libcurl.dll.a" -DCURL_INCLUDE_DIR="$env:RUNNER_TEMP/libcurl/include"
+ cmake --build build --config Release -j ${env:NUMBER_OF_PROCESSORS} --target server
 
  - name: Python setup
  id: setup_python

diff --git a/.gitignore b/.gitignore
@@ -2,6 +2,7 @@
 *.a
 *.so
 *.gguf
+*.gguf.json
 *.bin
 *.exe
 *.dll
@@ -108,3 +109,18 @@ examples/server/*.mjs.hpp
 poetry.lock
 poetry.toml
 nppBackup
+
+# Test binaries
+/tests/test-grammar-parser
+/tests/test-llama-grammar
+/tests/test-double-float
+/tests/test-grad0
+/tests/test-opt
+/tests/test-quantize-fns
+/tests/test-quantize-perf
+/tests/test-sampling
+/tests/test-tokenizer-0
+/tests/test-tokenizer-1-spm
+/tests/test-tokenizer-1-bpe
+/tests/test-rope
+/tests/test-backend-ops
diff --git a/Makefile b/Makefile
@@ -6,11 +6,23 @@ BUILD_TARGETS = \
 
 # Binaries only useful for tests
 TEST_TARGETS = \
- tests/test-llama-grammar tests/test-grammar-parser tests/test-double-float tests/test-grad0 tests/test-opt \
- tests/test-quantize-fns tests/test-quantize-perf tests/test-sampling tests/test-tokenizer-0-llama \
- tests/test-tokenizer-0-falcon tests/test-tokenizer-1-llama tests/test-tokenizer-1-bpe tests/test-rope \
- tests/test-backend-ops tests/test-model-load-cancel tests/test-autorelease \
- tests/test-json-schema-to-grammar tests/test-grammar-integration
+ tests/test-autorelease \
+ tests/test-backend-ops \
+ tests/test-double-float \
+ tests/test-grad0 \
+ tests/test-grammar-integration \
+ tests/test-grammar-parser \
+ tests/test-json-schema-to-grammar \
+ tests/test-llama-grammar \
+ tests/test-model-load-cancel \
+ tests/test-opt \
+ tests/test-quantize-fns \
+ tests/test-quantize-perf \
+ tests/test-rope \
+ tests/test-sampling \
+ tests/test-tokenizer-0 \
+ tests/test-tokenizer-1-bpe \
+ tests/test-tokenizer-1-spm
 
 # Code coverage output files
 COV_TARGETS = *.gcno tests/*.gcno *.gcda tests/*.gcda *.gcov tests/*.gcov lcov-report gcovr-report
@@ -27,6 +39,17 @@ ifndef UNAME_M
 UNAME_M := $(shell uname -m)
 endif
 
+# In GNU make default CXX is g++ instead of c++. Let's fix that so that users
+# of non-gcc compilers don't have to provide g++ alias or wrapper.
+DEFCC := cc
+DEFCXX := c++
+ifeq ($(origin CC),default)
+CC := $(DEFCC)
+endif
+ifeq ($(origin CXX),default)
+CXX := $(DEFCXX)
+endif
+
 # Mac OS + Arm can report x86_64
 # ref: https://github.com/ggerganov/whisper.cpp/issues/66#issuecomment-1282546789
 ifeq ($(UNAME_S),Darwin)
@@ -49,11 +72,17 @@ default: $(BUILD_TARGETS)
 test: $(TEST_TARGETS)
  @failures=0; \
  for test_target in $(TEST_TARGETS); do \
- if [ "$$test_target" = "tests/test-tokenizer-0-llama" ]; then \
- ./$$test_target $(CURDIR)/models/ggml-vocab-llama.gguf; \
- elif [ "$$test_target" = "tests/test-tokenizer-0-falcon" ]; then \
+ if [ "$$test_target" = "tests/test-tokenizer-0" ]; then \
+ ./$$test_target $(CURDIR)/models/ggml-vocab-llama-spm.gguf; \
+ ./$$test_target $(CURDIR)/models/ggml-vocab-llama-bpe.gguf; \
+ ./$$test_target $(CURDIR)/models/ggml-vocab-phi-3.gguf; \
  ./$$test_target $(CURDIR)/models/ggml-vocab-falcon.gguf; \
- elif [ "$$test_target" = "tests/test-tokenizer-1-llama" ]; then \
+ ./$$test_target $(CURDIR)/models/ggml-vocab-deepseek-coder.gguf; \
+ ./$$test_target $(CURDIR)/models/ggml-vocab-deepseek-llm.gguf; \
+ ./$$test_target $(CURDIR)/models/ggml-vocab-bert-bge.gguf; \
+ ./$$test_target $(CURDIR)/models/ggml-vocab-starcoder.gguf; \
+ ./$$test_target $(CURDIR)/models/ggml-vocab-gpt-2.gguf; \
+ elif [ "$$test_target" = "tests/test-tokenizer-1-spm" ]; then \
  continue; \
  elif [ "$$test_target" = "tests/test-tokenizer-1-bpe" ]; then \
  continue; \
@@ -971,19 +1000,15 @@ tests/test-sampling: tests/test-sampling.cpp ggml.o llama.o $(OBJS)
  $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
  $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
 
-tests/test-tokenizer-0-falcon: tests/test-tokenizer-0-falcon.cpp ggml.o llama.o $(COMMON_DEPS) console.o $(OBJS)
- $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
- $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
-
-tests/test-tokenizer-0-llama: tests/test-tokenizer-0-llama.cpp ggml.o llama.o $(COMMON_DEPS) console.o $(OBJS)
+tests/test-tokenizer-0: tests/test-tokenizer-0.cpp ggml.o llama.o $(COMMON_DEPS) console.o $(OBJS)
  $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
  $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
 
 tests/test-tokenizer-1-bpe: tests/test-tokenizer-1-bpe.cpp ggml.o llama.o $(COMMON_DEPS) console.o $(OBJS)
  $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
  $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
 
-tests/test-tokenizer-1-llama: tests/test-tokenizer-1-llama.cpp ggml.o llama.o $(COMMON_DEPS) console.o $(OBJS)
+tests/test-tokenizer-1-spm: tests/test-tokenizer-1-spm.cpp ggml.o llama.o $(COMMON_DEPS) console.o $(OBJS)
  $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
  $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)