diff --git a/.devops/full-cuda.Dockerfile b/.devops/full-cuda.Dockerfile index 8cc1480d316c2..059fd26950607 100644 --- a/.devops/full-cuda.Dockerfile +++ b/.devops/full-cuda.Dockerfile @@ -12,7 +12,7 @@ FROM ${BASE_CUDA_DEV_CONTAINER} as build ARG CUDA_DOCKER_ARCH=all RUN apt-get update && \ - apt-get install -y build-essential python3 python3-pip git + apt-get install -y build-essential python3 python3-pip git libcurl4-openssl-dev COPY requirements.txt requirements.txt COPY requirements requirements @@ -28,6 +28,8 @@ COPY . . ENV CUDA_DOCKER_ARCH=${CUDA_DOCKER_ARCH} # Enable CUDA ENV LLAMA_CUDA=1 +# Enable cURL +ENV LLAMA_CURL=1 RUN make diff --git a/.devops/full-rocm.Dockerfile b/.devops/full-rocm.Dockerfile index 8b9633dc4ebf5..6ecf3bcc7cb83 100644 --- a/.devops/full-rocm.Dockerfile +++ b/.devops/full-rocm.Dockerfile @@ -40,6 +40,11 @@ ENV LLAMA_HIPBLAS=1 ENV CC=/opt/rocm/llvm/bin/clang ENV CXX=/opt/rocm/llvm/bin/clang++ +# Enable cURL +ENV LLAMA_CURL=1 +RUN apt-get update && \ + apt-get install -y libcurl4-openssl-dev + RUN make ENTRYPOINT ["/app/.devops/tools.sh"] diff --git a/.devops/full.Dockerfile b/.devops/full.Dockerfile index cef1297d3e156..432fb5dadcbca 100644 --- a/.devops/full.Dockerfile +++ b/.devops/full.Dockerfile @@ -3,7 +3,7 @@ ARG UBUNTU_VERSION=22.04 FROM ubuntu:$UBUNTU_VERSION as build RUN apt-get update && \ - apt-get install -y build-essential python3 python3-pip git + apt-get install -y build-essential python3 python3-pip git libcurl4-openssl-dev COPY requirements.txt requirements.txt COPY requirements requirements @@ -15,6 +15,9 @@ WORKDIR /app COPY . . +ENV LLAMA_CURL=1 + + RUN make ENV LC_ALL=C.utf8 diff --git a/.devops/server-cuda.Dockerfile b/.devops/server-cuda.Dockerfile index 5683a364652b1..59a52ba21a3f1 100644 --- a/.devops/server-cuda.Dockerfile +++ b/.devops/server-cuda.Dockerfile @@ -12,7 +12,7 @@ FROM ${BASE_CUDA_DEV_CONTAINER} as build ARG CUDA_DOCKER_ARCH=all RUN apt-get update && \ - apt-get install -y build-essential git + apt-get install -y build-essential git libcurl4-openssl-dev WORKDIR /app @@ -22,11 +22,16 @@ COPY . . ENV CUDA_DOCKER_ARCH=${CUDA_DOCKER_ARCH} # Enable CUDA ENV LLAMA_CUDA=1 +# Enable cURL +ENV LLAMA_CURL=1 RUN make FROM ${BASE_CUDA_RUN_CONTAINER} as runtime +RUN apt-get update && \ + apt-get install -y libcurl4-openssl-dev + COPY --from=build /app/server /server ENTRYPOINT [ "/server" ] diff --git a/.devops/server-intel.Dockerfile b/.devops/server-intel.Dockerfile index 312f2df809de6..3044873354ac6 100644 --- a/.devops/server-intel.Dockerfile +++ b/.devops/server-intel.Dockerfile @@ -4,7 +4,7 @@ FROM intel/oneapi-basekit:$ONEAPI_VERSION as build ARG LLAMA_SYCL_F16=OFF RUN apt-get update && \ - apt-get install -y git + apt-get install -y git libcurl4-openssl-dev WORKDIR /app @@ -16,11 +16,14 @@ RUN mkdir build && \ echo "LLAMA_SYCL_F16 is set" && \ export OPT_SYCL_F16="-DLLAMA_SYCL_F16=ON"; \ fi && \ - cmake .. -DLLAMA_SYCL=ON -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx ${OPT_SYCL_F16} && \ + cmake .. -DLLAMA_SYCL=ON -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx -DLLAMA_CURL=ON ${OPT_SYCL_F16} && \ cmake --build . --config Release --target server FROM intel/oneapi-basekit:$ONEAPI_VERSION as runtime +RUN apt-get update && \ + apt-get install -y libcurl4-openssl-dev + COPY --from=build /app/build/bin/server /server ENV LC_ALL=C.utf8 diff --git a/.devops/server-rocm.Dockerfile b/.devops/server-rocm.Dockerfile index e9a31647c2fd0..c02a31dd8c756 100644 --- a/.devops/server-rocm.Dockerfile +++ b/.devops/server-rocm.Dockerfile @@ -40,6 +40,11 @@ ENV LLAMA_HIPBLAS=1 ENV CC=/opt/rocm/llvm/bin/clang ENV CXX=/opt/rocm/llvm/bin/clang++ +# Enable cURL +ENV LLAMA_CURL=1 +RUN apt-get update && \ + apt-get install -y libcurl4-openssl-dev + RUN make ENTRYPOINT [ "/app/server" ] diff --git a/.devops/server-vulkan.Dockerfile b/.devops/server-vulkan.Dockerfile index e0add6fc36286..7e5a5283b500a 100644 --- a/.devops/server-vulkan.Dockerfile +++ b/.devops/server-vulkan.Dockerfile @@ -11,12 +11,16 @@ RUN wget -qO - https://packages.lunarg.com/lunarg-signing-key-pub.asc | apt-key apt update -y && \ apt-get install -y vulkan-sdk +# Install cURL +RUN apt-get update && \ + apt-get install -y libcurl4-openssl-dev + # Build it WORKDIR /app COPY . . RUN mkdir build && \ cd build && \ - cmake .. -DLLAMA_VULKAN=1 && \ + cmake .. -DLLAMA_VULKAN=1 -DLLAMA_CURL=1 && \ cmake --build . --config Release --target server # Clean up diff --git a/.devops/server.Dockerfile b/.devops/server.Dockerfile index 134588fe2e4cb..be964e0e83648 100644 --- a/.devops/server.Dockerfile +++ b/.devops/server.Dockerfile @@ -3,16 +3,21 @@ ARG UBUNTU_VERSION=22.04 FROM ubuntu:$UBUNTU_VERSION as build RUN apt-get update && \ - apt-get install -y build-essential git + apt-get install -y build-essential git libcurl4-openssl-dev WORKDIR /app COPY . . +ENV LLAMA_CURL=1 + RUN make FROM ubuntu:$UBUNTU_VERSION as runtime +RUN apt-get update && \ + apt-get install -y libcurl4-openssl-dev + COPY --from=build /app/server /server ENV LC_ALL=C.utf8 diff --git a/.github/workflows/bench.yml b/.github/workflows/bench.yml index 949d806f67bf4..d50af0b70e17c 100644 --- a/.github/workflows/bench.yml +++ b/.github/workflows/bench.yml @@ -24,15 +24,15 @@ on: push: branches: - master - paths: ['.github/workflows/bench.yml', '**/CMakeLists.txt', '**/Makefile', '**/*.h', '**/*.hpp', '**/*.c', '**/*.cpp', '**/*.cu', '**/*.swift', '**/*.m', 'examples/server/bench/**.*'] + paths: ['llama.cpp', 'ggml.c', 'ggml-backend.c', 'ggml-quants.c', '**/*.cu', 'examples/server/*.h*', 'examples/server/*.cpp'] pull_request_target: types: [opened, synchronize, reopened] - paths: ['.github/workflows/bench.yml', '**/CMakeLists.txt', '**/Makefile', '**/*.h', '**/*.hpp', '**/*.c', '**/*.cpp', '**/*.cu', '**/*.swift', '**/*.m', 'examples/server/bench/**.*'] + paths: ['llama.cpp', 'ggml.c', 'ggml-backend.c', 'ggml-quants.c', '**/*.cu', 'examples/server/*.h*', 'examples/server/*.cpp'] schedule: - cron: '04 2 * * *' concurrency: - group: ${{ github.workflow }}-${{ github.ref }} + group: ${{ github.workflow }}-${{ github.ref || github.run_id }}-${{ github.event.inputs.sha }} cancel-in-progress: true jobs: @@ -42,11 +42,21 @@ jobs: RUNNER_LABEL: Standard_NC4as_T4_v3 # FIXME Do not find a way to not duplicate it N_USERS: 8 DURATION: 10m + + strategy: + matrix: + model: [phi-2] + ftype: [q4_0, q8_0, f16] + include: + - model: phi-2 + ftype: q4_0 + pr_comment_enabled: "true" + if: ${{ github.event.inputs.gpu-series == 'Standard_NC4as_T4_v3' || github.event.schedule || github.event.pull_request || github.head_ref == 'master' || github.ref_name == 'master' || github.event.push.ref == 'refs/heads/master' }} steps: - name: Clone id: checkout - uses: actions/checkout@v3 + uses: actions/checkout@v4 with: fetch-depth: 0 ref: ${{ github.event.inputs.sha || github.event.pull_request.head.sha || github.sha || github.head_ref || github.ref_name }} @@ -69,12 +79,18 @@ jobs: sleep 0.1 done - - name: Install k6 + - name: Set up Go + uses: actions/setup-go@v5 + with: + go-version: '1.21' + + - name: Install k6 and xk6-sse id: k6_installation run: | cd examples/server/bench - wget --quiet https://github.com/grafana/k6/releases/download/v0.49.0/k6-v0.49.0-linux-amd64.tar.gz - tar xzf k6*.tar.gz --strip-components=1 + go install go.k6.io/xk6/cmd/xk6@latest + xk6 build master \ + --with github.com/phymbert/xk6-sse - name: Build id: cmake_build @@ -108,7 +124,7 @@ jobs: cd examples/server/bench source venv/bin/activate - BENCH_K6_BIN_PATH=./k6 python bench.py \ + python bench.py \ --runner-label ${{ env.RUNNER_LABEL }} \ --name ${{ github.job }} \ --branch ${{ github.head_ref || github.ref_name }} \ @@ -116,7 +132,7 @@ jobs: --scenario script.js \ --duration ${{ github.event.inputs.duration || env.DURATION }} \ --hf-repo ggml-org/models \ - --hf-file phi-2/ggml-model-q4_0.gguf \ + --hf-file ${{ matrix.model }}/ggml-model-${{ matrix.ftype }}.gguf \ --model-path-prefix /models \ --parallel ${{ env.N_USERS }} \ -ngl 33 \ @@ -134,7 +150,7 @@ jobs: - uses: actions/upload-artifact@v4 with: - name: benchmark-results + name: bench-server-${{ github.job }}-${{ env.RUNNER_LABEL }}-${{ matrix.model }}-${{ matrix.ftype }} compression-level: 9 path: | examples/server/bench/*.jpg @@ -146,7 +162,7 @@ jobs: with: authToken: ${{secrets.GITHUB_TOKEN}} sha: ${{ inputs.sha || github.event.pull_request.head.sha || github.sha }} - context: bench-server-baseline + context: bench-server-${{ github.job }}-${{ env.RUNNER_LABEL }}-${{ matrix.model }}-${{ matrix.ftype }} description: | ${{ env.BENCH_RESULTS }} state: 'success' @@ -203,21 +219,26 @@ jobs: - name: Comment PR uses: mshick/add-pr-comment@v2 id: comment_pr - if: ${{ github.event.pull_request != '' }} + if: ${{ github.event.pull_request != '' && matrix.pr_comment_enabled == 'true' }} with: - message-id: bench-${{ github.job }}-${{ env.RUNNER_LABEL }} + message-id: bench-server-${{ github.job }}-${{ env.RUNNER_LABEL }}-${{ matrix.model }}-${{ matrix.ftype }} message: | - 📈 **llama.cpp server** for _${{ github.job }}_ on _${{ env.RUNNER_LABEL }}_: **${{ env.BENCH_ITERATIONS}} iterations** 🚀 +
- - Concurrent users: ${{ env.N_USERS }}, duration: ${{ github.event.inputs.duration || env.DURATION }} - - HTTP request : avg=${{ env.HTTP_REQ_DURATION_AVG }}ms p(90)=${{ env.HTTP_REQ_DURATION_P_90_ }}ms fails=${{ env.HTTP_REQ_FAILED_PASSES }}, finish reason: stop=${{ env.LLAMACPP_COMPLETIONS_STOP_RATE_PASSES }} truncated=${{ env.LLAMACPP_COMPLETIONS_TRUNCATED_RATE_PASSES }} - - Prompt processing (pp): avg=${{ env.LLAMACPP_PROMPT_TOKENS_AVG }}tk/s p(90)=${{ env.LLAMACPP_PROMPT_TOKENS_P_90_ }}tk/s **total=${{ env.LLAMACPP_PROMPT_TOKENS_TOTAL_COUNTER_RATE }}tk/s** - - Token generation (tg): avg=${{ env.LLAMACPP_TOKENS_SECOND_AVG }}tk/s p(90)=${{ env.LLAMACPP_TOKENS_SECOND_P_90_ }}tk/s **total=${{ env.LLAMACPP_COMPLETION_TOKENS_TOTAL_COUNTER_RATE }}tk/s** - - ${{ env.BENCH_GRAPH_XLABEL }} + 📈 **llama.cpp server** for _${{ github.job }}_ on _${{ env.RUNNER_LABEL }}_ for `${{ matrix.model }}`-`${{ matrix.ftype }}`: **${{ env.BENCH_ITERATIONS}} iterations** 🚀 + +
diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml
index 012ab03fe15a9..50f76db3c0d46 100644
--- a/.github/workflows/build.yml
+++ b/.github/workflows/build.yml
@@ -16,7 +16,7 @@ on:
paths: ['**/CMakeLists.txt', '**/Makefile', '**/*.h', '**/*.hpp', '**/*.c', '**/*.cpp', '**/*.cu', '**/*.swift', '**/*.m']
concurrency:
- group: ${{ github.workflow }}-${{ github.ref }}
+ group: ${{ github.workflow }}-${{ github.head_ref && github.ref || github.run_id }}
cancel-in-progress: true
env:
@@ -31,7 +31,9 @@ jobs:
steps:
- name: Clone
id: checkout
- uses: actions/checkout@v3
+ uses: actions/checkout@v4
+ with:
+ fetch-depth: 0
- name: Dependencies
id: depends
@@ -52,7 +54,7 @@ jobs:
id: cmake_test
run: |
cd build
- ctest -L main --verbose --timeout 900
+ ctest -L 'main|curl' --verbose --timeout 900
- name: Determine tag name
id: tag
@@ -76,10 +78,10 @@ jobs:
- name: Upload artifacts
if: ${{ ( github.event_name == 'push' && github.ref == 'refs/heads/master' ) || github.event.inputs.create_release == 'true' }}
- uses: actions/upload-artifact@v3
+ uses: actions/upload-artifact@v4
with:
- path: |
- llama-${{ steps.tag.outputs.name }}-bin-macos-arm64.zip
+ path: llama-${{ steps.tag.outputs.name }}-bin-macos-arm64.zip
+ name: llama-bin-macos-arm64.zip
macOS-latest-cmake-x64:
runs-on: macos-latest
@@ -87,7 +89,9 @@ jobs:
steps:
- name: Clone
id: checkout
- uses: actions/checkout@v3
+ uses: actions/checkout@v4
+ with:
+ fetch-depth: 0
- name: Dependencies
id: depends
@@ -101,7 +105,9 @@ jobs:
sysctl -a
mkdir build
cd build
- cmake -DLLAMA_FATAL_WARNINGS=ON -DLLAMA_METAL_EMBED_LIBRARY=ON -DLLAMA_CURL=ON ..
+ # Metal is disabled due to intermittent failures with Github runners not having a GPU:
+ # https://github.com/ggerganov/llama.cpp/actions/runs/8635935781/job/23674807267#step:5:2313
+ cmake -DLLAMA_FATAL_WARNINGS=ON -DLLAMA_METAL=OFF -DLLAMA_CURL=ON ..
cmake --build . --config Release -j $(sysctl -n hw.logicalcpu)
- name: Test
@@ -132,10 +138,10 @@ jobs:
- name: Upload artifacts
if: ${{ ( github.event_name == 'push' && github.ref == 'refs/heads/master' ) || github.event.inputs.create_release == 'true' }}
- uses: actions/upload-artifact@v3
+ uses: actions/upload-artifact@v4
with:
- path: |
- llama-${{ steps.tag.outputs.name }}-bin-macos-x64.zip
+ path: llama-${{ steps.tag.outputs.name }}-bin-macos-x64.zip
+ name: llama-bin-macos-x64.zip
ubuntu-focal-make:
runs-on: ubuntu-20.04
@@ -146,7 +152,7 @@ jobs:
steps:
- name: Clone
id: checkout
- uses: actions/checkout@v3
+ uses: actions/checkout@v4
- name: Dependencies
id: depends
@@ -158,7 +164,7 @@ jobs:
with:
node-version: "20"
- - uses: actions/setup-python@v4
+ - uses: actions/setup-python@v5
with:
python-version: "3.11"
@@ -181,7 +187,7 @@ jobs:
steps:
- name: Clone
id: checkout
- uses: actions/checkout@v3
+ uses: actions/checkout@v4
- name: Dependencies
id: depends
@@ -203,27 +209,29 @@ jobs:
steps:
- name: Clone
id: checkout
- uses: actions/checkout@v3
+ uses: actions/checkout@v4
+ with:
+ fetch-depth: 0
- name: Dependencies
id: depends
run: |
sudo apt-get update
- sudo apt-get install build-essential
+ sudo apt-get install build-essential libcurl4-openssl-dev
- name: Build
id: cmake_build
run: |
mkdir build
cd build
- cmake .. -DLLAMA_FATAL_WARNINGS=ON
+ cmake .. -DLLAMA_FATAL_WARNINGS=ON -DLLAMA_CURL=ON
cmake --build . --config Release -j $(nproc)
- name: Test
id: cmake_test
run: |
cd build
- ctest -L main --verbose --timeout 900
+ ctest -L 'main|curl' --verbose --timeout 900
- name: Test llama2c conversion
id: llama2c_test
@@ -236,6 +244,33 @@ jobs:
./bin/convert-llama2c-to-ggml --copy-vocab-from-model ./tok512.bin --llama2c-model stories260K.bin --llama2c-output-model stories260K.gguf
./bin/main -m stories260K.gguf -p "One day, Lily met a Shoggoth" -n 500 -c 256
+ - name: Determine tag name
+ id: tag
+ shell: bash
+ run: |
+ BUILD_NUMBER="$(git rev-list --count HEAD)"
+ SHORT_HASH="$(git rev-parse --short=7 HEAD)"
+ if [[ "${{ env.BRANCH_NAME }}" == "master" ]]; then
+ echo "name=b${BUILD_NUMBER}" >> $GITHUB_OUTPUT
+ else
+ SAFE_NAME=$(echo "${{ env.BRANCH_NAME }}" | tr '/' '-')
+ echo "name=${SAFE_NAME}-b${BUILD_NUMBER}-${SHORT_HASH}" >> $GITHUB_OUTPUT
+ fi
+
+ - name: Pack artifacts
+ id: pack_artifacts
+ if: ${{ ( github.event_name == 'push' && github.ref == 'refs/heads/master' ) || github.event.inputs.create_release == 'true' }}
+ run: |
+ cp LICENSE ./build/bin/
+ zip -r llama-${{ steps.tag.outputs.name }}-bin-ubuntu-x64.zip ./build/bin/*
+
+ - name: Upload artifacts
+ if: ${{ ( github.event_name == 'push' && github.ref == 'refs/heads/master' ) || github.event.inputs.create_release == 'true' }}
+ uses: actions/upload-artifact@v4
+ with:
+ path: llama-${{ steps.tag.outputs.name }}-bin-ubuntu-x64.zip
+ name: llama-bin-ubuntu-x64.zip
+
# ubuntu-latest-cmake-sanitizer:
# runs-on: ubuntu-latest
#
@@ -249,7 +284,7 @@ jobs:
# steps:
# - name: Clone
# id: checkout
-# uses: actions/checkout@v3
+# uses: actions/checkout@v4
#
# - name: Dependencies
# id: depends
@@ -283,7 +318,7 @@ jobs:
steps:
- name: Clone
id: checkout
- uses: actions/checkout@v3
+ uses: actions/checkout@v4
- name: Dependencies
id: depends
@@ -311,7 +346,7 @@ jobs:
steps:
- name: Clone
id: checkout
- uses: actions/checkout@v3
+ uses: actions/checkout@v4
- name: Dependencies
id: depends
@@ -357,7 +392,7 @@ jobs:
- name: Clone
id: checkout
- uses: actions/checkout@v3
+ uses: actions/checkout@v4
- name: Build
id: cmake_build
@@ -398,7 +433,7 @@ jobs:
- name: Clone
id: checkout
- uses: actions/checkout@v3
+ uses: actions/checkout@v4
- name: Build
id: cmake_build
@@ -418,7 +453,7 @@ jobs:
steps:
- name: Clone
id: checkout
- uses: actions/checkout@v3
+ uses: actions/checkout@v4
- name: Dependencies
id: depends
@@ -449,7 +484,7 @@ jobs:
steps:
- name: Clone
id: checkout
- uses: actions/checkout@v3
+ uses: actions/checkout@v4
- name: Dependencies
id: depends
@@ -593,7 +628,7 @@ jobs:
steps:
- name: Clone
id: checkout
- uses: actions/checkout@v3
+ uses: actions/checkout@v4
with:
fetch-depth: 0
@@ -723,10 +758,10 @@ jobs:
- name: Upload artifacts
if: ${{ ( github.event_name == 'push' && github.ref == 'refs/heads/master' ) || github.event.inputs.create_release == 'true' }}
- uses: actions/upload-artifact@v3
+ uses: actions/upload-artifact@v4
with:
- path: |
- llama-${{ steps.tag.outputs.name }}-bin-win-${{ matrix.build }}-x64.zip
+ path: llama-${{ steps.tag.outputs.name }}-bin-win-${{ matrix.build }}-x64.zip
+ name: llama-bin-win-${{ matrix.build }}-x64.zip
windows-latest-cmake-cuda:
runs-on: windows-latest
@@ -739,7 +774,7 @@ jobs:
steps:
- name: Clone
id: checkout
- uses: actions/checkout@v3
+ uses: actions/checkout@v4
with:
fetch-depth: 0
@@ -779,10 +814,10 @@ jobs:
- name: Upload artifacts
if: ${{ ( github.event_name == 'push' && github.ref == 'refs/heads/master' ) || github.event.inputs.create_release == 'true' }}
- uses: actions/upload-artifact@v3
+ uses: actions/upload-artifact@v4
with:
- path: |
- llama-${{ steps.tag.outputs.name }}-bin-win-${{ matrix.build }}-cu${{ matrix.cuda }}-x64.zip
+ path: llama-${{ steps.tag.outputs.name }}-bin-win-${{ matrix.build }}-cu${{ matrix.cuda }}-x64.zip
+ name: llama-bin-win-cu${{ matrix.cuda }}-x64.zip
- name: Copy and pack Cuda runtime
run: |
@@ -793,10 +828,10 @@ jobs:
- name: Upload Cuda runtime
if: ${{ ( github.event_name == 'push' && github.ref == 'refs/heads/master' ) || github.event.inputs.create_release == 'true' }}
- uses: actions/upload-artifact@v3
+ uses: actions/upload-artifact@v4
with:
- path: |
- cudart-llama-bin-win-cu${{ matrix.cuda }}-x64.zip
+ path: cudart-llama-bin-win-cu${{ matrix.cuda }}-x64.zip
+ name: cudart-llama-bin-win-cu${{ matrix.cuda }}-x64.zip
windows-latest-cmake-sycl:
runs-on: windows-latest
@@ -812,7 +847,7 @@ jobs:
steps:
- name: Clone
id: checkout
- uses: actions/checkout@v3
+ uses: actions/checkout@v4
with:
fetch-depth: 0
@@ -844,17 +879,17 @@ jobs:
- name: Upload artifacts
if: ${{ ( github.event_name == 'push' && github.ref == 'refs/heads/master' ) || github.event.inputs.create_release == 'true' }}
- uses: actions/upload-artifact@v3
+ uses: actions/upload-artifact@v4
with:
- path: |
- llama-${{ steps.tag.outputs.name }}-bin-win-sycl-x64.zip
+ path: llama-${{ steps.tag.outputs.name }}-bin-win-sycl-x64.zip
+ name: llama-bin-win-sycl-x64.zip
ios-xcode-build:
runs-on: macos-latest
steps:
- name: Checkout code
- uses: actions/checkout@v3
+ uses: actions/checkout@v4
- name: Build Xcode project
run: xcodebuild -project examples/llama.swiftui/llama.swiftui.xcodeproj -scheme llama.swiftui -sdk iphoneos CODE_SIGNING_REQUIRED=NO CODE_SIGN_IDENTITY= -destination 'generic/platform=iOS' build
@@ -864,7 +899,7 @@ jobs:
steps:
- name: Clone
- uses: actions/checkout@v3
+ uses: actions/checkout@v4
- name: Set up JDK
uses: actions/setup-java@v3
@@ -887,7 +922,7 @@ jobs:
# runs-on: macos-12
# steps:
# - name: Clone
-# uses: actions/checkout@v3
+# uses: actions/checkout@v4
#
# - name: Build
# uses: cross-platform-actions/action@v0.19.0
@@ -918,7 +953,7 @@ jobs:
steps:
- name: Clone
id: checkout
- uses: actions/checkout@v3
+ uses: actions/checkout@v4
with:
fetch-depth: 0
@@ -937,7 +972,13 @@ jobs:
- name: Download artifacts
id: download-artifact
- uses: actions/download-artifact@v3
+ uses: actions/download-artifact@v4
+ with:
+ path: ./artifact
+
+ - name: Move artifacts
+ id: move_artifacts
+ run: mkdir -p ./artifact/release && mv ./artifact/*/*.zip ./artifact/release
- name: Create release
id: create_release
@@ -956,7 +997,7 @@ jobs:
const path = require('path');
const fs = require('fs');
const release_id = '${{ steps.create_release.outputs.id }}';
- for (let file of await fs.readdirSync('./artifact')) {
+ for (let file of await fs.readdirSync('./artifact/release')) {
if (path.extname(file) === '.zip') {
console.log('uploadReleaseAsset', file);
await github.repos.uploadReleaseAsset({
@@ -964,7 +1005,7 @@ jobs:
repo: context.repo.repo,
release_id: release_id,
name: file,
- data: await fs.readFileSync(`./artifact/${file}`)
+ data: await fs.readFileSync(`./artifact/release/${file}`)
});
}
}
@@ -978,7 +1019,7 @@ jobs:
#
# steps:
# - name: Clone
-# uses: actions/checkout@v3
+# uses: actions/checkout@v4
#
# - name: Dependencies
# run: |
@@ -1002,7 +1043,7 @@ jobs:
#
# steps:
# - name: Clone
-# uses: actions/checkout@v3
+# uses: actions/checkout@v4
#
# - name: Dependencies
# run: |
@@ -1026,7 +1067,7 @@ jobs:
#
# steps:
# - name: Clone
-# uses: actions/checkout@v3
+# uses: actions/checkout@v4
#
# - name: Dependencies
# run: |
@@ -1056,7 +1097,7 @@ jobs:
#
# steps:
# - name: Clone
-# uses: actions/checkout@v3
+# uses: actions/checkout@v4
#
# - name: Add msbuild to PATH
# uses: microsoft/setup-msbuild@v1
@@ -1072,7 +1113,7 @@ jobs:
# msbuild ALL_BUILD.vcxproj -t:build -p:configuration=${{ matrix.build }} -p:platform=${{ matrix.arch }}
#
# - name: Upload binaries
-# uses: actions/upload-artifact@v1
+# uses: actions/upload-artifact@v4
# with:
# name: llama-bin-${{ matrix.arch }}
# path: build/bin/${{ matrix.build }}
@@ -1095,7 +1136,7 @@ jobs:
#
# steps:
# - name: Clone
-# uses: actions/checkout@v3
+# uses: actions/checkout@v4
#
# - name: Add msbuild to PATH
# uses: microsoft/setup-msbuild@v1
@@ -1127,7 +1168,7 @@ jobs:
#
# - name: Upload binaries
# if: matrix.blas == 'ON'
-# uses: actions/upload-artifact@v1
+# uses: actions/upload-artifact@v4
# with:
# name: llama-blas-bin-${{ matrix.arch }}
# path: build/bin/${{ matrix.build }}
@@ -1141,7 +1182,7 @@ jobs:
#
# steps:
# - name: Clone
-# uses: actions/checkout@v3
+# uses: actions/checkout@v4
#
# - name: Dependencies
# run: |
diff --git a/.github/workflows/code-coverage.yml b/.github/workflows/code-coverage.yml
index 4112518bb813c..f12c558f81bae 100644
--- a/.github/workflows/code-coverage.yml
+++ b/.github/workflows/code-coverage.yml
@@ -6,7 +6,7 @@ env:
GGML_N_THREADS: 1
concurrency:
- group: ${{ github.workflow }}-${{ github.ref }}
+ group: ${{ github.workflow }}-${{ github.head_ref && github.ref || github.run_id }}
cancel-in-progress: true
jobs:
@@ -14,7 +14,7 @@ jobs:
runs-on: ubuntu-20.04
steps:
- name: Checkout
- uses: actions/checkout@v3
+ uses: actions/checkout@v4
- name: Dependencies
run: |
diff --git a/.github/workflows/docker.yml b/.github/workflows/docker.yml
index 9591bfc2a5d3b..9b03d19bc77c6 100644
--- a/.github/workflows/docker.yml
+++ b/.github/workflows/docker.yml
@@ -16,7 +16,7 @@ on:
- master
concurrency:
- group: ${{ github.workflow }}-${{ github.ref }}
+ group: ${{ github.workflow }}-${{ github.head_ref && github.ref || github.run_id }}
cancel-in-progress: true
jobs:
@@ -46,7 +46,7 @@ jobs:
- { tag: "server-intel", dockerfile: ".devops/server-intel.Dockerfile", platforms: "linux/amd64" }
steps:
- name: Check out the repo
- uses: actions/checkout@v3
+ uses: actions/checkout@v4
- name: Set up QEMU
uses: docker/setup-qemu-action@v2
@@ -91,6 +91,12 @@ jobs:
echo "name=${SAFE_NAME}-b${BUILD_NUMBER}-${SHORT_HASH}" >> $GITHUB_OUTPUT
fi
+ - name: Downcase github.repository_owner
+ run: |
+ echo "repository_owner_lowercase=${GITHUB_REPOSITORY_OWNER@L}" >> $GITHUB_ENV
+ env:
+ GITHUB_REPOSITORY_OWNER: '${{ github.repository_owner }}'
+
- name: Build and push Docker image (versioned)
if: github.event_name == 'push'
uses: docker/build-push-action@v4
@@ -98,7 +104,7 @@ jobs:
context: .
push: true
platforms: ${{ matrix.config.platforms }}
- tags: "ghcr.io/${{ github.repository_owner }}/llama.cpp:${{ matrix.config.tag }}-${{ env.COMMIT_SHA }}"
+ tags: "ghcr.io/${{ env.repository_owner_lowercase }}/llama.cpp:${{ matrix.config.tag }}-${{ env.COMMIT_SHA }}"
file: ${{ matrix.config.dockerfile }}
- name: Build and push Docker image (tagged)
@@ -107,5 +113,5 @@ jobs:
context: .
push: ${{ github.event_name == 'push' }}
platforms: ${{ matrix.config.platforms }}
- tags: "ghcr.io/${{ github.repository_owner }}/llama.cpp:${{ matrix.config.tag }},ghcr.io/${{ github.repository_owner }}/llama.cpp:${{ matrix.config.tag }}-${{ steps.tag.outputs.name }}"
+ tags: "ghcr.io/${{ env.repository_owner_lowercase }}/llama.cpp:${{ matrix.config.tag }},ghcr.io/${{ env.repository_owner_lowercase }}/llama.cpp:${{ matrix.config.tag }}-${{ steps.tag.outputs.name }}"
file: ${{ matrix.config.dockerfile }}
diff --git a/.github/workflows/editorconfig.yml b/.github/workflows/editorconfig.yml
index 7b2a00c904237..ae86e99275265 100644
--- a/.github/workflows/editorconfig.yml
+++ b/.github/workflows/editorconfig.yml
@@ -15,13 +15,13 @@ on:
- master
concurrency:
- group: ${{ github.workflow }}-${{ github.ref }}
+ group: ${{ github.workflow }}-${{ github.head_ref && github.ref || github.run_id }}
cancel-in-progress: true
jobs:
editorconfig:
runs-on: ubuntu-latest
steps:
- - uses: actions/checkout@v3
+ - uses: actions/checkout@v4
- uses: editorconfig-checker/action-editorconfig-checker@main
- run: editorconfig-checker
diff --git a/.github/workflows/gguf-publish.yml b/.github/workflows/gguf-publish.yml
index 57db175122c03..3ca4d30581074 100644
--- a/.github/workflows/gguf-publish.yml
+++ b/.github/workflows/gguf-publish.yml
@@ -24,9 +24,9 @@ jobs:
runs-on: ubuntu-latest
steps:
- - uses: actions/checkout@v3
+ - uses: actions/checkout@v4
- name: Set up Python
- uses: actions/setup-python@v2
+ uses: actions/setup-python@v5
with:
python-version: '3.9.x'
- name: Install dependencies
diff --git a/.github/workflows/nix-ci-aarch64.yml b/.github/workflows/nix-ci-aarch64.yml
index 109a793ea4387..4aa4b2379dccf 100644
--- a/.github/workflows/nix-ci-aarch64.yml
+++ b/.github/workflows/nix-ci-aarch64.yml
@@ -18,7 +18,7 @@ on:
paths: ['**/*.nix', 'flake.lock']
concurrency:
- group: ${{ github.workflow }}-${{ github.ref }}
+ group: ${{ github.workflow }}-${{ github.head_ref && github.ref || github.run_id }}
cancel-in-progress: true
jobs:
diff --git a/.github/workflows/nix-ci.yml b/.github/workflows/nix-ci.yml
index 8b5b99c8f8e17..8955f38d020a6 100644
--- a/.github/workflows/nix-ci.yml
+++ b/.github/workflows/nix-ci.yml
@@ -9,7 +9,7 @@ on:
types: [opened, synchronize, reopened]
concurrency:
- group: ${{ github.workflow }}-${{ github.ref }}
+ group: ${{ github.workflow }}-${{ github.head_ref && github.ref || github.run_id }}
cancel-in-progress: true
jobs:
diff --git a/.github/workflows/python-check-requirements.yml b/.github/workflows/python-check-requirements.yml
index 4092b12fa9930..4e0374fc63d95 100644
--- a/.github/workflows/python-check-requirements.yml
+++ b/.github/workflows/python-check-requirements.yml
@@ -17,7 +17,7 @@ on:
- 'requirements/*.txt'
concurrency:
- group: ${{ github.workflow }}-${{ github.ref }}
+ group: ${{ github.workflow }}-${{ github.head_ref && github.ref || github.run_id }}
cancel-in-progress: true
jobs:
@@ -26,9 +26,9 @@ jobs:
name: check-requirements
steps:
- name: Check out source repository
- uses: actions/checkout@v3
+ uses: actions/checkout@v4
- name: Set up Python environment
- uses: actions/setup-python@v4
+ uses: actions/setup-python@v5
with:
python-version: "3.11"
- name: Run check-requirements.sh script
diff --git a/.github/workflows/python-lint.yml b/.github/workflows/python-lint.yml
index 4bdd79c4a0b19..f4ae654959be3 100644
--- a/.github/workflows/python-lint.yml
+++ b/.github/workflows/python-lint.yml
@@ -3,7 +3,7 @@ name: flake8 Lint
on: [push, pull_request]
concurrency:
- group: ${{ github.workflow }}-${{ github.ref }}
+ group: ${{ github.workflow }}-${{ github.head_ref && github.ref || github.run_id }}
cancel-in-progress: true
jobs:
@@ -12,9 +12,9 @@ jobs:
name: Lint
steps:
- name: Check out source repository
- uses: actions/checkout@v3
+ uses: actions/checkout@v4
- name: Set up Python environment
- uses: actions/setup-python@v4
+ uses: actions/setup-python@v5
with:
python-version: "3.11"
- name: flake8 Lint
diff --git a/.github/workflows/server.yml b/.github/workflows/server.yml
index f07d2553669af..3e68a3c8c433f 100644
--- a/.github/workflows/server.yml
+++ b/.github/workflows/server.yml
@@ -4,6 +4,10 @@ name: Server
on:
workflow_dispatch: # allows manual triggering
inputs:
+ sha:
+ description: 'Commit SHA1 to build'
+ required: false
+ type: string
slow_tests:
description: 'Run slow tests'
required: true
@@ -11,15 +15,15 @@ on:
push:
branches:
- master
- paths: ['.github/workflows/server.yml', '**/CMakeLists.txt', '**/Makefile', '**/*.h', '**/*.hpp', '**/*.c', '**/*.cpp', '**/*.cu', '**/*.swift', '**/*.m', 'examples/server/tests/**.*']
- pull_request:
+ paths: ['.github/workflows/server.yml', '**/CMakeLists.txt', '**/Makefile', '**/*.h', '**/*.hpp', '**/*.c', '**/*.cpp', '**/*.cu', '**/*.swift', '**/*.m', 'examples/server/**.*']
+ pull_request_target:
types: [opened, synchronize, reopened]
- paths: ['.github/workflows/server.yml', '**/CMakeLists.txt', '**/Makefile', '**/*.h', '**/*.hpp', '**/*.c', '**/*.cpp', '**/*.cu', '**/*.swift', '**/*.m', 'examples/server/tests/**.*']
+ paths: ['.github/workflows/server.yml', '**/CMakeLists.txt', '**/Makefile', '**/*.h', '**/*.hpp', '**/*.c', '**/*.cpp', '**/*.cu', '**/*.swift', '**/*.m', 'examples/server/**.*']
schedule:
- - cron: '0 0 * * *'
+ - cron: '2 4 * * *'
concurrency:
- group: ${{ github.workflow }}-${{ github.ref }}
+ group: ${{ github.workflow }}-${{ github.ref || github.run_id }}
cancel-in-progress: true
jobs:
@@ -44,25 +48,45 @@ jobs:
options: --cpus 4
steps:
- - name: Clone
- id: checkout
- uses: actions/checkout@v3
- with:
- fetch-depth: 0
-
- name: Dependencies
id: depends
run: |
apt-get update
apt-get -y install \
build-essential \
+ xxd \
git \
cmake \
python3-pip \
+ curl \
wget \
language-pack-en \
libcurl4-openssl-dev
+ - name: Clone
+ id: checkout
+ uses: actions/checkout@v4
+ with:
+ fetch-depth: 0
+ ref: ${{ github.event.inputs.sha || github.event.pull_request.head.sha || github.sha || github.head_ref || github.ref_name }}
+
+ - name: Verify server deps
+ id: verify_server_deps
+ run: |
+ git config --global --add safe.directory $(realpath .)
+ cd examples/server
+ git ls-files --others --modified
+ git status
+ ./deps.sh
+ git status
+ not_ignored_files="$(git ls-files --others --modified)"
+ echo "Modified files: ${not_ignored_files}"
+ if [ -n "${not_ignored_files}" ]; then
+ echo "Repository is dirty or server deps are not built as expected"
+ echo "${not_ignored_files}"
+ exit 1
+ fi
+
- name: Build
id: cmake_build
run: |
@@ -102,7 +126,7 @@ jobs:
steps:
- name: Clone
id: checkout
- uses: actions/checkout@v3
+ uses: actions/checkout@v4
with:
fetch-depth: 0
diff --git a/.github/workflows/zig-build.yml b/.github/workflows/zig-build.yml
index cb43954eb452d..747c35cc07a96 100644
--- a/.github/workflows/zig-build.yml
+++ b/.github/workflows/zig-build.yml
@@ -7,7 +7,7 @@ on:
- master
concurrency:
- group: ${{ github.workflow }}-${{ github.ref }}
+ group: ${{ github.workflow }}-${{ github.head_ref && github.ref || github.run_id }}
cancel-in-progress: true
jobs:
@@ -18,7 +18,7 @@ jobs:
runs-on: [ubuntu-latest, macos-latest, windows-latest]
runs-on: ${{ matrix.runs-on }}
steps:
- - uses: actions/checkout@v3
+ - uses: actions/checkout@v4
with:
submodules: recursive
fetch-depth: 0
diff --git a/.gitignore b/.gitignore
index 9fb5b80c38e74..5c14900844435 100644
--- a/.gitignore
+++ b/.gitignore
@@ -34,6 +34,7 @@ lcov-report/
gcovr-report/
build*
+!build.zig
cmake-build-*
out/
tmp/
@@ -48,6 +49,7 @@ models-mnt
/convert-llama2c-to-ggml
/embd-input-test
/embedding
+/eval-callback
/gguf
/gguf-llama-simple
/gguf-split
@@ -99,6 +101,9 @@ qnt-*.txt
perf-*.txt
examples/jeopardy/results.txt
+examples/server/*.html.hpp
+examples/server/*.js.hpp
+examples/server/*.mjs.hpp
poetry.lock
poetry.toml
diff --git a/AUTHORS b/AUTHORS
new file mode 100644
index 0000000000000..b029f13da3b56
--- /dev/null
+++ b/AUTHORS
@@ -0,0 +1,655 @@
+# date: Tue Apr 9 09:17:14 EEST 2024
+# this file is auto-generated by scripts/gen-authors.sh
+
+0cc4m