ggerganov · ggerganov · Feb 10, 2024 · Feb 10, 2024 · Feb 10, 2024
diff --git a/README.md b/README.md
@@ -25,21 +25,13 @@ Some of the development is currently happening in the [llama.cpp](https://github
 - [X] Example of GPT-2 inference [examples/gpt-2](https://github.com/ggerganov/ggml/tree/master/examples/gpt-2)
 - [X] Example of GPT-J inference [examples/gpt-j](https://github.com/ggerganov/ggml/tree/master/examples/gpt-j)
 - [X] Example of Whisper inference [examples/whisper](https://github.com/ggerganov/ggml/tree/master/examples/whisper)
-- [X] Support 4-bit integer quantization https://github.com/ggerganov/ggml/pull/27
-- [X] Example of Cerebras-GPT inference [examples/gpt-2](https://github.com/ggerganov/ggml/tree/master/examples/gpt-2)
-- [ ] Example of FLAN-T5 inference https://github.com/ggerganov/ggml/pull/12
 - [X] Example of LLaMA inference [ggerganov/llama.cpp](https://github.com/ggerganov/llama.cpp)
 - [X] Example of LLaMA training [ggerganov/llama.cpp/examples/baby-llama](https://github.com/ggerganov/llama.cpp/tree/master/examples/baby-llama)
 - [X] Example of Falcon inference [cmp-nct/ggllm.cpp](https://github.com/cmp-nct/ggllm.cpp)
 - [X] Example of BLOOM inference [NouamaneTazi/bloomz.cpp](https://github.com/NouamaneTazi/bloomz.cpp)
 - [X] Example of RWKV inference [saharNooby/rwkv.cpp](https://github.com/saharNooby/rwkv.cpp)
 - [X] Example of SAM inference [examples/sam](https://github.com/ggerganov/ggml/tree/master/examples/sam)
-- [X] Idea for GPU support: https://github.com/ggerganov/llama.cpp/discussions/915
-- [X] Example of StableLM (GPT-NeoX) inference [examples/gpt-neox](https://github.com/ggerganov/ggml/tree/master/examples/gpt-neox)
 - [X] Example of BERT inference [skeskinen/bert.cpp](https://github.com/skeskinen/bert.cpp)
-- [X] Example of 💫 StarCoder inference [examples/starcoder](https://github.com/ggerganov/ggml/tree/master/examples/starcoder)
-- [X] Example of MPT inference [examples/mpt](https://github.com/ggerganov/ggml/tree/master/examples/mpt)
-- [X] Example of Replit inference [examples/replit](https://github.com/ggerganov/ggml/tree/master/examples/replit)
 - [X] Example of BioGPT inference [PABannier/biogpt.cpp](https://github.com/PABannier/biogpt.cpp)
 - [X] Example of Encodec inference [PABannier/encodec.cpp](https://github.com/PABannier/encodec.cpp)
 - [X] Example of CLIP inference [monatis/clip.cpp](https://github.com/monatis/clip.cpp)
@@ -145,7 +137,7 @@ cmake .. \
  -DCMAKE_SYSTEM_VERSION=33 \
  -DCMAKE_ANDROID_ARCH_ABI=arm64-v8a \
- -DCMAKE_ANDROID_NDK=$NDK_ROOT_PATH
+ -DCMAKE_ANDROID_NDK=$NDK_ROOT_PATH \
- -DCMAKE_ANDROID_STL_TYPE=c++_shared 
+ -DCMAKE_ANDROID_STL_TYPE=c++_shared
 ```
 
 ```bash

diff --git a/ci/run.sh b/ci/run.sh
@@ -319,52 +319,6 @@ function gg_sum_yolo {
  gg_printf '```\n'
 }
 
-# mpt
-
-function gg_run_mpt {
- cd ${SRC}
-
- gg_wget models-mnt/mpt/7B/ https://huggingface.co/mosaicml/mpt-7b/raw/main/config.json
- gg_wget models-mnt/mpt/7B/ https://huggingface.co/mosaicml/mpt-7b/raw/main/warnings.py
- gg_wget models-mnt/mpt/7B/ https://huggingface.co/mosaicml/mpt-7b/raw/main/fc.py
- gg_wget models-mnt/mpt/7B/ https://huggingface.co/mosaicml/mpt-7b/raw/main/attention.py
- gg_wget models-mnt/mpt/7B/ https://huggingface.co/mosaicml/mpt-7b/raw/main/blocks.py
- gg_wget models-mnt/mpt/7B/ https://huggingface.co/mosaicml/mpt-7b/raw/main/ffn.py
- gg_wget models-mnt/mpt/7B/ https://huggingface.co/mosaicml/mpt-7b/raw/main/norm.py
- gg_wget models-mnt/mpt/7B/ https://huggingface.co/mosaicml/mpt-7b/raw/main/tokenizer.json
- gg_wget models-mnt/mpt/7B/ https://huggingface.co/mosaicml/mpt-7b/raw/main/tokenizer_config.json
- gg_wget models-mnt/mpt/7B/ https://huggingface.co/mosaicml/mpt-7b/raw/main/pytorch_model.bin.index.json
- gg_wget models-mnt/mpt/7B/ https://huggingface.co/mosaicml/mpt-7b/raw/main/configuration_mpt.py
- gg_wget models-mnt/mpt/7B/ https://huggingface.co/mosaicml/mpt-7b/resolve/main/pytorch_model-00001-of-00002.bin
- gg_wget models-mnt/mpt/7B/ https://huggingface.co/mosaicml/mpt-7b/resolve/main/pytorch_model-00002-of-00002.bin
-
- cd build-ci-release
-
- set -e
-
- path_models="../models-mnt/mpt/7B"
- model_f16="${path_models}/ggml-model-f16.bin"
- model_q4_0="${path_models}/ggml-model-q4_0.bin"
-
- python3 ../examples/mpt/convert-h5-to-ggml.py ${path_models} 1
- ./bin/mpt-quantize ${model_f16} ${model_q4_0} q4_0
-
- (time ./bin/mpt --model ${model_f16} -s 1234 -n 64 -p "I believe the meaning of life is") 2>&1 | tee -a $OUT/${ci}-tg.log
- (time ./bin/mpt --model ${model_q4_0} -s 1234 -n 64 -p "I believe the meaning of life is") 2>&1 | tee -a $OUT/${ci}-tg.log
-
- set +e
-}
-
-function gg_sum_mpt {
- gg_printf '### %s\n\n' "${ci}"
-
- gg_printf 'Runs short MPT text generation\n'
- gg_printf '- status: %s\n' "$(cat $OUT/${ci}.exit)"
- gg_printf '```\n'
- gg_printf '%s\n' "$(cat $OUT/${ci}-tg.log)"
- gg_printf '```\n'
-}
-
 ## main
 
 if [ -z $GG_BUILD_LOW_PERF ]; then
@@ -394,7 +348,8 @@ test $ret -eq 0 && gg_run yolo
 
 if [ -z $GG_BUILD_LOW_PERF ]; then
  if [ -z ${GG_BUILD_VRAM_GB} ] || [ ${GG_BUILD_VRAM_GB} -ge 16 ]; then
- test $ret -eq 0 && gg_run mpt
+ # no tests currently require a GPU with at least 16GB of VRAM; `date` is a placeholder command so this block is not empty
+ date
  fi
 fi
 

diff --git a/examples/CMakeLists.txt b/examples/CMakeLists.txt
@@ -22,10 +22,5 @@ add_subdirectory(gpt-2)
 add_subdirectory(gpt-j)
 add_subdirectory(whisper)
 add_subdirectory(mnist)
-add_subdirectory(gpt-neox)
-add_subdirectory(dolly-v2)
-add_subdirectory(replit)
-add_subdirectory(mpt)
-add_subdirectory(starcoder)
 add_subdirectory(sam)
 add_subdirectory(yolo)
diff --git a/examples/dolly-v2/CMakeLists.txt b/examples/dolly-v2/CMakeLists.txt
diff --git a/examples/dolly-v2/README.md b/examples/dolly-v2/README.md