From fcf734c58122237de6fb39ad12820a6c22bf6edd Mon Sep 17 00:00:00 2001
From: Ravindra Marella
Date: Thu, 18 May 2023 18:48:26 +0530
Subject: [PATCH 1/3] mpt : move global variable `n_ctx` to `mpt_hparams`

---
 examples/mpt/main.cpp | 9 ++++++---
 1 file changed, 6 insertions(+), 3 deletions(-)

diff --git a/examples/mpt/main.cpp b/examples/mpt/main.cpp
index 5a60367a3..25c2288aa 100644
--- a/examples/mpt/main.cpp
+++ b/examples/mpt/main.cpp
@@ -18,10 +18,9 @@
 #include <utility>
 #include <vector>
 
-int n_ctx = 4096;
-
 // no defaults for now
 struct mpt_hparams {
+    int32_t n_ctx       = 4096;
     int32_t d_model     = 0;
     int32_t max_seq_len = 0;
     int32_t n_heads     = 0;
@@ -141,6 +140,7 @@ bool mpt_model_load(const std::string & fname, mpt_model & model, gpt_vocab & vo
     {
         const auto & hparams = model.hparams;
 
+        const int32_t n_ctx = hparams.n_ctx;
         const size_t n_embd  = hparams.d_model;
         const size_t n_layer = hparams.n_layers;
         const size_t n_vocab = hparams.n_vocab;
@@ -220,6 +220,7 @@ bool mpt_model_load(const std::string & fname, mpt_model & model, gpt_vocab & vo
     {
         const auto & hparams = model.hparams;
 
+        const int32_t n_ctx = hparams.n_ctx;
         const size_t n_embd  = hparams.d_model;
         const size_t n_layer = hparams.n_layers;
 
@@ -231,7 +232,7 @@ bool mpt_model_load(const std::string & fname, mpt_model & model, gpt_vocab & vo
 
         const size_t memory_size = ggml_nbytes(model.memory_k) + ggml_nbytes(model.memory_v);
 
-        printf("%s: memory_size = %8.2f MB, n_mem = %lld\n", __func__, memory_size / 1024.0 / 1024.0, n_mem);
+        printf("%s: memory_size = %8.2f MB, n_mem = %ld\n", __func__, memory_size / 1024.0 / 1024.0, n_mem);
     }
 
     // load weights
@@ -332,6 +333,7 @@ bool mpt_eval(const mpt_model & model, const int n_threads, const int n_past,
 
     const auto & hparams = model.hparams;
 
+    const int32_t n_ctx = hparams.n_ctx;
     const int n_embd  = hparams.d_model;
     const int n_layer = hparams.n_layers;
     const int n_head  = hparams.n_heads;
@@ -593,6 +595,7 @@ int main(int argc, char ** argv) {
     }
     printf("\n");
 
+    const int32_t n_ctx = model.hparams.n_ctx;
     params.n_predict = std::min(params.n_predict, n_ctx - (int)embd_inp.size());
 
     std::vector<gpt_vocab::id> embd;

From eec32d1b71e691b57eb3253cea0a581e11bedd5f Mon Sep 17 00:00:00 2001
From: Ravindra Marella
Date: Sun, 21 May 2023 02:39:31 +0530
Subject: [PATCH 2/3] mpt : fix warnings

---
 examples/mpt/main.cpp | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/examples/mpt/main.cpp b/examples/mpt/main.cpp
index e4caf7edd..095654209 100644
--- a/examples/mpt/main.cpp
+++ b/examples/mpt/main.cpp
@@ -203,7 +203,7 @@ bool mpt_model_load(const std::string & fname, mpt_model & model, gpt_vocab & vo
     model.tensors["transformer.wte.weight"]    = model.wte_weight;
     model.tensors["transformer.norm_f.weight"] = model.norm_f_weight;
 
-    for (int i = 0; i < n_layer; ++i) {
+    for (int i = 0; i < (int)n_layer; ++i) {
         auto & layer = model.layers[i];
 
         layer.norm_1_weight = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, n_embd);
@@ -379,7 +379,8 @@ bool mpt_eval(const mpt_model & model, const int n_threads, const int n_past,
     };
 
     struct ggml_context * ctx0 = ggml_init(params);
-    struct ggml_cgraph gf = {.n_threads = n_threads};
+    struct ggml_cgraph gf = {};
+    gf.n_threads = n_threads;
 
     struct ggml_tensor * embd = ggml_new_tensor_1d(ctx0, GGML_TYPE_I32, N);
     memcpy(embd->data, embd_inp.data(), N * ggml_element_size(embd));
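Note on the warning fixed in PATCH 2/3: `struct ggml_cgraph gf = {.n_threads = n_threads};` uses a C99-style designated initializer, which only became standard C++ in C++20, so compilers warn about it (or reject it under -pedantic) when the example is built as pre-C++20 C++. The patch value-initializes the struct and then assigns the one field it needs. The sketch below illustrates the pattern; `my_cgraph` and `build_graph` are reduced stand-ins invented for this note, not ggml's real `ggml_cgraph` type:

// Reduced stand-in for ggml's graph struct; the real ggml_cgraph has
// many more fields, which is why zeroing all of them in one step matters.
struct my_cgraph {
    int n_nodes;
    int n_leafs;
    int n_threads;
};

int build_graph(int n_threads) {
    // struct my_cgraph gf = {.n_threads = n_threads}; // C99 designated
    // initializer: warns or errors in pre-C++20 dialects.

    my_cgraph gf = {};        // empty braces zero-initialize every field
    gf.n_threads = n_threads; // then set the one field we care about

    return gf.n_threads;
}

The same patch silences a signed/unsigned comparison warning by casting the `size_t` loop bound: `i < (int)n_layer`.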
From b717faeaad89744ca281c099b84584d063723ae4 Mon Sep 17 00:00:00 2001
From: Ravindra Marella
Date: Mon, 22 May 2023 20:56:03 +0530
Subject: [PATCH 3/3] mpt : fix `n_ctx`

---
 examples/mpt/main.cpp | 12 ++++++++----
 1 file changed, 8 insertions(+), 4 deletions(-)

diff --git a/examples/mpt/main.cpp b/examples/mpt/main.cpp
index 095654209..c1e32ee1d 100644
--- a/examples/mpt/main.cpp
+++ b/examples/mpt/main.cpp
@@ -3,6 +3,7 @@
 #include "common-ggml.h"
 #include "common.h"
 
+#include <algorithm>
 #include <cmath>
 #include <cstddef>
 #include <cstdio>
@@ -19,10 +20,11 @@
 #include <vector>
 
 // no defaults for now
+// Here `n_ctx` is the max limit for context length.
+// See https://github.com/ggerganov/ggml/pull/165#issuecomment-1556233670
 struct mpt_hparams {
-    int32_t n_ctx       = 4096;
     int32_t d_model     = 0;
-    int32_t max_seq_len = 0;
+    int32_t n_ctx       = 4096;
     int32_t n_heads     = 0;
     int32_t n_layers    = 0;
     int32_t n_vocab     = 0;
@@ -86,9 +88,10 @@ bool mpt_model_load(const std::string & fname, mpt_model & model, gpt_vocab & vo
     // load hparams
     {
         auto & hparams = model.hparams;
+        int32_t n_ctx;
 
         fin.read((char *) &hparams.d_model,     sizeof(hparams.d_model));
-        fin.read((char *) &hparams.max_seq_len, sizeof(hparams.max_seq_len));
+        fin.read((char *) &n_ctx,               sizeof(hparams.n_ctx));
         fin.read((char *) &hparams.n_heads,     sizeof(hparams.n_heads));
         fin.read((char *) &hparams.n_layers,    sizeof(hparams.n_layers));
         fin.read((char *) &hparams.n_vocab,     sizeof(hparams.n_vocab));
@@ -96,10 +99,11 @@ bool mpt_model_load(const std::string & fname, mpt_model & model, gpt_vocab & vo
         fin.read((char *) &hparams.clip_qkv, sizeof(hparams.clip_qkv));
         fin.read((char *) &hparams.ftype,    sizeof(hparams.ftype));
 
+        hparams.n_ctx = std::min(n_ctx, hparams.n_ctx);
         const int32_t qntvr = hparams.ftype / GGML_QNT_VERSION_FACTOR;
 
         printf("%s: d_model     = %d\n", __func__, hparams.d_model);
-        printf("%s: max_seq_len = %d\n", __func__, hparams.max_seq_len);
+        printf("%s: n_ctx       = %d\n", __func__, hparams.n_ctx);
         printf("%s: n_heads     = %d\n", __func__, hparams.n_heads);
         printf("%s: n_layers    = %d\n", __func__, hparams.n_layers);
        printf("%s: n_vocab     = %d\n", __func__, hparams.n_vocab);
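Note on the clamp in PATCH 3/3: after this change, `n_ctx` is the minimum of the compile-time limit (4096) and the `max_seq_len` value stored in the model file, so the KV cache is never sized beyond what either side supports. A minimal standalone sketch of that behaviour follows; `hparams_sketch` is the struct cut down to one field, and `n_ctx_from_file` stands in for the value `fin.read` pulls from the model header (both names are illustrative, not from the patched file):

#include <algorithm>
#include <cstdint>
#include <cstdio>

// Reduced stand-in for mpt_hparams: only the field the sketch needs.
struct hparams_sketch {
    int32_t n_ctx = 4096; // compile-time upper limit for the context length
};

int main() {
    hparams_sketch hparams;

    // Stand-in for the max_seq_len value read from the model file.
    int32_t n_ctx_from_file = 2048;

    // Keep the smaller value, as the loader does: a model trained with
    // max_seq_len = 2048 gets a 2048-token context, while a model
    // reporting more than 4096 is still capped at the 4096 default.
    hparams.n_ctx = std::min(n_ctx_from_file, hparams.n_ctx);

    printf("n_ctx = %d\n", (int) hparams.n_ctx); // prints: n_ctx = 2048
    return 0;
}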