From efd05648c88a0923a55f56e7ce1b0f9c33410afb Mon Sep 17 00:00:00 2001
From: Arik Poznanski <arikpoz@users.noreply.github.com>
Date: Mon, 17 Apr 2023 17:41:53 +0300
Subject: [PATCH] llama : well-defined static initialization of complex objects
 (#927)

* Replaced static initialization of complex objects with a initialization on first use. This prevents an undefined behavior on program run, for example, crash in Release build, works in Debug build

* replaced use of auto with exact type to avoid using -std=c++14

* Made the assessors functions for static maps be static const
---
 llama.cpp                  | 72 +++++++++++++++++++++++---------------
 tests/test-tokenizer-0.cpp | 20 ++++++-----
 2 files changed, 56 insertions(+), 36 deletions(-)

diff --git a/llama.cpp b/llama.cpp
index 3b916e5..cdd8bd1 100644
--- a/llama.cpp
+++ b/llama.cpp
@@ -42,35 +42,51 @@ static const size_t MB = 1024*1024;
 // TODO: dynamically determine these sizes
 //       needs modifications in ggml
 
-static const std::map<e_model, size_t> MEM_REQ_SCRATCH0 = {
-    { MODEL_7B,    512ull*MB },
-    { MODEL_13B,   512ull*MB },
-    { MODEL_30B,   512ull*MB },
-    { MODEL_65B,   512ull*MB },
-};
+static const std::map<e_model, size_t> & MEM_REQ_SCRATCH0()
+{
+    static std::map<e_model, size_t> _MEM_REQ_SCRATCH0 = {
+        { MODEL_7B,    512ull * MB },
+        { MODEL_13B,   512ull * MB },
+        { MODEL_30B,   512ull * MB },
+        { MODEL_65B,   512ull * MB },
+    };
+    return _MEM_REQ_SCRATCH0;
+}
 
-static const std::map<e_model, size_t> MEM_REQ_SCRATCH1 = {
-    { MODEL_7B,    512ull*MB },
-    { MODEL_13B,   512ull*MB },
-    { MODEL_30B,   512ull*MB },
-    { MODEL_65B,   512ull*MB },
+static const std::map<e_model, size_t> & MEM_REQ_SCRATCH1()
+{
+    static std::map<e_model, size_t> _MEM_REQ_SCRATCH1 = {
+        { MODEL_7B,    512ull * MB },
+        { MODEL_13B,   512ull * MB },
+        { MODEL_30B,   512ull * MB },
+        { MODEL_65B,   512ull * MB },
+    };
+    return _MEM_REQ_SCRATCH1;
 };
 
 // 2*n_embd*n_ctx*n_layer*sizeof(float16)
-static const std::map<e_model, size_t> MEM_REQ_KV_SELF = {
-    { MODEL_7B,   1026ull*MB },
-    { MODEL_13B,  1608ull*MB },
-    { MODEL_30B,  3124ull*MB },
-    { MODEL_65B,  5120ull*MB },
+static const std::map<e_model, size_t> & MEM_REQ_KV_SELF()
+{
+    static std::map<e_model, size_t> _MEM_REQ_KV_SELF = {
+        { MODEL_7B,   1026ull * MB },
+        { MODEL_13B,  1608ull * MB },
+        { MODEL_30B,  3124ull * MB },
+        { MODEL_65B,  5120ull * MB },
+    };
+    return _MEM_REQ_KV_SELF;
 };
 
 // this is mostly needed for temporary mul_mat buffers to dequantize the data
 // not actually needed if BLAS is disabled
-static const std::map<e_model, size_t> MEM_REQ_EVAL = {
-    { MODEL_7B,   768ull*MB },
-    { MODEL_13B, 1024ull*MB },
-    { MODEL_30B, 1280ull*MB },
-    { MODEL_65B, 1536ull*MB },
+static const std::map<e_model, size_t> & MEM_REQ_EVAL()
+{
+    static std::map<e_model, size_t> _MEM_REQ_EVAL = {
+        { MODEL_7B,   768ull * MB },
+        { MODEL_13B, 1024ull * MB },
+        { MODEL_30B, 1280ull * MB },
+        { MODEL_65B, 1536ull * MB },
+    };
+    return _MEM_REQ_EVAL;
 };
 
 // default hparams (LLaMA 7B)
@@ -899,13 +915,13 @@ static void llama_model_load_internal(
         const size_t mem_required =
             ctx_size +
             mmapped_size +
-            MEM_REQ_SCRATCH0.at(model.type) +
-            MEM_REQ_SCRATCH1.at(model.type) +
-            MEM_REQ_EVAL.at    (model.type);
+            MEM_REQ_SCRATCH0().at(model.type) +
+            MEM_REQ_SCRATCH1().at(model.type) +
+            MEM_REQ_EVAL().at(model.type);
 
         // this is the memory required by one llama_state
         const size_t mem_required_state =
-            scale*MEM_REQ_KV_SELF.at(model.type);
+            scale*MEM_REQ_KV_SELF().at(model.type);
 
         fprintf(stderr, "%s: mem required  = %7.2f MB (+ %7.2f MB per state)\n", __func__,
                 mem_required / 1024.0 / 1024.0, mem_required_state / 1024.0 / 1024.0);
@@ -1732,10 +1748,10 @@ struct llama_context * llama_init_from_file(
             ctx->embedding.resize(hparams.n_embd);
         }
 
-        ctx->buf_compute.resize(MEM_REQ_EVAL.at(ctx->model.type));
+        ctx->buf_compute.resize(MEM_REQ_EVAL().at(ctx->model.type));
 
-        ctx->buf_scratch[0].resize(MEM_REQ_SCRATCH0.at(ctx->model.type));
-        ctx->buf_scratch[1].resize(MEM_REQ_SCRATCH1.at(ctx->model.type));
+        ctx->buf_scratch[0].resize(MEM_REQ_SCRATCH0().at(ctx->model.type));
+        ctx->buf_scratch[1].resize(MEM_REQ_SCRATCH1().at(ctx->model.type));
     }
 
     return ctx;
diff --git a/tests/test-tokenizer-0.cpp b/tests/test-tokenizer-0.cpp
index 55b086d..b089845 100644
--- a/tests/test-tokenizer-0.cpp
+++ b/tests/test-tokenizer-0.cpp
@@ -5,13 +5,17 @@
 #include <map>
 #include <vector>
 
-static const std::map<std::string, std::vector<llama_token>> k_tests = {
-    { "Hello World",        { 1,  10994,   2787, }, },
-    { " Hello World",       { 1,  15043,   2787, }, },
-    { " Hello World!",      { 1,  15043,   2787,  29991, }, },
-    { " this is 🦙.cpp",    { 1,    445,    338,  29871,    243,    162,    169,    156,  29889,   8223, }, },
-    { "w048 7tuijk dsdfhu", { 1,  29893,  29900,  29946,  29947,  29871,  29955,   9161,  13535,  18031,   2176,   6905, }, },
-    { "нещо на Български",  { 1,    821,   4851,    665,   1386,  29713,   1305, }, },
+static const std::map<std::string, std::vector<llama_token>> & k_tests()
+{
+    static std::map<std::string, std::vector<llama_token>> _k_tests = {
+        { "Hello World",        { 1,  10994,   2787, }, },
+        { " Hello World",       { 1,  15043,   2787, }, },
+        { " Hello World!",      { 1,  15043,   2787,  29991, }, },
+        { " this is 🦙.cpp",    { 1,    445,    338,  29871,    243,    162,    169,    156,  29889,   8223, }, },
+        { "w048 7tuijk dsdfhu", { 1,  29893,  29900,  29946,  29947,  29871,  29955,   9161,  13535,  18031,   2176,   6905, }, },
+        { "нещо на Български",  { 1,    821,   4851,    665,   1386,  29713,   1305, }, },
+    };
+    return _k_tests;
 };
 
 int main(int argc, char **argv) {
@@ -47,7 +51,7 @@ int main(int argc, char **argv) {
         return 2;
     }
 
-    for (const auto & test_kv : k_tests) {
+    for (const auto & test_kv : k_tests()) {
         std::vector<llama_token> res(test_kv.first.size());
         const int n = llama_tokenize(ctx, test_kv.first.c_str(), res.data(), res.size(), true);
         res.resize(n);