From 051e1b0e6a6e3aee7d989b47760980e6fda5861c Mon Sep 17 00:00:00 2001 From: Georgi Gerganov Date: Sat, 17 Jun 2023 19:30:22 +0300 Subject: [PATCH] llama : fix kv_cache `n` init (close #1903) --- .gitignore | 1 + examples/CMakeLists.txt | 1 + llama.cpp | 2 ++ 3 files changed, 4 insertions(+) diff --git a/.gitignore b/.gitignore index e68fd72..e7bfd52 100644 --- a/.gitignore +++ b/.gitignore @@ -34,6 +34,7 @@ models/* /perplexity /embedding /train-text-from-scratch +/simple /benchmark-matmult /vdot /server diff --git a/examples/CMakeLists.txt b/examples/CMakeLists.txt index de005f3..cf9c4a2 100644 --- a/examples/CMakeLists.txt +++ b/examples/CMakeLists.txt @@ -38,6 +38,7 @@ else() add_subdirectory(benchmark) add_subdirectory(baby-llama) add_subdirectory(train-text-from-scratch) + add_subdirectory(simple) if (LLAMA_METAL) add_subdirectory(metal) endif() diff --git a/llama.cpp b/llama.cpp index a50846f..a2916b3 100644 --- a/llama.cpp +++ b/llama.cpp @@ -886,6 +886,7 @@ static bool kv_cache_init( const int64_t n_elements = n_embd*n_mem; cache.buf.resize(2u*n_elements*ggml_type_size(wtype) + 2u*MB); + cache.n = 0; struct ggml_init_params params; params.mem_size = cache.buf.size; @@ -904,6 +905,7 @@ static bool kv_cache_init( ggml_set_name(cache.k, "cache_k"); ggml_set_name(cache.v, "cache_v"); + (void) n_gpu_layers; #ifdef GGML_USE_CUBLAS if (n_gpu_layers > n_layer + 1) { ggml_cuda_assign_buffers_no_scratch(cache.v);