diff --git a/CMakeLists.txt b/CMakeLists.txt index 5fdbedd..bbf5995 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -337,7 +337,7 @@ endif() add_library(llama llama.cpp llama.h - llama_util.h) + llama-util.h) target_include_directories(llama PUBLIC .) target_compile_features(llama PUBLIC cxx_std_11) # don't bump diff --git a/Makefile b/Makefile index 5a1cb3e..fd695d7 100644 --- a/Makefile +++ b/Makefile @@ -168,7 +168,7 @@ $(info ) ggml.o: ggml.c ggml.h ggml-cuda.h $(CC) $(CFLAGS) -c $< -o $@ -llama.o: llama.cpp ggml.h ggml-cuda.h llama.h llama_util.h +llama.o: llama.cpp ggml.h ggml-cuda.h llama.h llama-util.h $(CXX) $(CXXFLAGS) -c $< -o $@ common.o: examples/common.cpp examples/common.h diff --git a/examples/save-load-state/save-load-state.cpp b/examples/save-load-state/save-load-state.cpp index f5f02ec..f1531ba 100644 --- a/examples/save-load-state/save-load-state.cpp +++ b/examples/save-load-state/save-load-state.cpp @@ -66,6 +66,7 @@ int main(int argc, char ** argv) { // first run printf("\n%s", params.prompt.c_str()); + for (auto i = 0; i < params.n_predict; i++) { auto logits = llama_get_logits(ctx); auto n_vocab = llama_n_vocab(ctx); @@ -86,6 +87,7 @@ int main(int argc, char ** argv) { } n_past += 1; } + printf("\n\n"); // free old model @@ -101,7 +103,13 @@ int main(int argc, char ** argv) { fprintf(stderr, "\n%s : failed to validate state size\n", __func__); return 1; } - fread(state_mem, 1, state_size, fp_read); + + const size_t ret = fread(state_mem, 1, state_size, fp_read); + if (ret != state_size) { + fprintf(stderr, "\n%s : failed to read state\n", __func__); + return 1; + } + llama_set_state_data(ctx2, state_mem); // could also read directly from memory mapped file fclose(fp_read); }