diff --git a/examples/baby-llama/baby-llama.cpp b/examples/baby-llama/baby-llama.cpp index 0add6ad..50e14c4 100644 --- a/examples/baby-llama/baby-llama.cpp +++ b/examples/baby-llama/baby-llama.cpp @@ -4,6 +4,10 @@ #include #include +#if defined(_MSC_VER) +#pragma warning(disable: 4244 4267) // possible loss of data +#endif + float frand() { return (float)rand()/(float)RAND_MAX; } @@ -1470,7 +1474,7 @@ struct ggml_tensor * square_error_loss(struct ggml_context * ctx, struct ggml_te } struct ggml_tensor * cross_entropy_loss(struct ggml_context * ctx, struct ggml_tensor * a, struct ggml_tensor * b) { - const float eps = 1e-3; + const float eps = 1e-3f; return ggml_sum(ctx, ggml_neg(ctx, diff --git a/examples/benchmark/benchmark-matmult.cpp b/examples/benchmark/benchmark-matmult.cpp index 9f9ed9d..39d15ca 100644 --- a/examples/benchmark/benchmark-matmult.cpp +++ b/examples/benchmark/benchmark-matmult.cpp @@ -16,6 +16,10 @@ #include #include +#if defined(_MSC_VER) +#pragma warning(disable: 4244 4267) // possible loss of data +#endif + float tensor_sum_elements(const ggml_tensor * tensor) { float sum = 0; if (tensor->type==GGML_TYPE_F32) { @@ -29,9 +33,9 @@ float tensor_sum_elements(const ggml_tensor * tensor) { } void tensor_dump(const ggml_tensor * tensor, const char * name) { - printf("%15s: type = %i (%5s) ne = %5d x %5d x %5d, nb = (%5li, %5li, %5li) - ", name, + printf("%15s: type = %i (%5s) ne = %5" PRIi64 " x %5" PRIi64 " x %5" PRIi64 ", nb = (%5zi, %5zi, %5zi) - ", name, tensor->type, ggml_type_name(tensor->type), - (int) tensor->ne[0], (int) tensor->ne[1], (int) tensor->ne[2], tensor->nb[0], tensor->nb[1], tensor->nb[2]); + tensor->ne[0], tensor->ne[1], tensor->ne[2], tensor->nb[0], tensor->nb[1], tensor->nb[2]); float sum = tensor_sum_elements(tensor); printf("Sum of tensor %s is %6.2f\n", name, sum); } @@ -120,7 +124,7 @@ int main(int argc, char ** argv) { ctx_size += sizex*sizey*ggml_type_sizef(GGML_TYPE_F32); // BLAS ctx_size += 1024*1024*16; - printf("Allocating Memory of size %li bytes, %li MB\n",ctx_size, (ctx_size/1024/1024)); + printf("Allocating Memory of size %zi bytes, %zi MB\n",ctx_size, (ctx_size/1024/1024)); struct ggml_init_params params = { /*.mem_size =*/ ctx_size, diff --git a/examples/common.cpp b/examples/common.cpp index b47f062..055383b 100644 --- a/examples/common.cpp +++ b/examples/common.cpp @@ -28,6 +28,10 @@ #include #endif +#if defined(_MSC_VER) +#pragma warning(disable: 4244 4267) // possible loss of data +#endif + int32_t get_num_physical_cores() { #ifdef __linux__ // enumerate the set of thread siblings, num entries is num cores @@ -373,7 +377,7 @@ bool gpt_params_parse(int argc, char ** argv, gpt_params & params) { } else { throw std::exception(); } - } catch (const std::exception &e) { + } catch (const std::exception&) { invalid_param = true; break; } diff --git a/examples/embedding/embedding.cpp b/examples/embedding/embedding.cpp index 03603b1..860f99f 100644 --- a/examples/embedding/embedding.cpp +++ b/examples/embedding/embedding.cpp @@ -4,6 +4,10 @@ #include +#if defined(_MSC_VER) +#pragma warning(disable: 4244 4267) // possible loss of data +#endif + int main(int argc, char ** argv) { gpt_params params; diff --git a/examples/main/main.cpp b/examples/main/main.cpp index efa913e..ef9e75f 100644 --- a/examples/main/main.cpp +++ b/examples/main/main.cpp @@ -28,6 +28,10 @@ #include #endif +#if defined(_MSC_VER) +#pragma warning(disable: 4244 4267) // possible loss of data +#endif + static console_state con_st; static llama_context ** g_ctx; @@ -348,7 +352,7 @@ int main(int argc, char ** argv) { if ((int)embd.size() > max_embd_size) { auto skipped_tokens = embd.size() - max_embd_size; console_set_color(con_st, CONSOLE_COLOR_ERROR); - printf("<>", skipped_tokens, skipped_tokens != 1 ? "s" : ""); + printf("<>", skipped_tokens, skipped_tokens != 1 ? "s" : ""); console_set_color(con_st, CONSOLE_COLOR_DEFAULT); fflush(stdout); embd.resize(max_embd_size); diff --git a/examples/perplexity/perplexity.cpp b/examples/perplexity/perplexity.cpp index e19c682..ae8cfe0 100644 --- a/examples/perplexity/perplexity.cpp +++ b/examples/perplexity/perplexity.cpp @@ -5,6 +5,10 @@ #include #include +#if defined(_MSC_VER) +#pragma warning(disable: 4244 4267) // possible loss of data +#endif + std::vector softmax(const std::vector& logits) { std::vector probs(logits.size()); float max_logit = logits[0]; diff --git a/examples/quantize-stats/quantize-stats.cpp b/examples/quantize-stats/quantize-stats.cpp index 6e4f7e1..6b8018e 100644 --- a/examples/quantize-stats/quantize-stats.cpp +++ b/examples/quantize-stats/quantize-stats.cpp @@ -19,6 +19,10 @@ #include #include +#if defined(_MSC_VER) +#pragma warning(disable: 4244 4267) // possible loss of data +#endif + struct quantize_stats_params { std::string model = "models/7B/ggml-model-f16.bin"; bool verbose = false; diff --git a/examples/save-load-state/save-load-state.cpp b/examples/save-load-state/save-load-state.cpp index 91f04b6..da4d37a 100644 --- a/examples/save-load-state/save-load-state.cpp +++ b/examples/save-load-state/save-load-state.cpp @@ -37,7 +37,7 @@ int main(int argc, char ** argv) { // init auto ctx = llama_init_from_file(params.model.c_str(), lparams); auto tokens = std::vector(params.n_ctx); - auto n_prompt_tokens = llama_tokenize(ctx, params.prompt.c_str(), tokens.data(), tokens.size(), true); + auto n_prompt_tokens = llama_tokenize(ctx, params.prompt.c_str(), tokens.data(), int(tokens.size()), true); if (n_prompt_tokens < 1) { fprintf(stderr, "%s : failed to tokenize prompt\n", __func__); diff --git a/examples/train-text-from-scratch/train-text-from-scratch.cpp b/examples/train-text-from-scratch/train-text-from-scratch.cpp index 51271b4..7ec8595 100644 --- a/examples/train-text-from-scratch/train-text-from-scratch.cpp +++ b/examples/train-text-from-scratch/train-text-from-scratch.cpp @@ -12,6 +12,9 @@ #include #include +#if defined(_MSC_VER) +#pragma warning(disable: 4244 4267) // possible loss of data +#endif struct random_normal_distribution { std::mt19937 gen; @@ -20,7 +23,6 @@ struct random_normal_distribution { float max; }; - struct random_uniform_distribution { std::mt19937 gen; std::uniform_real_distribution rd; @@ -2366,7 +2368,7 @@ void write_tensor(struct llama_file * file, struct ggml_tensor * tensor) { file->write_u32(0); file->write_u32(0); file->write_u32(GGML_TYPE_F32); - file->seek(-file->tell() & 31, SEEK_CUR); + file->seek(0-file->tell() & 31, SEEK_CUR); return; } const char * name = ggml_get_name(tensor); @@ -2381,7 +2383,7 @@ void write_tensor(struct llama_file * file, struct ggml_tensor * tensor) { file->write_u32(tensor->type); file->write_raw(ne, sizeof(ne[0]) * nd); file->write_raw(name, name_len); - file->seek(-file->tell() & 31, SEEK_CUR); + file->seek(0-file->tell() & 31, SEEK_CUR); file->write_raw(tensor->data, ggml_nbytes(tensor)); } @@ -2402,7 +2404,7 @@ void read_tensor(struct llama_file * file, struct ggml_tensor * tensor) { std::string name = file->read_string(name_len); GGML_ASSERT(strncmp(ggml_get_name(tensor), name.c_str(), sizeof(tensor->name)-1) == 0); - file->seek(-file->tell() & 31, SEEK_CUR); + file->seek(0-file->tell() & 31, SEEK_CUR); file->read_raw(tensor->data, ggml_nbytes(tensor)); } @@ -2756,8 +2758,8 @@ struct train_params get_default_train_params() { params.lbfgs_n_iter = 16; params.adam_n_iter = 16; - params.adam_alpha = 1e-3; - params.adam_decay = 1e-3; + params.adam_alpha = 1e-3f; + params.adam_decay = 1e-3f; params.mem_model_gb = 2; params.mem_compute_gb = 24; @@ -3331,8 +3333,8 @@ int main(int argc, char ** argv) { int n_gen = params.n_predict; int sample_ctx = n_tokens - n_tokens/8; - sampler.params.temp = 0.2; - sampler.params.repeat_penalty = 1.1; + sampler.params.temp = 0.2f; + sampler.params.repeat_penalty = 1.1f; sampler.params.mirostat = 2; init_sampler(&sampler, lctx); diff --git a/ggml.c b/ggml.c index c0efa19..0eda7f3 100644 --- a/ggml.c +++ b/ggml.c @@ -35,6 +35,12 @@ #define static_assert(cond, msg) struct global_scope_noop_trick #endif +#if defined(_MSC_VER) +// disable "possible loss of data" to avoid hundreds of casts +// we should just be careful :) +#pragma warning(disable: 4244 4267) +#endif + #if defined(_WIN32) #include diff --git a/llama.cpp b/llama.cpp index b8bc0d8..a904388 100644 --- a/llama.cpp +++ b/llama.cpp @@ -40,6 +40,10 @@ #include #include +#if defined(_MSC_VER) +#pragma warning(disable: 4244 4267) // possible loss of data +#endif + #define LLAMA_USE_SCRATCH #define LLAMA_MAX_SCRATCH_BUFFERS 16 diff --git a/pocs/vdot/vdot.cpp b/pocs/vdot/vdot.cpp index 26bf50c..7b18090 100644 --- a/pocs/vdot/vdot.cpp +++ b/pocs/vdot/vdot.cpp @@ -10,6 +10,10 @@ #include +#if defined(_MSC_VER) +#pragma warning(disable: 4244 4267) // possible loss of data +#endif + constexpr int kVecSize = 1 << 18; float drawFromGaussianPdf(std::mt19937& rndm) { diff --git a/tests/test-quantize-fns.cpp b/tests/test-quantize-fns.cpp index 728460b..c40f1b2 100644 --- a/tests/test-quantize-fns.cpp +++ b/tests/test-quantize-fns.cpp @@ -9,12 +9,15 @@ #include #include +#if defined(_MSC_VER) +#pragma warning(disable: 4244 4267) // possible loss of data +#endif -const float MAX_QUANTIZATION_REFERENCE_ERROR = 0.0001; -const float MAX_QUANTIZATION_TOTAL_ERROR = 0.002; -const float MAX_QUANTIZATION_TOTAL_ERROR_2BITS = 0.0075; -const float MAX_QUANTIZATION_TOTAL_ERROR_3BITS = 0.0040; -const float MAX_DOT_PRODUCT_ERROR = 0.02; +const float MAX_QUANTIZATION_REFERENCE_ERROR = 0.0001f; +const float MAX_QUANTIZATION_TOTAL_ERROR = 0.002f; +const float MAX_QUANTIZATION_TOTAL_ERROR_2BITS = 0.0075f; +const float MAX_QUANTIZATION_TOTAL_ERROR_3BITS = 0.0040f; +const float MAX_DOT_PRODUCT_ERROR = 0.02f; const char* RESULT_STR[] = {"ok", "FAILED"}; diff --git a/tests/test-quantize-perf.cpp b/tests/test-quantize-perf.cpp index d551445..6003757 100644 --- a/tests/test-quantize-perf.cpp +++ b/tests/test-quantize-perf.cpp @@ -13,6 +13,10 @@ #include #include +#if defined(_MSC_VER) +#pragma warning(disable: 4244 4267) // possible loss of data +#endif + #define MAX_ALIGNMENT 64 #define QK 32 #define WARMUP 5 diff --git a/tests/test-sampling.cpp b/tests/test-sampling.cpp index 0e67512..5d693f7 100644 --- a/tests/test-sampling.cpp +++ b/tests/test-sampling.cpp @@ -176,27 +176,27 @@ void test_frequency_presence_penalty( int main(void) { ggml_time_init(); - test_top_k({0.1, 0.2, 0.3, 0.4}, {0.4}, 1); - test_top_k({0.1, 0.2, 0.3, 0.4}, {0.4, 0.3, 0.2}, 3); + test_top_k({0.1f, 0.2f, 0.3f, 0.4f}, {0.4f}, 1); + test_top_k({0.1f, 0.2f, 0.3f, 0.4f}, {0.4f, 0.3f, 0.2f}, 3); - test_top_p({0.1, 0.2, 0.3, 0.4}, {0.4}, 0); - test_top_p({0.1, 0.2, 0.3, 0.4}, {0.4, 0.3}, 0.7); - test_top_p({0.1, 0.2, 0.3, 0.4}, {0.4, 0.3, 0.2, 0.1}, 1); + test_top_p({0.1f, 0.2f, 0.3f, 0.4f}, {0.4f}, 0); + test_top_p({0.1f, 0.2f, 0.3f, 0.4f}, {0.4f, 0.3f}, 0.7f); + test_top_p({0.1f, 0.2f, 0.3f, 0.4f}, {0.4f, 0.3f, 0.2f, 0.1f}, 1); - test_tfs({0.1, 0.15, 0.2, 0.25, 0.3}, {0.3}, 0.25); - test_tfs({0.1, 0.15, 0.2, 0.25, 0.3}, {0.3, 0.25}, 0.75); - test_tfs({0.1, 0.15, 0.2, 0.25, 0.3}, {0.3, 0.25}, 0.99); + test_tfs({0.1f, 0.15f, 0.2f, 0.25f, 0.3f}, {0.3f}, 0.25f); + test_tfs({0.1f, 0.15f, 0.2f, 0.25f, 0.3f}, {0.3f, 0.25f}, 0.75f); + test_tfs({0.1f, 0.15f, 0.2f, 0.25f, 0.3f}, {0.3f, 0.25f}, 0.99f); - test_typical({0.97, 0.01, 0.01, 0.01}, {0.97}, 0.5); - test_typical({0.4, 0.2, 0.2, 0.2}, {0.2, 0.2, 0.2}, 0.5); + test_typical({0.97f, 0.01f, 0.01f, 0.01f}, {0.97f}, 0.5f); + test_typical({0.4f, 0.2f, 0.2f, 0.2f}, {0.2f, 0.2f, 0.2f}, 0.5f); - test_repetition_penalty({0.2, 0.2, 0.2, 0.2, 0.2}, {0}, {0.25, 0.25, 0.25, 0.25, 0}, 50.0); - test_repetition_penalty({0.2, 0.2, 0.2, 0.2, 0.2}, {0, 1, 2}, {0.5, 0.5, 0, 0, 0}, 50.0); - test_repetition_penalty({0.2, 0.2, 0.2, 0.2, 0.2}, {0, 1, 2, 0, 0}, {0.5, 0.5, 0, 0, 0}, 50.0); + test_repetition_penalty({0.2f, 0.2f, 0.2f, 0.2f, 0.2f}, {0}, {0.25f, 0.25f, 0.25f, 0.25f, 0}, 50.0f); + test_repetition_penalty({0.2f, 0.2f, 0.2f, 0.2f, 0.2f}, {0, 1, 2}, {0.5f, 0.5f, 0, 0, 0}, 50.0f); + test_repetition_penalty({0.2f, 0.2f, 0.2f, 0.2f, 0.2f}, {0, 1, 2, 0, 0}, {0.5f, 0.5f, 0, 0, 0}, 50.0f); - test_frequency_presence_penalty({0.2, 0.2, 0.2, 0.2, 0.2}, {0}, {0.249997, 0.249997, 0.249997, 0.249997, 0.000011}, 5.0, 5.0); - test_frequency_presence_penalty({0.2, 0.2, 0.2, 0.2, 0.2}, {0, 1, 2}, {0.499966, 0.499966, 0.000023, 0.000023, 0.000023}, 5.0, 5.0); - test_frequency_presence_penalty({0.2, 0.2, 0.2, 0.2, 0.2}, {0, 1, 2, 0, 0}, {0.499977, 0.499977, 0.000023, 0.000023, 0.000000}, 5.0, 5.0); + test_frequency_presence_penalty({0.2f, 0.2f, 0.2f, 0.2f, 0.2f}, {0}, {0.249997f, 0.249997f, 0.249997f, 0.249997f, 0.000011f}, 5.0f, 5.0f); + test_frequency_presence_penalty({0.2f, 0.2f, 0.2f, 0.2f, 0.2f}, {0, 1, 2}, {0.499966f, 0.499966f, 0.000023f, 0.000023f, 0.000023f}, 5.0f, 5.0f); + test_frequency_presence_penalty({0.2f, 0.2f, 0.2f, 0.2f, 0.2f}, {0, 1, 2, 0, 0}, {0.499977f, 0.499977f, 0.000023f, 0.000023f, 0.000000f}, 5.0f, 5.0f); printf("OK\n"); } diff --git a/tests/test-tokenizer-0.cpp b/tests/test-tokenizer-0.cpp index b089845..ab1538a 100644 --- a/tests/test-tokenizer-0.cpp +++ b/tests/test-tokenizer-0.cpp @@ -53,7 +53,7 @@ int main(int argc, char **argv) { for (const auto & test_kv : k_tests()) { std::vector res(test_kv.first.size()); - const int n = llama_tokenize(ctx, test_kv.first.c_str(), res.data(), res.size(), true); + const int n = llama_tokenize(ctx, test_kv.first.c_str(), res.data(), int(res.size()), true); res.resize(n); bool correct = res.size() == test_kv.second.size();