From c56b7152690ca25cfd66b20210b3629e6c1e739b Mon Sep 17 00:00:00 2001 From: Pavol Rusnak Date: Fri, 14 Apr 2023 20:05:37 +0200 Subject: [PATCH] Expose type name from ggml (#970) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Avoid duplication of type names in utils Co-authored-by: HÃ¥kon H. Hitland --- examples/quantize-stats/quantize-stats.cpp | 14 ++++++-------- ggml.c | 17 +++++++++++++++++ ggml.h | 2 ++ llama.cpp | 14 ++------------ 4 files changed, 27 insertions(+), 20 deletions(-) diff --git a/examples/quantize-stats/quantize-stats.cpp b/examples/quantize-stats/quantize-stats.cpp index c786fe2..0503009 100644 --- a/examples/quantize-stats/quantize-stats.cpp +++ b/examples/quantize-stats/quantize-stats.cpp @@ -16,9 +16,6 @@ #include #include -static const char * type_strs[] = { "q4_0", "q4_1", "i8", "i16", "i32", "f16", "f32" }; -static_assert(sizeof(type_strs) == GGML_TYPE_COUNT * sizeof(char *), "Incomplete type list"); - struct quantize_stats_params { std::string model = "models/7B/ggml-model-f16.bin"; bool verbose = false; @@ -224,7 +221,7 @@ int main(int argc, char ** argv) { break; } int j; - for (j = 0; j < GGML_TYPE_COUNT && strcmp(argv[i], type_strs[j]) != 0; j++) { + for (j = 0; j < GGML_TYPE_COUNT && strcmp(argv[i], ggml_type_name((ggml_type) i)) != 0; j++) { // find match } if (j < GGML_TYPE_COUNT) { @@ -279,7 +276,7 @@ int main(int argc, char ** argv) { continue; } if (params.verbose) { - printf("%s: type %s, size %" PRId64 "\n", kv_tensor.first.c_str(), type_strs[kv_tensor.second->type], ggml_nelements(kv_tensor.second)); + printf("%s: type %s, size %" PRId64 "\n", kv_tensor.first.c_str(), ggml_type_name(kv_tensor.second->type), ggml_nelements(kv_tensor.second)); } if (kv_tensor.second->type == GGML_TYPE_F16) { is_f16 = true; @@ -304,13 +301,14 @@ int main(int argc, char ** argv) { // loop throught quantization types for (int i = 0; i < GGML_TYPE_COUNT; i++) { + const ggml_type type = (ggml_type) i; if (!params.include_types.empty() && std::find(params.include_types.begin(), params.include_types.end(), i) == params.include_types.end()) { continue; } quantize_fns_t qfns = ggml_internal_get_quantize_fn(i); if (qfns.quantize_row_q && qfns.dequantize_row_q) { if (params.verbose) { - printf("testing %s ...\n", type_strs[i]); + printf("testing %s ...\n", ggml_type_name(type)); } error_stats global_stats {}; @@ -322,7 +320,7 @@ int main(int argc, char ** argv) { if (params.verbose) { printf(" %s ...\n", kv_tensor.first.c_str()); } - std::string layer_name { type_strs[i] }; + std::string layer_name { ggml_type_name(type) }; layer_name += "::" + kv_tensor.first; test_roundtrip_on_layer( layer_name, @@ -337,7 +335,7 @@ int main(int argc, char ** argv) { ); } - print_error_stats(type_strs[i], global_stats, params.print_histogram); + print_error_stats(ggml_type_name(type), global_stats, params.print_histogram); } } diff --git a/ggml.c b/ggml.c index ce48b78..1574d64 100644 --- a/ggml.c +++ b/ggml.c @@ -2671,6 +2671,18 @@ static const size_t GGML_TYPE_SIZE[GGML_TYPE_COUNT] = { }; static_assert(GGML_TYPE_COUNT == 7, "GGML_TYPE_SIZE is outdated"); + +static const char * GGML_TYPE_NAME[GGML_TYPE_COUNT] = { + [GGML_TYPE_F32] = "f32", + [GGML_TYPE_F16] = "f16", + [GGML_TYPE_Q4_0] = "q4_0", + [GGML_TYPE_Q4_1] = "q4_1", + [GGML_TYPE_I8] = "i8", + [GGML_TYPE_I16] = "i16", + [GGML_TYPE_I32] = "i32", +}; +static_assert(GGML_TYPE_COUNT == 7, "GGML_TYPE_NAME is outdated"); + static const char * GGML_OP_LABEL[GGML_OP_COUNT] = { "NONE", @@ -2895,6 +2907,11 @@ float ggml_type_sizef(enum ggml_type type) { return ((float)(GGML_TYPE_SIZE[type]))/GGML_BLCK_SIZE[type]; } +const char * ggml_type_name(enum ggml_type type) { + return GGML_TYPE_NAME[type]; +} + + size_t ggml_element_size(const struct ggml_tensor * tensor) { return GGML_TYPE_SIZE[tensor->type]; } diff --git a/ggml.h b/ggml.h index bdff0b4..617298a 100644 --- a/ggml.h +++ b/ggml.h @@ -354,6 +354,8 @@ int ggml_blck_size (enum ggml_type type); size_t ggml_type_size (enum ggml_type type); // size in bytes for all elements in a block float ggml_type_sizef(enum ggml_type type); // ggml_type_size()/ggml_blck_size() as float +const char * ggml_type_name(enum ggml_type type); + size_t ggml_element_size(const struct ggml_tensor * tensor); struct ggml_context * ggml_init(struct ggml_init_params params); diff --git a/llama.cpp b/llama.cpp index c722956..be8c4cd 100644 --- a/llama.cpp +++ b/llama.cpp @@ -269,16 +269,6 @@ static std::string llama_format_tensor_shape(const std::vector & ne) { return ret; } -static const char * llama_format_type(enum ggml_type type) { - switch (type) { - case GGML_TYPE_F32: return "f32"; - case GGML_TYPE_F16: return "f16"; - case GGML_TYPE_Q4_0: return "q4_0"; - case GGML_TYPE_Q4_1: return "q4_1"; - default: LLAMA_ASSERT(false); - } -} - static size_t llama_calc_tensor_size(const std::vector & ne, enum ggml_type type) { size_t size = ggml_type_size(type); for (uint32_t dim : ne) { @@ -1582,7 +1572,7 @@ static void llama_model_quantize_internal(const std::string & fname_inp, const s printf("[%zu/%zu] %36s - %s, type = %6s, ", ++idx, model_loader->tensors_map.tensors.size(), tensor.name.c_str(), llama_format_tensor_shape(tensor.ne).c_str(), - llama_format_type(tensor.type)); + ggml_type_name(tensor.type)); // This used to be a regex, but has an extreme cost to compile times. bool quantize = tensor.name.rfind("weight") == tensor.name.size() - 6; // ends with 'weight'? @@ -1615,7 +1605,7 @@ static void llama_model_quantize_internal(const std::string & fname_inp, const s f32_data[i] = ggml_fp16_to_fp32(f16_data[i]); } } else { - throw format("type %s unsupported for integer quantization", llama_format_type(tensor.type)); + throw format("type %s unsupported for integer quantization", ggml_type_name(tensor.type)); } printf("quantizing .. ");