ggml : sync cgraph import / export API

2024-11-09 15:29:43 +00:00 · 2023-05-29 19:31:44 +03:00 · 2023-05-29 19:31:44 +03:00 · 7552ac5863
commit 7552ac5863
parent 5d1830b99d
2 changed files with 491 additions and 3 deletions
--- a/ggml.c
+++ b/ggml.c
@ -4030,6 +4030,14 @@ void ggml_set_no_alloc(struct ggml_context * ctx, bool no_alloc) {
    ctx->no_alloc = no_alloc;
 }
 // accessor: returns the mem_buffer pointer stored in the context
 void * ggml_get_mem_buffer(struct ggml_context * ctx) {
    void * const buffer = ctx->mem_buffer;
    return buffer;
 }
 // accessor: returns the mem_size value stored in the context
 size_t ggml_get_mem_size(struct ggml_context * ctx) {
    const size_t size = ctx->mem_size;
    return size;
 }
 // IMPORTANT:
 // when creating "opt" tensors, always save and load the scratch buffer
 // this is an error prone process, but it is necessary to support inplace
@ -4523,6 +4531,23 @@ struct ggml_tensor * ggml_view_tensor(
    return result;
 }
 struct ggml_tensor * ggml_get_tensor(struct ggml_context * ctx, const char * name) {
    struct ggml_object * obj = ctx->objects_begin;
    char * const mem_buffer = ctx->mem_buffer;
    while (obj != NULL) {
        struct ggml_tensor * cur = (struct ggml_tensor *)(mem_buffer + obj->offs);
        if (strcmp(cur->name, name) == 0) {
            return cur;
        }
        obj = obj->next;
    }
    return NULL;
 }
 ////////////////////////////////////////////////////////////////////////////////
 // ggml_dup
@ -14532,7 +14557,7 @@ void ggml_graph_reset(struct ggml_cgraph * cgraph) {
    }
 }
-struct ggml_tensor * ggml_get_tensor_by_name(struct ggml_cgraph * cgraph, const char * name) {
+struct ggml_tensor * ggml_graph_get_tensor(struct ggml_cgraph * cgraph, const char * name) {
    for (int i = 0; i < cgraph->n_leafs; i++) {
        struct ggml_tensor * leaf = cgraph->leafs[i];
@ -14552,6 +14577,461 @@ struct ggml_tensor * ggml_get_tensor_by_name(struct ggml_cgraph * cgraph, const
    return NULL;
 }
 // print one text row describing a leaf tensor to "fout"
 // columns: type name, op name, n_dims, ne[0..3], nb[0..3], data pointer, name
 static void ggml_graph_export_leaf(const struct ggml_tensor * tensor, FILE * fout) {
    const int64_t * ne = tensor->ne;
    const size_t  * nb = tensor->nb;

    // int64_t is not "long long" on every platform (it can be "long"),
    // so cast explicitly to make the arguments match the %lld conversions
    fprintf(fout, "%-6s %-12s %8d %8lld %8lld %8lld %8lld %16zu %16zu %16zu %16zu %16p %16s\n",
            ggml_type_name(tensor->type),
            ggml_op_name  (tensor->op),
            tensor->n_dims,
            (long long) ne[0], (long long) ne[1], (long long) ne[2], (long long) ne[3],
            nb[0], nb[1], nb[2], nb[3],
            tensor->data,
            tensor->name);
 }
 // print one text row describing a node tensor (or one of its arguments) to "fout"
 // "arg" is the label printed in the first column (the caller passes "DST", "SRC0", "SRC1" or "OPT")
 // remaining columns: type name, op name, n_dims, ne[0..3], nb[0..3], n_tasks, data pointer, name
 static void ggml_graph_export_node(const struct ggml_tensor * tensor, const char * arg, FILE * fout) {
    const int64_t * ne = tensor->ne;
    const size_t  * nb = tensor->nb;

    // int64_t is not "long long" on every platform (it can be "long"),
    // so cast explicitly to make the arguments match the %lld conversions
    fprintf(fout, "%-6s %-6s %-12s %8d %8lld %8lld %8lld %8lld %16zu %16zu %16zu %16zu %8d %16p %16s\n",
            arg,
            ggml_type_name(tensor->type),
            ggml_op_name  (tensor->op),
            tensor->n_dims,
            (long long) ne[0], (long long) ne[1], (long long) ne[2], (long long) ne[3],
            nb[0], nb[1], nb[2], nb[3],
            tensor->n_tasks,
            tensor->data,
            tensor->name);
 }
 void ggml_graph_export(const struct ggml_cgraph * cgraph, const char * fname) {
    assert(cgraph->work      == NULL);
    assert(cgraph->work_size == 0);
    uint64_t size_eval = 0;
    // compute size of intermediate results
    // TODO: does not take into account scratch buffers !!!!
    for (int i = 0; i < cgraph->n_nodes; ++i) {
        size_eval += ggml_nbytes(cgraph->nodes[i]);
    }
    // print
    {
        FILE * fout = stdout;
        fprintf(fout, "\n");
        fprintf(fout, "%-16s %8x\n",   "magic",   GGML_FILE_MAGIC);
        fprintf(fout, "%-16s %8d\n",   "version", GGML_FILE_VERSION);
        fprintf(fout, "%-16s %8d\n",   "leafs",   cgraph->n_leafs);
        fprintf(fout, "%-16s %8d\n",   "nodes",   cgraph->n_nodes);
        fprintf(fout, "%-16s %8llu\n", "eval",    size_eval);
        // header
        fprintf(fout, "\n");
        fprintf(fout, "%-6s %-12s %8s %8s %8s %8s %8s %16s %16s %16s %16s %16s %16s\n",
                "TYPE", "OP", "NDIMS", "NE0", "NE1", "NE2", "NE3", "NB0", "NB1", "NB2", "NB3", "DATA", "NAME");
        for (int i = 0; i < cgraph->n_leafs; ++i) {
            ggml_graph_export_leaf(cgraph->leafs[i], fout);
            GGML_ASSERT(cgraph->leafs[i]->op   == GGML_OP_NONE);
            GGML_ASSERT(cgraph->leafs[i]->src0 == NULL);
            GGML_ASSERT(cgraph->leafs[i]->src1 == NULL);
        }
        // header
        fprintf(fout, "\n");
        fprintf(fout, "%-6s %-6s %-12s %8s %8s %8s %8s %8s %16s %16s %16s %16s %8s %16s %16s\n",
                "ARG", "TYPE", "OP", "NDIMS", "NE0", "NE1", "NE2", "NE3", "NB0", "NB1", "NB2", "NB3", "NTASKS", "DATA", "NAME");
        for (int i = 0; i < cgraph->n_nodes; ++i) {
            ggml_graph_export_node(cgraph->nodes[i], "DST", fout);
            if (cgraph->nodes[i]->src0) {
                ggml_graph_export_node(cgraph->nodes[i]->src0, "SRC0", fout);
            }
            if (cgraph->nodes[i]->src1) {
                ggml_graph_export_node(cgraph->nodes[i]->src1, "SRC1", fout);
            }
            for (int j = 0; j < GGML_MAX_OPT; ++j) {
                if (cgraph->nodes[i]->opt[j]) {
                    ggml_graph_export_node(cgraph->nodes[i]->opt[j], "OPT", fout);
                }
            }
            fprintf(fout, "\n");
        }
        fprintf(fout, "\n");
    }
    // write binary data
    {
        FILE * fout = fopen(fname, "wb");
        if (!fout) {
            fprintf(stderr, "%s: failed to open %s\n", __func__, fname);
            return;
        }
        // header
        {
            const uint32_t magic   = GGML_FILE_MAGIC;
            const uint32_t version = GGML_FILE_VERSION;
            const uint32_t n_leafs = cgraph->n_leafs;
            const uint32_t nodes   = cgraph->n_nodes;
            fwrite(&magic,     sizeof(uint32_t), 1, fout);
            fwrite(&version,   sizeof(uint32_t), 1, fout);
            fwrite(&n_leafs,   sizeof(uint32_t), 1, fout);
            fwrite(&nodes,     sizeof(uint32_t), 1, fout);
            fwrite(&size_eval, sizeof(uint64_t), 1, fout);
        }
        // leafs
        {
            for (int i = 0; i < cgraph->n_leafs; ++i) {
                const struct ggml_tensor * tensor = cgraph->leafs[i];
                const uint32_t type   = tensor->type;
                const uint32_t op     = tensor->op;
                const uint32_t n_dims = tensor->n_dims;
                fwrite(&type,   sizeof(uint32_t), 1, fout);
                fwrite(&op,     sizeof(uint32_t), 1, fout);
                fwrite(&n_dims, sizeof(uint32_t), 1, fout);
                for (int j = 0; j < GGML_MAX_DIMS; ++j) {
                    const uint64_t ne = tensor->ne[j];
                    const uint64_t nb = tensor->nb[j];
                    fwrite(&ne, sizeof(uint64_t), 1, fout);
                    fwrite(&nb, sizeof(uint64_t), 1, fout);
                }
                // store the pointer address
                {
                    const uint64_t ptr = (uint64_t) tensor->data;
                    fwrite(&ptr, sizeof(uint64_t), 1, fout);
                }
                fwrite(tensor->name, sizeof(char), GGML_MAX_NAME, fout);
                // dump the data
                // TODO: pad this to 32 byte boundary
                {
                    const size_t size = ggml_nbytes(tensor);
                    fwrite(tensor->data, sizeof(char), size, fout);
                }
            }
        }
        // nodes
        {
            for (int i = 0; i < cgraph->n_nodes; ++i) {
                const struct ggml_tensor * tensor = cgraph->nodes[i];
                const uint32_t type   = tensor->type;
                const uint32_t op     = tensor->op;
                const uint32_t n_dims = tensor->n_dims;
                fwrite(&type,   sizeof(uint32_t), 1, fout);
                fwrite(&op,     sizeof(uint32_t), 1, fout);
                fwrite(&n_dims, sizeof(uint32_t), 1, fout);
                for (int j = 0; j < GGML_MAX_DIMS; ++j) {
                    const uint64_t ne = tensor->ne[j];
                    const uint64_t nb = tensor->nb[j];
                    fwrite(&ne, sizeof(uint64_t), 1, fout);
                    fwrite(&nb, sizeof(uint64_t), 1, fout);
                }
                // store the pointer address
                {
                    const uint64_t ptr = (uint64_t) tensor->data;
                    fwrite(&ptr, sizeof(uint64_t), 1, fout);
                }
                fwrite(tensor->name, sizeof(char), GGML_MAX_NAME, fout);
                // output the op arguments
                {
                    struct ggml_tensor * args[2 + GGML_MAX_OPT] = { NULL };
                    args[0] = tensor->src0;
                    args[1] = tensor->src1;
                    for (int j = 0; j < GGML_MAX_OPT; ++j) {
                        args[2 + j] = tensor->opt[j];
                    }
                    for (int j = 0; j < 2 + GGML_MAX_OPT; ++j) {
                        if (args[j]) {
                            int32_t idx = -1;
                            // check if leaf
                            {
                                for (int k = 0; k < cgraph->n_leafs; ++k) {
                                    if (args[j] == cgraph->leafs[k]) {
                                        idx = k;
                                        break;
                                    }
                                }
                            }
                            // check if node
                            if (idx == -1) {
                                for (int k = 0; k < cgraph->n_nodes; ++k) {
                                    if (args[j] == cgraph->nodes[k]) {
                                        idx = GGML_MAX_NODES + k;
                                        break;
                                    }
                                }
                            }
                            if (idx == -1) {
                                fprintf(stderr, "%s: failed to find tensor, arg = %d, node = %d\n", __func__, j, i);
                                return;
                            }
                            fwrite(&idx, sizeof(int32_t), 1, fout);
                        } else {
                            const int32_t nul = -1;
                            fwrite(&nul, sizeof(int32_t), 1, fout);
                        }
                    }
                }
            }
        }
        fclose(fout);
    }
 }
 struct ggml_cgraph ggml_graph_import(const char * fname, struct ggml_context ** ctx_data, struct ggml_context ** ctx_eval) {
    assert(*ctx_data == NULL);
    assert(*ctx_eval == NULL);
    struct ggml_cgraph result = { 0 };
    struct ggml_tensor * data = NULL;
    // read file into data
    {
        FILE * fin = fopen(fname, "rb");
        if (!fin) {
            fprintf(stderr, "%s: failed to open %s\n", __func__, fname);
            return result;
        }
        size_t fsize = 0;
        fseek(fin, 0, SEEK_END);
        fsize = ftell(fin);
        fseek(fin, 0, SEEK_SET);
        // create the data context
        {
            const size_t overhead = 1*ggml_tensor_overhead();
            struct ggml_init_params params = {
                .mem_size   = fsize + overhead,
                .mem_buffer = NULL,
                .no_alloc   = false,
            };
            *ctx_data = ggml_init(params);
            if (!*ctx_data) {
                fprintf(stderr, "%s: failed to create ggml context\n", __func__);
                return result;
            }
        }
        data = ggml_new_tensor_1d(*ctx_data, GGML_TYPE_I8, fsize);
        fread(data->data, sizeof(char), fsize, fin);
        fclose(fin);
    }
    // populate result
    {
        char * ptr = (char *) data->data;
        const uint32_t magic = *(const uint32_t *) ptr; ptr += sizeof(magic);
        if (magic != GGML_FILE_MAGIC) {
            fprintf(stderr, "%s: invalid magic number, got %08x\n", __func__, magic);
            return result;
        }
        const uint32_t version = *(const uint32_t *) ptr; ptr += sizeof(version);
        if (version != GGML_FILE_VERSION) {
            fprintf(stderr, "%s: invalid version number\n", __func__);
            return result;
        }
        const uint32_t n_leafs   = *(const uint32_t *) ptr; ptr += sizeof(n_leafs);
        const uint32_t n_nodes   = *(const uint32_t *) ptr; ptr += sizeof(n_nodes);
        const uint64_t size_eval = *(const uint64_t *) ptr; ptr += sizeof(size_eval);
        result.n_leafs = n_leafs;
        result.n_nodes = n_nodes;
        // create the data context
        {
            const size_t overhead = (n_leafs + n_nodes)*ggml_tensor_overhead();
            struct ggml_init_params params = {
                .mem_size   = size_eval + overhead,
                .mem_buffer = NULL,
                .no_alloc   = true,
            };
            *ctx_eval = ggml_init(params);
            if (!*ctx_eval) {
                fprintf(stderr, "%s: failed to create ggml context\n", __func__);
                return result;
            }
        }
        // leafs
        {
            uint32_t type;
            uint32_t op;
            uint32_t n_dims;
            for (uint32_t i = 0; i < n_leafs; ++i) {
                type   = *(const uint32_t *) ptr; ptr += sizeof(type);
                op     = *(const uint32_t *) ptr; ptr += sizeof(op);
                n_dims = *(const uint32_t *) ptr; ptr += sizeof(n_dims);
                int64_t ne[GGML_MAX_DIMS];
                size_t  nb[GGML_MAX_DIMS];
                for (int j = 0; j < GGML_MAX_DIMS; ++j) {
                    uint64_t ne_cur;
                    uint64_t nb_cur;
                    ne_cur = *(const uint64_t *) ptr; ptr += sizeof(ne_cur);
                    nb_cur = *(const uint64_t *) ptr; ptr += sizeof(nb_cur);
                    ne[j] = ne_cur;
                    nb[j] = nb_cur;
                }
                struct ggml_tensor * tensor = ggml_new_tensor(*ctx_eval, (enum ggml_type) type, n_dims, ne);
                tensor->op = (enum ggml_op) op;
                uint64_t ptr_cur = *(const uint64_t *) ptr; ptr += sizeof(ptr_cur);
                memcpy(tensor->name, ptr, GGML_MAX_NAME); ptr += GGML_MAX_NAME;
                tensor->data = (void *) ptr;
                for (int j = 0; j < GGML_MAX_DIMS; ++j) {
                    tensor->nb[j] = nb[j];
                }
                result.leafs[i] = tensor;
                ptr += ggml_nbytes(tensor);
                fprintf(stderr, "%s: loaded leaf %d: '%16s', %3d dims, %9zu bytes\n", __func__, i, tensor->name, n_dims, ggml_nbytes(tensor));
            }
        }
        ggml_set_no_alloc(*ctx_eval, false);
        // nodes
        {
            uint32_t type;
            uint32_t op;
            uint32_t n_dims;
            for (uint32_t i = 0; i < n_nodes; ++i) {
                type   = *(const uint32_t *) ptr; ptr += sizeof(type);
                op     = *(const uint32_t *) ptr; ptr += sizeof(op);
                n_dims = *(const uint32_t *) ptr; ptr += sizeof(n_dims);
                int64_t ne[GGML_MAX_DIMS];
                size_t  nb[GGML_MAX_DIMS];
                for (int j = 0; j < GGML_MAX_DIMS; ++j) {
                    uint64_t ne_cur;
                    uint64_t nb_cur;
                    ne_cur = *(const uint64_t *) ptr; ptr += sizeof(ne_cur);
                    nb_cur = *(const uint64_t *) ptr; ptr += sizeof(nb_cur);
                    ne[j] = ne_cur;
                    nb[j] = nb_cur;
                }
                struct ggml_tensor * tensor = ggml_new_tensor(*ctx_eval, (enum ggml_type) type, n_dims, ne);
                tensor->op = (enum ggml_op) op;
                uint64_t ptr_cur = *(const uint64_t *) ptr; ptr += sizeof(ptr_cur);
                memcpy(tensor->name, ptr, GGML_MAX_NAME); ptr += GGML_MAX_NAME;
                for (int j = 0; j < GGML_MAX_DIMS; ++j) {
                    tensor->nb[j] = nb[j];
                }
                // parse args
                {
                    struct ggml_tensor ** args[2 + GGML_MAX_OPT] = {
                        &tensor->src0,
                        &tensor->src1,
                    };
                    for (int j = 0; j < GGML_MAX_OPT; ++j) {
                        args[2 + j] = &tensor->opt[j];
                    }
                    for (int j = 0; j < 2 + GGML_MAX_OPT; ++j) {
                        const int32_t arg_idx = *(const int32_t *) ptr; ptr += sizeof(arg_idx);
                        if (arg_idx == -1) {
                            continue;
                        }
                        if (arg_idx < GGML_MAX_NODES) {
                            *args[j] = result.leafs[arg_idx];
                        } else {
                            *args[j] = result.nodes[arg_idx - GGML_MAX_NODES];
                        }
                    }
                }
                result.nodes[i] = tensor;
                fprintf(stderr, "%s: loaded node %d: '%16s', %3d dims, %9zu bytes\n", __func__, i, tensor->name, n_dims, ggml_nbytes(tensor));
            }
        }
    }
    return result;
 }
 void ggml_graph_print(const struct ggml_cgraph * cgraph) {
    int64_t perf_total_per_op_us[GGML_OP_COUNT] = {0};
--- a/ggml.h
+++ b/ggml.h
@ -451,9 +451,12 @@ extern "C" {
    GGML_API size_t  ggml_used_mem(const struct ggml_context * ctx);
-    GGML_API size_t  ggml_set_scratch(struct ggml_context * ctx, struct ggml_scratch scratch);
+    GGML_API size_t  ggml_set_scratch (struct ggml_context * ctx, struct ggml_scratch scratch);
    GGML_API void    ggml_set_no_alloc(struct ggml_context * ctx, bool no_alloc);
    GGML_API void *  ggml_get_mem_buffer(struct ggml_context * ctx);
    GGML_API size_t  ggml_get_mem_size  (struct ggml_context * ctx);
    GGML_API struct ggml_tensor * ggml_new_tensor(
            struct ggml_context * ctx,
            enum   ggml_type type,
@ -492,6 +495,8 @@ extern "C" {
    GGML_API struct ggml_tensor * ggml_dup_tensor (struct ggml_context * ctx, const struct ggml_tensor * src);
    GGML_API struct ggml_tensor * ggml_view_tensor(struct ggml_context * ctx, const struct ggml_tensor * src);
    GGML_API struct ggml_tensor * ggml_get_tensor(struct ggml_context * ctx, const char * name);
    GGML_API struct ggml_tensor * ggml_set_zero(struct ggml_tensor * tensor);
    GGML_API struct ggml_tensor * ggml_set_i32 (struct ggml_tensor * tensor, int32_t value);
    GGML_API struct ggml_tensor * ggml_set_f32 (struct ggml_tensor * tensor, float value);
@ -978,7 +983,10 @@ extern "C" {
    GGML_API void ggml_graph_compute(struct ggml_context * ctx, struct ggml_cgraph * cgraph);
    GGML_API void ggml_graph_reset  (struct ggml_cgraph * cgraph);
-    GGML_API struct ggml_tensor * ggml_get_tensor_by_name(struct ggml_cgraph * cgraph, const char * name);
+    GGML_API struct ggml_tensor * ggml_graph_get_tensor(struct ggml_cgraph * cgraph, const char * name);
    GGML_API void               ggml_graph_export(const struct ggml_cgraph * cgraph, const char * fname);
    GGML_API struct ggml_cgraph ggml_graph_import(const char * fname, struct ggml_context ** ctx_data, struct ggml_context ** ctx_eval);
    // print info and performance information for the graph
    GGML_API void ggml_graph_print(const struct ggml_cgraph * cgraph);