Mirror of https://git.adityakumar.xyz/llama.cpp.git (synced 2024-11-09 15:29:43 +00:00)
ggml : improve graph build time via hash table lookup (#2329)
* improve graph build time
* ggml_tensor : use 1 bit per flag
* use a hash table instead
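Background for the change: ggml_visit_parents() previously decided whether a tensor was already part of the graph by scanning the nodes[] and leafs[] arrays linearly, so building a graph of n tensors cost O(n^2) pointer comparisons. This commit replaces those scans with a fixed-size, open-addressing hash set keyed on the tensor's address. The following standalone sketch shows the idiom; the ht_* names and the tiny table size are illustrative only, not part of ggml (the real code in the diff below uses hash_insert and GGML_GRAPH_HASHTABLE_SIZE):

    #include <assert.h>
    #include <stdbool.h>
    #include <stddef.h>
    #include <stdio.h>

    #define HT_SIZE 31 // illustrative prime; ggml uses 8273 (> 2 * GGML_MAX_NODES)

    // returns true if p was already present, false if it was newly inserted
    static bool ht_insert(void * table[], void * p) {
        size_t i = (size_t)p % HT_SIZE; // home slot: pointer value mod prime
        size_t h = i;

        // linear probing: walk forward until we find p or an empty slot
        while (table[i] != NULL && table[i] != p) {
            i = (i + 1) % HT_SIZE;
            assert(i != h); // wrapped all the way around: table is full
        }

        if (table[i] == p) {
            return true; // already visited
        }

        table[i] = p; // first visit: remember the pointer
        return false;
    }

    int main(void) {
        void * table[HT_SIZE] = { NULL }; // zero-filled: empty set
        int a = 0, b = 0;

        printf("%d\n", ht_insert(table, &a)); // 0 (inserted)
        printf("%d\n", ht_insert(table, &a)); // 1 (duplicate)
        printf("%d\n", ht_insert(table, &b)); // 0 (inserted)
        return 0;
    }

hash_insert() in the ggml.c hunk below is the same routine with GGML_ASSERT in place of assert; its return value answers "already visited?", so one call replaces both membership loops.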
This commit is contained in:
parent 82552b7f54
commit da1889834a
3 changed files with 42 additions and 12 deletions
ggml.c | 43

@@ -15665,6 +15665,34 @@ static void ggml_compute_backward(struct ggml_context * ctx, struct ggml_tensor
     }
 }
 
+static_assert(GGML_GRAPH_HASHTABLE_SIZE > GGML_MAX_NODES * 2, "GGML_GRAPH_HT_SIZE is too small");
+
+static size_t hash(void * p) {
+    return (size_t)p % GGML_GRAPH_HASHTABLE_SIZE;
+}
+
+static bool hash_insert(void * hash_table[], void * p) {
+    size_t h = hash(p);
+
+    // linear probing
+    size_t i = h;
+    while (hash_table[i] != NULL && hash_table[i] != p) {
+        i = (i + 1) % GGML_GRAPH_HASHTABLE_SIZE;
+        if (i == h) {
+            // hash table is full
+            GGML_ASSERT(false);
+        }
+    }
+
+    if (hash_table[i] == p) {
+        return true;
+    }
+
+    // insert
+    hash_table[i] = p;
+    return false;
+}
+
 static void ggml_visit_parents(struct ggml_cgraph * cgraph, struct ggml_tensor * node) {
     if (node->grad == NULL) {
         // this usually happens when we generate intermediate nodes from constants in the backward pass
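The hash function is deliberately trivial: a tensor's address already identifies it uniquely, and a visited set only needs pointer identity, so reducing the pointer modulo the (prime) table size is enough. Collisions are handled by the linear probe in hash_insert, and since at most GGML_MAX_NODES nodes plus GGML_MAX_NODES leafs can ever be inserted, the static_assert above means the "hash table is full" branch should be unreachable for a legal graph.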
@@ -15675,16 +15703,8 @@ static void ggml_visit_parents(struct ggml_cgraph * cgraph, struct ggml_tensor *
     }
 
     // check if already visited
-    for (int i = 0; i < cgraph->n_nodes; i++) {
-        if (cgraph->nodes[i] == node) {
-            return;
-        }
-    }
-
-    for (int i = 0; i < cgraph->n_leafs; i++) {
-        if (cgraph->leafs[i] == node) {
-            return;
-        }
-    }
+    if (hash_insert(cgraph->visited_hash_table, node)) {
+        return;
+    }
 
     for (int i = 0; i < GGML_MAX_SRC; ++i) {
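This is the payoff hunk: each visited check is now a single expected-O(1) probe instead of a scan over up to n_nodes + n_leafs entries, so building a graph of n tensors drops from O(n^2) to roughly O(n) pointer comparisons. Note also that hash_insert tests membership and inserts in one pass, which is why the two loops collapse into a single call.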
@@ -15747,6 +15767,7 @@ struct ggml_cgraph ggml_build_forward(struct ggml_tensor * tensor) {
         /*.nodes        =*/ { NULL },
         /*.grads        =*/ { NULL },
         /*.leafs        =*/ { NULL },
+        /*.hash_table   =*/ { NULL },
         /*.perf_runs    =*/ 0,
         /*.perf_cycles  =*/ 0,
         /*.perf_time_us =*/ 0,
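The { NULL } initializer zero-fills the entire visited_hash_table array (C aggregate initialization sets all remaining elements to zero), so every graph built this way starts with an empty visited set and no separate clearing pass is needed.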
@@ -15788,7 +15809,7 @@ struct ggml_cgraph ggml_build_backward(struct ggml_context * ctx, struct ggml_cg
 
         if (node->is_param) {
             GGML_PRINT_DEBUG("%s: found root node %p\n", __func__, (void *) node);
-            ggml_build_forward_impl(&result, node->grad, true);
+            ggml_build_forward_expand(&result, node->grad);
         }
     }
 
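As far as the codebase at this commit goes, ggml_build_forward_expand(g, t) is the public wrapper around ggml_build_forward_impl(g, t, true), so this line is a cleanup to use the public API rather than a behavior change: the backward graph is still expanded into the existing cgraph.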
ggml.h | 9

@@ -442,7 +442,7 @@ extern "C" {
 
         void * extra; // extra things e.g. for ggml-cuda.cu
 
-        char padding[8];
+        char padding[4];
     };
 
     static const size_t GGML_TENSOR_SIZE = sizeof(struct ggml_tensor);
@@ -463,6 +463,11 @@ extern "C" {
         void * abort_callback_data;
     };
 
+    // next prime after GGML_MAX_NODES
+    // #define GGML_GRAPH_HASHTABLE_SIZE 4099
+    // next prime after GGML_MAX_NODES * 2 (nodes + leafs)
+    #define GGML_GRAPH_HASHTABLE_SIZE 8273
+
     // computation graph
     struct ggml_cgraph {
         int n_nodes;
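Sizing rationale: 8273 is the next prime above 2 * GGML_MAX_NODES (4096 nodes plus 4096 leafs), which bounds the table's load factor below ~50% so linear-probe chains stay short; the static_assert added in ggml.c keeps the define honest if GGML_MAX_NODES ever grows. The commented-out 4099 documents the earlier, nodes-only sizing.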
@@ -472,6 +477,8 @@ extern "C" {
         struct ggml_tensor * grads[GGML_MAX_NODES];
         struct ggml_tensor * leafs[GGML_MAX_NODES];
 
+        void * visited_hash_table[GGML_GRAPH_HASHTABLE_SIZE];
+
         // performance
         int perf_runs;
         int64_t perf_cycles;
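Cost of the new field: 8273 pointer slots at 8 bytes each is 8273 * 8 = 66,184 bytes, so sizeof(struct ggml_cgraph) grows by roughly 65 KiB on a 64-bit target. That is a deliberate space-for-time trade: the table is embedded in the graph struct itself, so no allocation happens during graph build.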
llama.cpp | 2

@@ -1714,6 +1714,8 @@ static bool llama_eval_internal(
     // run the computation
     ggml_build_forward_expand(&gf, cur);
 
+    // fprintf(stderr, "graph build time: %.3f ms (%d nodes, %d leafs)\n", (ggml_time_us() - t_start_us)/1000.0, gf.n_nodes, gf.n_leafs);
+
 #if GGML_USE_MPI
     ggml_mpi_graph_compute_pre(lctx.ctx_mpi, &gf, n_layer);
 #endif
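The new fprintf is left commented out as a measurement hook: uncommenting it prints the graph build time and node/leaf counts once per eval. The t_start_us it references is presumably the per-eval timestamp that llama_eval_internal already records via ggml_time_us() at function entry; if it were not in scope, a timestamp would need to be captured just before ggml_build_forward_expand.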