mirror of
https://git.adityakumar.xyz/llama.cpp.git
synced 2024-11-09 15:29:43 +00:00
llama : fix Metal KV cache sync (close #1695)
This commit is contained in:
parent
827f5eda91
commit
d1f563a743
1 changed file with 8 additions and 0 deletions
|
@@ -1455,6 +1455,14 @@ static bool llama_eval_internal(
|
|||
// When we implement Matrix x Matrix Metal multiplication, we can avoid this branch.
|
||||
// But for now, we have focused only on Matrix x Vector Metal multiplication.
|
||||
//
|
||||
// TODO: avoid these syncs via shared memory (ref #1696)
|
||||
//
|
||||
if (lctx.ctx_metal) {
|
||||
// We need to sync the GPU KV cache with the CPU KV cache
|
||||
ggml_metal_get_tensor(lctx.ctx_metal, kv_self.k);
|
||||
ggml_metal_get_tensor(lctx.ctx_metal, kv_self.v);
|
||||
}
|
||||
|
||||
ggml_graph_compute(ctx0, &gf);
|
||||
|
||||
if (lctx.ctx_metal) {
|
||||
|
|
Loading…
Reference in a new issue