Mirror of https://git.adityakumar.xyz/llama.cpp.git, synced 2024-11-09 15:29:43 +00:00
CUDA: use min compute capability of GPUs actually used (#2506)
commit 4329d1acb0
parent 02f9d96a86
1 changed file (ggml-cuda.cu) with 2 additions and 1 deletion
@@ -5347,7 +5347,8 @@ void ggml_cuda_mul_mat(const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst)
     } else {
         int min_compute_capability = INT_MAX;
         for (int id = 0; id < g_device_count; ++id) {
-            if (min_compute_capability > g_compute_capabilities[id]) {
+            if (min_compute_capability > g_compute_capabilities[id]
+                    && g_tensor_split[id] < (id + 1 < g_device_count ? g_tensor_split[id + 1] : 1.0f)) {
                 min_compute_capability = g_compute_capabilities[id];
             }
         }
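For context, here is a minimal, self-contained sketch of the filter this commit adds. The device count, compute capabilities, and split fractions are made-up example values, and the plain variables are hypothetical stand-ins for llama.cpp's g_device_count, g_compute_capabilities, and g_tensor_split globals. It assumes, as the surrounding code does, that the split array holds cumulative start fractions in [0, 1), so device id owns the slice [tensor_split[id], tensor_split[id + 1]) and the last slice ends at 1.0f; a device is "actually used" only when that slice is non-empty.

// Sketch of the "GPUs actually used" filter (hypothetical values; plain
// variables stand in for llama.cpp's g_* globals).
#include <climits>
#include <cstdio>

static const int   device_count           = 3;
// Compute capabilities encoded as 100*major + 10*minor, e.g. 8.6 -> 860.
static const int   compute_capabilities[] = {860, 610, 750};
// Cumulative start fractions: device 1 starts and ends at 0.7, i.e. an empty slice.
static const float tensor_split[]         = {0.0f, 0.7f, 0.7f};

int main(void) {
    int min_compute_capability = INT_MAX;
    for (int id = 0; id < device_count; ++id) {
        // Device id's slice ends where the next device's begins (1.0f for the last one).
        const float split_end = id + 1 < device_count ? tensor_split[id + 1] : 1.0f;
        // Skip devices with an empty slice: before this commit, an idle GPU with a low
        // compute capability would still drag the minimum down.
        if (min_compute_capability > compute_capabilities[id] && tensor_split[id] < split_end) {
            min_compute_capability = compute_capabilities[id];
        }
    }
    printf("min compute capability of GPUs actually used: %d\n", min_compute_capability);
    return 0;
}

With these values the program prints 750: device 1's empty slice excludes its capability (610) from the minimum, whereas the pre-commit loop would have reported 610 and could make downstream code choose kernels based on a GPU that holds no part of the tensor.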