diff --git a/Makefile b/Makefile index 61f2c77..5aa0ded 100644 --- a/Makefile +++ b/Makefile @@ -193,8 +193,12 @@ ifdef LLAMA_CUBLAS CXXFLAGS += -DGGML_USE_CUBLAS -I/usr/local/cuda/include -I/opt/cuda/include -I$(CUDA_PATH)/targets/x86_64-linux/include LDFLAGS += -lcublas -lculibos -lcudart -lcublasLt -lpthread -ldl -lrt -L/usr/local/cuda/lib64 -L/opt/cuda/lib64 -L$(CUDA_PATH)/targets/x86_64-linux/lib OBJS += ggml-cuda.o - NVCC = nvcc NVCCFLAGS = --forward-unknown-to-host-compiler +ifdef LLAMA_CUDA_NVCC + NVCC = $(LLAMA_CUDA_NVCC) +else + NVCC = nvcc +endif #LLAMA_CUDA_NVCC ifdef CUDA_DOCKER_ARCH NVCCFLAGS += -Wno-deprecated-gpu-targets -arch=$(CUDA_DOCKER_ARCH) else @@ -223,7 +227,9 @@ ifdef LLAMA_CUDA_KQUANTS_ITER else NVCCFLAGS += -DK_QUANTS_PER_ITERATION=2 endif - +ifdef LLAMA_CUDA_CCBIN + NVCCFLAGS += -ccbin $(LLAMA_CUDA_CCBIN) +endif ggml-cuda.o: ggml-cuda.cu ggml-cuda.h $(NVCC) $(NVCCFLAGS) $(CXXFLAGS) -Wno-pedantic -c $< -o $@ endif # LLAMA_CUBLAS