mirror of
https://git.adityakumar.xyz/llama.cpp.git
synced 2024-11-09 23:29:44 +00:00
feature : support blis and other blas implementation (#1536)
* feature: add blis support * feature: allow all BLA_VENDOR to be assigned in cmake arguments. align with whisper.cpp pr 927 * fix: version detection for BLA_SIZEOF_INTEGER, recover min version of cmake * Fix typo in INTEGER Co-authored-by: Georgi Gerganov <ggerganov@gmail.com> * Fix: blas changes on ci --------- Co-authored-by: Georgi Gerganov <ggerganov@gmail.com>
This commit is contained in:
parent
9ecb30f959
commit
b8ee340abe
5 changed files with 104 additions and 25 deletions
2
.github/workflows/build.yml
vendored
2
.github/workflows/build.yml
vendored
|
@ -165,7 +165,7 @@ jobs:
|
||||||
- build: 'clblast'
|
- build: 'clblast'
|
||||||
defines: '-DLLAMA_CLBLAST=ON -DCMAKE_PREFIX_PATH="$env:RUNNER_TEMP/clblast"'
|
defines: '-DLLAMA_CLBLAST=ON -DCMAKE_PREFIX_PATH="$env:RUNNER_TEMP/clblast"'
|
||||||
- build: 'openblas'
|
- build: 'openblas'
|
||||||
defines: '-DLLAMA_OPENBLAS=ON -DBLAS_LIBRARIES="/LIBPATH:$env:RUNNER_TEMP/openblas/lib" -DOPENBLAS_INC="$env:RUNNER_TEMP/openblas/include"'
|
defines: '-DLLAMA_BLAS=ON -DLLAMA_BLAS_VENDOR=OpenBLAS -DBLAS_INCLUDE_DIRS="$env:RUNNER_TEMP/openblas/include"'
|
||||||
|
|
||||||
steps:
|
steps:
|
||||||
- name: Clone
|
- name: Clone
|
||||||
|
|
67
BLIS.md
Normal file
67
BLIS.md
Normal file
|
@ -0,0 +1,67 @@
|
||||||
|
BLIS Installation Manual
|
||||||
|
------------------------
|
||||||
|
|
||||||
|
BLIS is a portable software framework for high-performance BLAS-like dense linear algebra libraries. It has received awards and recognition, including the 2023 James H. Wilkinson Prize for Numerical Software and the 2020 SIAM Activity Group on Supercomputing Best Paper Prize. BLIS provides a new BLAS-like API and a compatibility layer for traditional BLAS routine calls. It offers features such as object-based API, typed API, BLAS and CBLAS compatibility layers.
|
||||||
|
|
||||||
|
Project URL: https://github.com/flame/blis
|
||||||
|
|
||||||
|
### Prepare:
|
||||||
|
|
||||||
|
Compile BLIS:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
git clone https://github.com/flame/blis
|
||||||
|
cd blis
|
||||||
|
./configure --enable-cblas -t openmp,pthreads auto
|
||||||
|
# will install to /usr/local/ by default.
|
||||||
|
make -j
|
||||||
|
```
|
||||||
|
|
||||||
|
Install BLIS:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
sudo make install
|
||||||
|
```
|
||||||
|
|
||||||
|
We recommend using OpenMP since it makes it easier to control which cores are being used.
|
||||||
|
|
||||||
|
### llama.cpp compilation
|
||||||
|
|
||||||
|
Makefile:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
make LLAMA_BLIS=1 -j
|
||||||
|
# make LLAMA_BLIS=1 benchmark-matmult
|
||||||
|
```
|
||||||
|
|
||||||
|
CMake:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
mkdir build
|
||||||
|
cd build
|
||||||
|
cmake -DLLAMA_BLAS=ON -DLLAMA_BLAS_VENDOR=FLAME ..
|
||||||
|
make -j
|
||||||
|
```
|
||||||
|
|
||||||
|
### llama.cpp execution
|
||||||
|
|
||||||
|
According to the BLIS documentation, we can set the following
|
||||||
|
environment variables to modify the behavior of openmp:
|
||||||
|
|
||||||
|
```
|
||||||
|
export GOMP_CPU_AFFINITY="0-19"
|
||||||
|
export BLIS_NUM_THREADS=14
|
||||||
|
```
|
||||||
|
|
||||||
|
And then run the binaries as normal.
|
||||||
|
|
||||||
|
|
||||||
|
### Intel specific issue
|
||||||
|
|
||||||
|
Some users may get an error message saying that `libimf.so` cannot be found.
|
||||||
|
Please follow this [stackoverflow page](https://stackoverflow.com/questions/70687930/intel-oneapi-2022-libimf-so-no-such-file-or-directory-during-openmpi-compila).
|
||||||
|
|
||||||
|
### Reference:
|
||||||
|
|
||||||
|
1. https://github.com/flame/blis#getting-started
|
||||||
|
2. https://github.com/flame/blis/blob/master/docs/Multithreading.md
|
|
@ -65,7 +65,8 @@ endif()
|
||||||
|
|
||||||
# 3rd party libs
|
# 3rd party libs
|
||||||
option(LLAMA_ACCELERATE "llama: enable Accelerate framework" ON)
|
option(LLAMA_ACCELERATE "llama: enable Accelerate framework" ON)
|
||||||
option(LLAMA_OPENBLAS "llama: use OpenBLAS" OFF)
|
option(LLAMA_BLAS "llama: use BLAS" OFF)
|
||||||
|
option(LLAMA_BLAS_VENDOR "llama: BLA_VENDOR from https://cmake.org/cmake/help/latest/module/FindBLAS.html#blas-lapack-vendors" Generic)
|
||||||
option(LLAMA_CUBLAS "llama: use cuBLAS" OFF)
|
option(LLAMA_CUBLAS "llama: use cuBLAS" OFF)
|
||||||
option(LLAMA_CLBLAST "llama: use CLBlast" OFF)
|
option(LLAMA_CLBLAST "llama: use CLBlast" OFF)
|
||||||
|
|
||||||
|
@ -145,36 +146,28 @@ if (APPLE AND LLAMA_ACCELERATE)
|
||||||
endif()
|
endif()
|
||||||
endif()
|
endif()
|
||||||
|
|
||||||
if (LLAMA_OPENBLAS)
|
if (LLAMA_BLAS)
|
||||||
if (LLAMA_STATIC)
|
if (LLAMA_STATIC)
|
||||||
set(BLA_STATIC ON)
|
set(BLA_STATIC ON)
|
||||||
endif()
|
endif()
|
||||||
|
if ($(CMAKE_VERSION) VERSION_GREATER_EQUAL 3.22)
|
||||||
set(BLA_VENDOR OpenBLAS)
|
set(BLA_SIZEOF_INTEGER 8)
|
||||||
|
endif()
|
||||||
|
set(BLA_VENDOR ${LLAMA_BLAS_VENDOR})
|
||||||
find_package(BLAS)
|
find_package(BLAS)
|
||||||
if (BLAS_FOUND)
|
if (BLAS_FOUND)
|
||||||
message(STATUS "OpenBLAS found")
|
message(STATUS "BLAS found, Libraries: ${BLAS_LIBRARIES}")
|
||||||
|
|
||||||
|
add_compile_options(${BLAS_LINKER_FLAGS})
|
||||||
add_compile_definitions(GGML_USE_OPENBLAS)
|
add_compile_definitions(GGML_USE_OPENBLAS)
|
||||||
add_link_options(${BLAS_LIBRARIES})
|
set(LLAMA_EXTRA_LIBS ${LLAMA_EXTRA_LIBS} ${BLAS_LIBRARIES})
|
||||||
set(LLAMA_EXTRA_LIBS ${LLAMA_EXTRA_LIBS} openblas)
|
|
||||||
|
|
||||||
# find header file
|
message("${BLAS_LIBRARIES} ${BLAS_INCLUDE_DIRS}")
|
||||||
set(OPENBLAS_INCLUDE_SEARCH_PATHS
|
include_directories(${BLAS_INCLUDE_DIRS})
|
||||||
/usr/include
|
|
||||||
/usr/include/openblas
|
|
||||||
/usr/include/openblas-base
|
|
||||||
/usr/local/include
|
|
||||||
/usr/local/include/openblas
|
|
||||||
/usr/local/include/openblas-base
|
|
||||||
/opt/OpenBLAS/include
|
|
||||||
$ENV{OpenBLAS_HOME}
|
|
||||||
$ENV{OpenBLAS_HOME}/include
|
|
||||||
)
|
|
||||||
find_path(OPENBLAS_INC NAMES cblas.h PATHS ${OPENBLAS_INCLUDE_SEARCH_PATHS})
|
|
||||||
add_compile_options(-I${OPENBLAS_INC})
|
|
||||||
else()
|
else()
|
||||||
message(WARNING "OpenBLAS not found")
|
message(WARNING "BLAS not found, please refer to "
|
||||||
|
"https://cmake.org/cmake/help/latest/module/FindBLAS.html#blas-lapack-vendors"
|
||||||
|
" to set correct LLAMA_BLAS_VENDOR")
|
||||||
endif()
|
endif()
|
||||||
endif()
|
endif()
|
||||||
|
|
||||||
|
|
4
Makefile
4
Makefile
|
@ -122,6 +122,10 @@ ifdef LLAMA_OPENBLAS
|
||||||
LDFLAGS += -lopenblas
|
LDFLAGS += -lopenblas
|
||||||
endif
|
endif
|
||||||
endif
|
endif
|
||||||
|
ifdef LLAMA_BLIS
|
||||||
|
CFLAGS += -DGGML_USE_OPENBLAS -I/usr/local/include/blis -I/usr/include/blis
|
||||||
|
LDFLAGS += -lblis -L/usr/local/lib
|
||||||
|
endif
|
||||||
ifdef LLAMA_CUBLAS
|
ifdef LLAMA_CUBLAS
|
||||||
CFLAGS += -DGGML_USE_CUBLAS -I/usr/local/cuda/include -I/opt/cuda/include -I$(CUDA_PATH)/targets/x86_64-linux/include
|
CFLAGS += -DGGML_USE_CUBLAS -I/usr/local/cuda/include -I/opt/cuda/include -I$(CUDA_PATH)/targets/x86_64-linux/include
|
||||||
CXXFLAGS += -DGGML_USE_CUBLAS -I/usr/local/cuda/include -I/opt/cuda/include -I$(CUDA_PATH)/targets/x86_64-linux/include
|
CXXFLAGS += -DGGML_USE_CUBLAS -I/usr/local/cuda/include -I/opt/cuda/include -I$(CUDA_PATH)/targets/x86_64-linux/include
|
||||||
|
|
19
README.md
19
README.md
|
@ -56,7 +56,7 @@ The main goal of `llama.cpp` is to run the LLaMA model using 4-bit integer quant
|
||||||
- Mixed F16 / F32 precision
|
- Mixed F16 / F32 precision
|
||||||
- 4-bit, 5-bit and 8-bit integer quantization support
|
- 4-bit, 5-bit and 8-bit integer quantization support
|
||||||
- Runs on the CPU
|
- Runs on the CPU
|
||||||
- OpenBLAS support
|
- Supports OpenBLAS/Apple BLAS/ARM Performance Lib/ATLAS/BLIS/Intel MKL/NVHPC/ACML/SCSL/SGIMATH and [more](https://cmake.org/cmake/help/latest/module/FindBLAS.html#blas-lapack-vendors) in BLAS
|
||||||
- cuBLAS and CLBlast support
|
- cuBLAS and CLBlast support
|
||||||
|
|
||||||
The original implementation of `llama.cpp` was [hacked in an evening](https://github.com/ggerganov/llama.cpp/issues/33#issuecomment-1465108022).
|
The original implementation of `llama.cpp` was [hacked in an evening](https://github.com/ggerganov/llama.cpp/issues/33#issuecomment-1465108022).
|
||||||
|
@ -274,10 +274,25 @@ Building the program with BLAS support may lead to some performance improvements
|
||||||
```bash
|
```bash
|
||||||
mkdir build
|
mkdir build
|
||||||
cd build
|
cd build
|
||||||
cmake .. -DLLAMA_OPENBLAS=ON
|
cmake .. -DLLAMA_BLAS=ON -DLLAMA_BLAS_VENDOR=OpenBLAS
|
||||||
cmake --build . --config Release
|
cmake --build . --config Release
|
||||||
```
|
```
|
||||||
|
|
||||||
|
- BLIS
|
||||||
|
|
||||||
|
Check [BLIS.md](BLIS.md) for more information.
|
||||||
|
|
||||||
|
- Intel MKL
|
||||||
|
|
||||||
|
By default, `LLAMA_BLAS_VENDOR` is set to `Generic`, so if you have already sourced the Intel environment script and pass `-DLLAMA_BLAS=ON` to cmake, the MKL version of BLAS will automatically be selected. You may also specify it explicitly:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
mkdir build
|
||||||
|
cd build
|
||||||
|
cmake .. -DLLAMA_BLAS=ON -DLLAMA_BLAS_VENDOR=Intel10_64lp -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx
|
||||||
|
cmake --build . --config Release
|
||||||
|
```
|
||||||
|
|
||||||
- cuBLAS
|
- cuBLAS
|
||||||
|
|
||||||
This provides BLAS acceleration using the CUDA cores of your Nvidia GPU. Make sure to have the CUDA toolkit installed. You can download it from your Linux distro's package manager or from here: [CUDA Toolkit](https://developer.nvidia.com/cuda-downloads).
|
This provides BLAS acceleration using the CUDA cores of your Nvidia GPU. Make sure to have the CUDA toolkit installed. You can download it from your Linux distro's package manager or from here: [CUDA Toolkit](https://developer.nvidia.com/cuda-downloads).
|
||||||
|
|
Loading…
Reference in a new issue