OpenLLaMA 3B support (#1588)

This adds support to llama.cpp for loading the OpenLLaMA 3B model.

Still missing are the changes to convert.py required to convert the model correctly. convert.py needs to start reading the JSON configuration of HF models instead of deriving the values by guessing.
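
A minimal sketch of that missing convert.py piece, assuming the standard Hugging Face config.json field names (the helper name, returned keys, and example path are illustrative only, not part of this commit):

    # Hypothetical sketch: read hyperparameters from the HF config.json
    # instead of guessing them. load_hf_hparams and the returned keys are
    # illustrative, not the actual convert.py interface.
    import json
    from pathlib import Path

    def load_hf_hparams(model_dir: str) -> dict:
        config = json.loads((Path(model_dir) / "config.json").read_text())
        return {
            "n_vocab": config["vocab_size"],
            "n_embd":  config["hidden_size"],
            "n_head":  config["num_attention_heads"],
            "n_layer": config["num_hidden_layers"],  # 26 for OpenLLaMA 3B
        }

    # e.g. hparams = load_hf_hparams("open_llama_3b")  # illustrative path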

Co-authored-by: FNsi <125447286+FNsi@users.noreply.github.com>
commit ffb06a345e
parent 7552ac5863
Author: Henri Vasserman
Date:   2023-05-30 21:24:22 +03:00 (committed via GitHub)

--- a/llama.cpp
+++ b/llama.cpp
@@ -42,6 +42,7 @@
 // available llama models
 enum e_model {
     MODEL_UNKNOWN,
+    MODEL_3B,
     MODEL_7B,
     MODEL_13B,
     MODEL_30B,
@@ -58,6 +59,7 @@ static const size_t MB = 1024*1024;
 static const std::map<e_model, size_t> & MEM_REQ_SCRATCH0()
 {
     static std::map<e_model, size_t> k_sizes = {
+        { MODEL_3B, 128ull * MB },
         { MODEL_7B, 512ull * MB },
         { MODEL_13B, 512ull * MB },
         { MODEL_30B, 512ull * MB },
@@ -69,6 +71,7 @@ static const std::map<e_model, size_t> & MEM_REQ_SCRATCH0()
 static const std::map<e_model, size_t> & MEM_REQ_SCRATCH1()
 {
     static std::map<e_model, size_t> k_sizes = {
+        { MODEL_3B, 128ull * MB },
         { MODEL_7B, 512ull * MB },
         { MODEL_13B, 512ull * MB },
         { MODEL_30B, 512ull * MB },
@@ -81,6 +84,7 @@ static const std::map<e_model, size_t> & MEM_REQ_SCRATCH1()
 static const std::map<e_model, size_t> & MEM_REQ_KV_SELF()
 {
     static std::map<e_model, size_t> k_sizes = {
+        { MODEL_3B, 682ull * MB },
         { MODEL_7B, 1026ull * MB },
         { MODEL_13B, 1608ull * MB },
         { MODEL_30B, 3124ull * MB },
@@ -94,6 +98,7 @@ static const std::map<e_model, size_t> & MEM_REQ_KV_SELF()
 static const std::map<e_model, size_t> & MEM_REQ_EVAL()
 {
     static std::map<e_model, size_t> k_sizes = {
+        { MODEL_3B, 512ull * MB },
         { MODEL_7B, 768ull * MB },
         { MODEL_13B, 1024ull * MB },
         { MODEL_30B, 1280ull * MB },
@@ -899,6 +904,7 @@ static const char *llama_ftype_name(enum llama_ftype ftype) {
 static const char *llama_model_type_name(e_model type) {
     switch (type) {
+        case MODEL_3B: return "3B";
         case MODEL_7B: return "7B";
         case MODEL_13B: return "13B";
         case MODEL_30B: return "30B";
@@ -932,6 +938,7 @@ static void llama_model_load_internal(
     {
         switch (hparams.n_layer) {
+            case 26: model.type = e_model::MODEL_3B; break;
             case 32: model.type = e_model::MODEL_7B; break;
             case 40: model.type = e_model::MODEL_13B; break;
             case 60: model.type = e_model::MODEL_30B; break;