Add LLAMA_DEFAULT_RMS_EPS so the default RMS norm epsilon can be changed at build time (#2384)

Co-authored-by: Iwan Kawrakow <iwan.kawrakow@gmail.com>
This commit is contained in:
Kawrakow 2023-07-25 18:35:53 +03:00 committed by GitHub
parent 07aaa0f63f
commit eb542d3932
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
5 changed files with 13 additions and 5 deletions

View file

@ -8,7 +8,11 @@
#pragma warning(disable: 4244 4267) // possible loss of data #pragma warning(disable: 4244 4267) // possible loss of data
#endif #endif
static const float rms_norm_eps = 1e-6f; #ifdef LLAMA_DEFAULT_RMS_EPS
static const float rms_norm_eps = LLAMA_DEFAULT_RMS_EPS;
#else
static const float rms_norm_eps = 5e-6f;
#endif
float frand() { float frand() {
return (float)rand()/(float)RAND_MAX; return (float)rand()/(float)RAND_MAX;

View file

@ -34,7 +34,7 @@ struct gpt_params {
int32_t main_gpu = 0; // the GPU that is used for scratch and small tensors int32_t main_gpu = 0; // the GPU that is used for scratch and small tensors
float tensor_split[LLAMA_MAX_DEVICES] = {0}; // how split tensors should be distributed across GPUs float tensor_split[LLAMA_MAX_DEVICES] = {0}; // how split tensors should be distributed across GPUs
int32_t n_probs = 0; // if greater than 0, output the probabilities of top n_probs tokens. int32_t n_probs = 0; // if greater than 0, output the probabilities of top n_probs tokens.
float rms_norm_eps = 1e-6; // rms norm epsilon float rms_norm_eps = LLAMA_DEFAULT_RMS_EPS; // rms norm epsilon
float rope_freq_base = 10000.0f; // RoPE base frequency float rope_freq_base = 10000.0f; // RoPE base frequency
float rope_freq_scale = 1.0f; // RoPE frequency scaling factor float rope_freq_scale = 1.0f; // RoPE frequency scaling factor

View file

@ -16,7 +16,7 @@
#pragma warning(disable: 4244 4267) // possible loss of data #pragma warning(disable: 4244 4267) // possible loss of data
#endif #endif
static const float rms_norm_eps = 1e-6f; static const float rms_norm_eps = LLAMA_DEFAULT_RMS_EPS;
struct random_normal_distribution { struct random_normal_distribution {
std::mt19937 gen; std::mt19937 gen;

View file

@ -186,7 +186,7 @@ struct llama_hparams {
// LLaMAv2 // LLaMAv2
// TODO: load from model data hparams // TODO: load from model data hparams
float f_ffn_mult = 1.0f; float f_ffn_mult = 1.0f;
float f_rms_norm_eps = 1e-6f; float f_rms_norm_eps = LLAMA_DEFAULT_RMS_EPS;
float rope_freq_base = 10000.0f; float rope_freq_base = 10000.0f;
float rope_freq_scale = 1.0f; float rope_freq_scale = 1.0f;
@ -870,7 +870,7 @@ struct llama_context_params llama_context_default_params() {
/*.n_ctx =*/ 512, /*.n_ctx =*/ 512,
/*.n_batch =*/ 512, /*.n_batch =*/ 512,
/*.n_gqa =*/ 1, /*.n_gqa =*/ 1,
/*.rms_norm_eps =*/ 1e-6f, /*.rms_norm_eps =*/ LLAMA_DEFAULT_RMS_EPS,
/*.gpu_layers =*/ 0, /*.gpu_layers =*/ 0,
/*.main_gpu =*/ 0, /*.main_gpu =*/ 0,
/*.tensor_split =*/ nullptr, /*.tensor_split =*/ nullptr,

View file

@ -53,6 +53,10 @@
#define LLAMA_SUPPORTS_GPU_OFFLOAD #define LLAMA_SUPPORTS_GPU_OFFLOAD
#endif #endif
#ifndef LLAMA_DEFAULT_RMS_EPS
#define LLAMA_DEFAULT_RMS_EPS 5e-6f
#endif
#ifdef __cplusplus #ifdef __cplusplus
extern "C" { extern "C" {
#endif #endif