# text-generation-webui/modules/loaders.py

import functools
from collections import OrderedDict

import gradio as gr

from modules import shared

# Maps each model loader's name to the names of the UI elements that
# make_loader_params_visible() marks as visible when that loader is selected.
#
# 'gpu_memory' is a placeholder: get_all_params() and
# make_loader_params_visible() replace it with the concrete keys returned by
# get_gpu_memory_keys().
#
# Each name appears at most once per loader.
loaders_and_params = OrderedDict({
    'Transformers': [
        'cpu_memory',
        'gpu_memory',
        'trust_remote_code',
        'load_in_8bit',
        'bf16',
        'cpu',
        'disk',
        'auto_devices',
        'load_in_4bit',
        'use_double_quant',
        'quant_type',
        'compute_dtype',
        'alpha_value',
        'compress_pos_emb',
        'transformers_info',
    ],
    'ExLlama_HF': [
        'gpu_split',
        'max_seq_len',
        'alpha_value',
        'compress_pos_emb',
        'exllama_HF_info',
    ],
    'ExLlama': [
        'gpu_split',
        'max_seq_len',
        'alpha_value',
        'compress_pos_emb',
        'exllama_info',
    ],
    'AutoGPTQ': [
        'triton',
        'no_inject_fused_attention',
        'no_inject_fused_mlp',
        'no_use_cuda_fp16',
        'wbits',
        'groupsize',
        'desc_act',
        'gpu_memory',
        'cpu_memory',
        'cpu',
        'disk',
        'auto_devices',
        'trust_remote_code',
        'autogptq_info',
    ],
    'GPTQ-for-LLaMa': [
        'wbits',
        'groupsize',
        'model_type',
        'pre_layer',
        'gptq_for_llama_info',
    ],
    'llama.cpp': [
        'n_ctx',
        'n_gqa',
        'rms_norm_eps',
        'n_gpu_layers',
        'n_batch',
        'threads',
        'no_mmap',
        'low_vram',
        'mlock',
        'llama_cpp_seed',
        'alpha_value',
        'compress_pos_emb',
        'cpu',
    ],
    'llamacpp_HF': [
        'n_ctx',
        'n_gqa',
        'rms_norm_eps',
        'n_gpu_layers',
        'n_batch',
        'threads',
        'no_mmap',
        'low_vram',
        'mlock',
        'alpha_value',
        'compress_pos_emb',
        'cpu',
        'llamacpp_HF_info',
    ],
    'ctransformers': [
        'n_gpu_layers',
        'n_batch',
        'threads',
        'model_type',
    ],
})

# Sampler names shared by the ExLlama_HF and llamacpp_HF entries below.
_HF_WRAPPER_SAMPLERS = frozenset((
    'temperature',
    'top_p',
    'top_k',
    'typical_p',
    'epsilon_cutoff',
    'eta_cutoff',
    'tfs',
    'top_a',
    'repetition_penalty',
    'repetition_penalty_range',
    'encoder_repetition_penalty',
    'no_repeat_ngram_size',
    'min_length',
    'seed',
    'do_sample',
    'mirostat_mode',
    'mirostat_tau',
    'mirostat_eta',
    'ban_eos_token',
    'add_bos_token',
    'skip_special_tokens',
    'auto_max_new_tokens',
))

# Sampler names shared by the Transformers, AutoGPTQ and GPTQ-for-LLaMa
# entries: everything in the set above plus the extra names listed here.
_TRANSFORMERS_SAMPLERS = _HF_WRAPPER_SAMPLERS | {
    'penalty_alpha',
    'num_beams',
    'length_penalty',
    'early_stopping',
    'guidance_scale',
    'negative_prompt',
}

# Maps each loader name to the set of sampler names that blacklist_samplers()
# keeps visible for that loader. Every value is its own mutable set, so
# changing one loader's entry never affects another's.
loaders_samplers = {
    'Transformers': set(_TRANSFORMERS_SAMPLERS),
    'ExLlama_HF': set(_HF_WRAPPER_SAMPLERS),
    'ExLlama': {
        'temperature',
        'top_p',
        'top_k',
        'typical_p',
        'repetition_penalty',
        'repetition_penalty_range',
        'seed',
        'guidance_scale',
        'negative_prompt',
        'ban_eos_token',
        'auto_max_new_tokens',
    },
    'AutoGPTQ': set(_TRANSFORMERS_SAMPLERS),
    'GPTQ-for-LLaMa': set(_TRANSFORMERS_SAMPLERS),
    'llama.cpp': {
        'temperature',
        'top_p',
        'top_k',
        'tfs',
        'repetition_penalty',
        'mirostat_mode',
        'mirostat_tau',
        'mirostat_eta',
        'ban_eos_token',
    },
    'llamacpp_HF': set(_HF_WRAPPER_SAMPLERS),
    'ctransformers': {
        'temperature',
        'top_p',
        'top_k',
        'repetition_penalty',
        'repetition_penalty_range',
    },
}

# Maps a loader name to the model-type choices returned by get_model_types().
# Loaders not listed here fall back to ["None"] in get_model_types().
#
# Keep a trailing comma after every entry: adjacent string literals are
# concatenated by Python, so a missing comma silently merges two choices.
loaders_model_types = {
    'GPTQ-for-LLaMa': [
        "None",
        "llama",
        "opt",
        "gptj",
    ],
    'ctransformers': [
        "None",
        "gpt2",
        "gptj",
        "gptneox",
        "llama",
        "mpt",
        "dollyv2",
        "replit",
        "starcoder",
        "falcon",
    ],
}


@functools.cache
def list_all_samplers():
    """Return a sorted list of every sampler name used by any loader.

    The result is the union of all value sets in ``loaders_samplers``.
    Because of ``functools.cache`` it is computed on the first call only, and
    every later call returns that same list object.
    """
    combined = set().union(*loaders_samplers.values())
    return sorted(combined)


def blacklist_samplers(loader):
    """Build one ``gr.update`` per sampler, showing only those the loader uses.

    The returned list is ordered like ``list_all_samplers()``.

    Args:
        loader: A key of ``loaders_samplers``, or ``'All'`` to make every
            sampler visible.

    Returns:
        A list of ``gr.update(visible=...)`` objects, one per sampler name.

    Raises:
        KeyError: If ``loader`` is neither ``'All'`` nor a key of
            ``loaders_samplers``.
    """
    sampler_names = list_all_samplers()
    if loader == 'All':
        return [gr.update(visible=True) for _ in sampler_names]

    supported = loaders_samplers[loader]
    return [gr.update(visible=name in supported) for name in sampler_names]


def get_model_types(loader):
    """Return the model-type choices registered for ``loader``.

    Args:
        loader: Loader name to look up in ``loaders_model_types``.

    Returns:
        The list stored in ``loaders_model_types`` for that loader, or a new
        ``["None"]`` list when the loader has no entry.
    """
    return loaders_model_types.get(loader, ["None"])


def get_gpu_memory_keys():
    """Return the entries of ``shared.gradio`` that start with 'gpu_memory'.

    Entries are returned in the order that iterating ``shared.gradio`` yields
    them.
    """
    matching = []
    for name in shared.gradio:
        if name.startswith('gpu_memory'):
            matching.append(name)

    return matching


@functools.cache
def get_all_params():
    """Return a sorted list of every parameter name used by any loader.

    The names are gathered from all lists in ``loaders_and_params``. If the
    'gpu_memory' placeholder is among them, it is replaced by the keys from
    ``get_gpu_memory_keys()``.

    Because of ``functools.cache`` the result is computed on the first call
    only, so it reflects whatever ``get_gpu_memory_keys()`` returned then.
    """
    names = {param for params in loaders_and_params.values() for param in params}

    if 'gpu_memory' in names:
        names.discard('gpu_memory')
        names.update(get_gpu_memory_keys())

    return sorted(names)


def make_loader_params_visible(loader):
    """Build one ``gr.update`` per parameter, showing only those the loader uses.

    The returned list is ordered like ``get_all_params()``.

    Args:
        loader: Loader name. If it is not a key of ``loaders_and_params``,
            every parameter is hidden.

    Returns:
        A list of ``gr.update(visible=...)`` objects, one per name returned by
        ``get_all_params()``.
    """
    all_params = get_all_params()

    # Work on a copy: editing the list stored in loaders_and_params would
    # permanently rewrite the shared table (dropping the 'gpu_memory'
    # placeholder and appending whatever keys existed at the time of the call).
    params = list(loaders_and_params.get(loader, []))

    if 'gpu_memory' in params:
        # Swap the placeholder for the concrete per-GPU element keys.
        params.remove('gpu_memory')
        params += get_gpu_memory_keys()

    visible = set(params)
    return [gr.update(visible=k in visible) for k in all_params]
Reorganize model loading UI completely (#2720) 2023-06-16 18:00:37 -04:00			`import functools`
Add ctransformers support (#3313) --------- Co-authored-by: cal066 <cal066@users.noreply.github.com> Co-authored-by: oobabooga <112222186+oobabooga@users.noreply.github.com> Co-authored-by: randoentity <137087500+randoentity@users.noreply.github.com> 2023-08-11 13:41:33 -04:00			`from collections import OrderedDict`
Reorganize model loading UI completely (#2720) 2023-06-16 18:00:37 -04:00
			`import gradio as gr`

			`from modules import shared`

Add ctransformers support (#3313) --------- Co-authored-by: cal066 <cal066@users.noreply.github.com> Co-authored-by: oobabooga <112222186+oobabooga@users.noreply.github.com> Co-authored-by: randoentity <137087500+randoentity@users.noreply.github.com> 2023-08-11 13:41:33 -04:00			`loaders_and_params = OrderedDict({`
			`'Transformers': [`
			`'cpu_memory',`
			`'gpu_memory',`
			`'trust_remote_code',`
			`'load_in_8bit',`
			`'bf16',`
			`'cpu',`
			`'disk',`
			`'auto_devices',`
			`'load_in_4bit',`
			`'use_double_quant',`
			`'quant_type',`
			`'compute_dtype',`
			`'trust_remote_code',`
			`'alpha_value',`
			`'compress_pos_emb',`
			`'transformers_info'`
			`],`
			`'ExLlama_HF': [`
			`'gpu_split',`
			`'max_seq_len',`
			`'alpha_value',`
			`'compress_pos_emb',`
			`'exllama_HF_info',`
			`],`
			`'ExLlama': [`
			`'gpu_split',`
			`'max_seq_len',`
			`'alpha_value',`
			`'compress_pos_emb',`
			`'exllama_info',`
			`],`
Reorganize model loading UI completely (#2720) 2023-06-16 18:00:37 -04:00			`'AutoGPTQ': [`
			`'triton',`
			`'no_inject_fused_attention',`
			`'no_inject_fused_mlp',`
Add --no_use_cuda_fp16 param for AutoGPTQ 2023-06-23 11:22:56 -04:00			`'no_use_cuda_fp16',`
Reorganize model loading UI completely (#2720) 2023-06-16 18:00:37 -04:00			`'wbits',`
			`'groupsize',`
			`'desc_act',`
			`'gpu_memory',`
			`'cpu_memory',`
			`'cpu',`
			`'disk',`
			`'auto_devices',`
			`'trust_remote_code',`
			`'autogptq_info',`
			`],`
			`'GPTQ-for-LLaMa': [`
			`'wbits',`
			`'groupsize',`
			`'model_type',`
			`'pre_layer',`
			`'gptq_for_llama_info',`
			`],`
			`'llama.cpp': [`
			`'n_ctx',`
Add llama-2-70b GGML support (#3285) 2023-07-24 15:37:03 -04:00			`'n_gqa',`
			`'rms_norm_eps',`
Reorganize model loading UI completely (#2720) 2023-06-16 18:00:37 -04:00			`'n_gpu_layers',`
			`'n_batch',`
			`'threads',`
			`'no_mmap',`
Add low vram mode on llama cpp (#3076) 2023-07-12 10:05:13 -04:00			`'low_vram',`
Reorganize model loading UI completely (#2720) 2023-06-16 18:00:37 -04:00			`'mlock',`
			`'llama_cpp_seed',`
[GGML] Support for customizable RoPE (#3083) --------- Co-authored-by: oobabooga <112222186+oobabooga@users.noreply.github.com> 2023-07-17 21:32:37 -04:00			`'alpha_value',`
Add RoPE scaling support for transformers (including dynamic NTK) https://github.com/huggingface/transformers/pull/24653 2023-08-09 00:24:28 -04:00			`'compress_pos_emb',`
Add the --cpu option for llama.cpp to prevent CUDA from being used (#3432) 2023-08-03 10:00:36 -04:00			`'cpu',`
Reorganize model loading UI completely (#2720) 2023-06-16 18:00:37 -04:00			`],`
Create llamacpp_HF loader (#3062) 2023-07-16 01:21:13 -04:00			`'llamacpp_HF': [`
			`'n_ctx',`
Add llama-2-70b GGML support (#3285) 2023-07-24 15:37:03 -04:00			`'n_gqa',`
			`'rms_norm_eps',`
Create llamacpp_HF loader (#3062) 2023-07-16 01:21:13 -04:00			`'n_gpu_layers',`
			`'n_batch',`
			`'threads',`
			`'no_mmap',`
			`'low_vram',`
			`'mlock',`
[GGML] Support for customizable RoPE (#3083) --------- Co-authored-by: oobabooga <112222186+oobabooga@users.noreply.github.com> 2023-07-17 21:32:37 -04:00			`'alpha_value',`
Add RoPE scaling support for transformers (including dynamic NTK) https://github.com/huggingface/transformers/pull/24653 2023-08-09 00:24:28 -04:00			`'compress_pos_emb',`
Add the --cpu option for llama.cpp to prevent CUDA from being used (#3432) 2023-08-03 10:00:36 -04:00			`'cpu',`
Create llamacpp_HF loader (#3062) 2023-07-16 01:21:13 -04:00			`'llamacpp_HF_info',`
			`],`
Add ctransformers support (#3313) --------- Co-authored-by: cal066 <cal066@users.noreply.github.com> Co-authored-by: oobabooga <112222186+oobabooga@users.noreply.github.com> Co-authored-by: randoentity <137087500+randoentity@users.noreply.github.com> 2023-08-11 13:41:33 -04:00			`'ctransformers': [`
			`'n_gpu_layers',`
			`'n_batch',`
			`'threads',`
ctransformers: move thread and seed parameters (#3543) 2023-08-12 23:04:03 -04:00			`'model_type'`
Add ExLlama support (#2444) 2023-06-16 19:35:38 -04:00			`]`
Add ctransformers support (#3313) --------- Co-authored-by: cal066 <cal066@users.noreply.github.com> Co-authored-by: oobabooga <112222186+oobabooga@users.noreply.github.com> Co-authored-by: randoentity <137087500+randoentity@users.noreply.github.com> 2023-08-11 13:41:33 -04:00			`})`
Reorganize model loading UI completely (#2720) 2023-06-16 18:00:37 -04:00
Add a "Filter by loader" menu to the Parameters tab 2023-07-31 21:44:00 -04:00			`loaders_samplers = {`
			`'Transformers': {`
			`'temperature',`
			`'top_p',`
			`'top_k',`
			`'typical_p',`
			`'epsilon_cutoff',`
			`'eta_cutoff',`
			`'tfs',`
			`'top_a',`
			`'repetition_penalty',`
			`'repetition_penalty_range',`
			`'encoder_repetition_penalty',`
			`'no_repeat_ngram_size',`
			`'min_length',`
			`'seed',`
			`'do_sample',`
			`'penalty_alpha',`
			`'num_beams',`
			`'length_penalty',`
			`'early_stopping',`
			`'mirostat_mode',`
			`'mirostat_tau',`
			`'mirostat_eta',`
Add Classifier Free Guidance (CFG) for Transformers/ExLlama (#3325) 2023-08-06 16:22:48 -04:00			`'guidance_scale',`
			`'negative_prompt',`
Add a "Filter by loader" menu to the Parameters tab 2023-07-31 21:44:00 -04:00			`'ban_eos_token',`
			`'add_bos_token',`
			`'skip_special_tokens',`
Add auto_max_new_tokens parameter (#3419) 2023-08-02 13:52:20 -04:00			`'auto_max_new_tokens',`
Add a "Filter by loader" menu to the Parameters tab 2023-07-31 21:44:00 -04:00			`},`
			`'ExLlama_HF': {`
			`'temperature',`
			`'top_p',`
			`'top_k',`
			`'typical_p',`
			`'epsilon_cutoff',`
			`'eta_cutoff',`
			`'tfs',`
			`'top_a',`
			`'repetition_penalty',`
			`'repetition_penalty_range',`
			`'encoder_repetition_penalty',`
			`'no_repeat_ngram_size',`
			`'min_length',`
			`'seed',`
			`'do_sample',`
			`'mirostat_mode',`
			`'mirostat_tau',`
			`'mirostat_eta',`
			`'ban_eos_token',`
			`'add_bos_token',`
			`'skip_special_tokens',`
Add auto_max_new_tokens parameter (#3419) 2023-08-02 13:52:20 -04:00			`'auto_max_new_tokens',`
Add a "Filter by loader" menu to the Parameters tab 2023-07-31 21:44:00 -04:00			`},`
			`'ExLlama': {`
			`'temperature',`
			`'top_p',`
			`'top_k',`
			`'typical_p',`
			`'repetition_penalty',`
			`'repetition_penalty_range',`
			`'seed',`
Add Classifier Free Guidance (CFG) for Transformers/ExLlama (#3325) 2023-08-06 16:22:48 -04:00			`'guidance_scale',`
			`'negative_prompt',`
Add a "Filter by loader" menu to the Parameters tab 2023-07-31 21:44:00 -04:00			`'ban_eos_token',`
Implement auto_max_new_tokens for ExLlama 2023-08-02 14:01:29 -04:00			`'auto_max_new_tokens',`
Add a "Filter by loader" menu to the Parameters tab 2023-07-31 21:44:00 -04:00			`},`
			`'AutoGPTQ': {`
			`'temperature',`
			`'top_p',`
			`'top_k',`
			`'typical_p',`
			`'epsilon_cutoff',`
			`'eta_cutoff',`
			`'tfs',`
			`'top_a',`
			`'repetition_penalty',`
			`'repetition_penalty_range',`
			`'encoder_repetition_penalty',`
			`'no_repeat_ngram_size',`
			`'min_length',`
			`'seed',`
			`'do_sample',`
			`'penalty_alpha',`
			`'num_beams',`
			`'length_penalty',`
			`'early_stopping',`
			`'mirostat_mode',`
			`'mirostat_tau',`
			`'mirostat_eta',`
Add Classifier Free Guidance (CFG) for Transformers/ExLlama (#3325) 2023-08-06 16:22:48 -04:00			`'guidance_scale',`
			`'negative_prompt',`
Add a "Filter by loader" menu to the Parameters tab 2023-07-31 21:44:00 -04:00			`'ban_eos_token',`
			`'add_bos_token',`
			`'skip_special_tokens',`
Add auto_max_new_tokens parameter (#3419) 2023-08-02 13:52:20 -04:00			`'auto_max_new_tokens',`
Add a "Filter by loader" menu to the Parameters tab 2023-07-31 21:44:00 -04:00			`},`
			`'GPTQ-for-LLaMa': {`
			`'temperature',`
			`'top_p',`
			`'top_k',`
			`'typical_p',`
			`'epsilon_cutoff',`
			`'eta_cutoff',`
			`'tfs',`
			`'top_a',`
			`'repetition_penalty',`
			`'repetition_penalty_range',`
			`'encoder_repetition_penalty',`
			`'no_repeat_ngram_size',`
			`'min_length',`
			`'seed',`
			`'do_sample',`
			`'penalty_alpha',`
			`'num_beams',`
			`'length_penalty',`
			`'early_stopping',`
			`'mirostat_mode',`
			`'mirostat_tau',`
			`'mirostat_eta',`
Add Classifier Free Guidance (CFG) for Transformers/ExLlama (#3325) 2023-08-06 16:22:48 -04:00			`'guidance_scale',`
			`'negative_prompt',`
Add a "Filter by loader" menu to the Parameters tab 2023-07-31 21:44:00 -04:00			`'ban_eos_token',`
			`'add_bos_token',`
			`'skip_special_tokens',`
Add auto_max_new_tokens parameter (#3419) 2023-08-02 13:52:20 -04:00			`'auto_max_new_tokens',`
Add a "Filter by loader" menu to the Parameters tab 2023-07-31 21:44:00 -04:00			`},`
			`'llama.cpp': {`
			`'temperature',`
			`'top_p',`
			`'top_k',`
			`'tfs',`
			`'repetition_penalty',`
			`'mirostat_mode',`
			`'mirostat_tau',`
			`'mirostat_eta',`
			`'ban_eos_token',`
			`},`
			`'llamacpp_HF': {`
			`'temperature',`
			`'top_p',`
			`'top_k',`
			`'typical_p',`
			`'epsilon_cutoff',`
			`'eta_cutoff',`
			`'tfs',`
			`'top_a',`
			`'repetition_penalty',`
			`'repetition_penalty_range',`
			`'encoder_repetition_penalty',`
			`'no_repeat_ngram_size',`
			`'min_length',`
			`'seed',`
			`'do_sample',`
			`'mirostat_mode',`
			`'mirostat_tau',`
			`'mirostat_eta',`
			`'ban_eos_token',`
			`'add_bos_token',`
			`'skip_special_tokens',`
Add auto_max_new_tokens parameter (#3419) 2023-08-02 13:52:20 -04:00			`'auto_max_new_tokens',`
Add a "Filter by loader" menu to the Parameters tab 2023-07-31 21:44:00 -04:00			`},`
Add ctransformers support (#3313) --------- Co-authored-by: cal066 <cal066@users.noreply.github.com> Co-authored-by: oobabooga <112222186+oobabooga@users.noreply.github.com> Co-authored-by: randoentity <137087500+randoentity@users.noreply.github.com> 2023-08-11 13:41:33 -04:00			`'ctransformers': {`
			`'temperature',`
			`'top_p',`
			`'top_k',`
			`'repetition_penalty',`
Add repetition_penalty_range to ctransformers 2023-08-11 14:02:56 -04:00			`'repetition_penalty_range',`
Add ctransformers support (#3313) --------- Co-authored-by: cal066 <cal066@users.noreply.github.com> Co-authored-by: oobabooga <112222186+oobabooga@users.noreply.github.com> Co-authored-by: randoentity <137087500+randoentity@users.noreply.github.com> 2023-08-11 13:41:33 -04:00			`}`
			`}`

			`loaders_model_types = {`
			`'GPTQ-for-LLaMa': [`
			`"None",`
			`"llama",`
			`"opt",`
			`"gptj"`
			`],`
			`'ctransformers': [`
			`"None",`
			`"gpt2",`
			`"gptj",`
			`"gptneox",`
			`"llama",`
			`"mpt",`
			`"dollyv2"`
			`"replit",`
			`"starcoder",`
			`"falcon"`
			`],`
Add a "Filter by loader" menu to the Parameters tab 2023-07-31 21:44:00 -04:00			`}`


			`@functools.cache`
			`def list_all_samplers():`
			`all_samplers = set()`
			`for k in loaders_samplers:`
			`for sampler in loaders_samplers[k]:`
			`all_samplers.add(sampler)`

			`return sorted(all_samplers)`


			`def blacklist_samplers(loader):`
			`all_samplers = list_all_samplers()`
			`if loader == 'All':`
			`return [gr.update(visible=True) for sampler in all_samplers]`
			`else:`
			`return [gr.update(visible=True) if sampler in loaders_samplers[loader] else gr.update(visible=False) for sampler in all_samplers]`

Reorganize model loading UI completely (#2720) 2023-06-16 18:00:37 -04:00
Add ctransformers support (#3313) --------- Co-authored-by: cal066 <cal066@users.noreply.github.com> Co-authored-by: oobabooga <112222186+oobabooga@users.noreply.github.com> Co-authored-by: randoentity <137087500+randoentity@users.noreply.github.com> 2023-08-11 13:41:33 -04:00			`def get_model_types(loader):`
			`if loader in loaders_model_types:`
			`return loaders_model_types[loader]`

			`return ["None"]`


Reorganize model loading UI completely (#2720) 2023-06-16 18:00:37 -04:00			`def get_gpu_memory_keys():`
			`return [k for k in shared.gradio if k.startswith('gpu_memory')]`


			`@functools.cache`
			`def get_all_params():`
			`all_params = set()`
			`for k in loaders_and_params:`
			`for el in loaders_and_params[k]:`
			`all_params.add(el)`

			`if 'gpu_memory' in all_params:`
			`all_params.remove('gpu_memory')`
			`for k in get_gpu_memory_keys():`
			`all_params.add(k)`

			`return sorted(all_params)`


			`def make_loader_params_visible(loader):`
			`params = []`
			`all_params = get_all_params()`
			`if loader in loaders_and_params:`
			`params = loaders_and_params[loader]`

			`if 'gpu_memory' in params:`
			`params.remove('gpu_memory')`
			`params += get_gpu_memory_keys()`

			`return [gr.update(visible=True) if k in params else gr.update(visible=False) for k in all_params]`