Mirror of https://github.com/oobabooga/text-generation-webui.git, synced 2024-10-01 01:26:03 -04:00
Add a warning about ExLlamaV2 without flash-attn
commit 605ec3c9f2
parent f0ef971edb
@@ -13,6 +13,17 @@ from exllamav2.generator import ExLlamaV2BaseGenerator, ExLlamaV2Sampler
 from modules import shared
 from modules.text_generation import get_max_prompt_length
 
+try:
+    import flash_attn
+except ModuleNotFoundError:
+    logger.warning(
+        'You are running ExLlamaV2 without flash-attention. This will cause the VRAM usage '
+        'to be a lot higher than it could be.\n'
+        'Try installing flash-attention following the instructions here: '
+        'https://github.com/Dao-AILab/flash-attention#installation-and-features'
+    )
+    pass
+
 
 class Exllamav2Model:
     def __init__(self):
@@ -11,6 +11,17 @@ from transformers.modeling_outputs import CausalLMOutputWithPast
 from modules import shared
 from modules.logging_colors import logger
 
+try:
+    import flash_attn
+except ModuleNotFoundError:
+    logger.warning(
+        'You are running ExLlamaV2 without flash-attention. This will cause the VRAM usage '
+        'to be a lot higher than it could be.\n'
+        'Try installing flash-attention following the instructions here: '
+        'https://github.com/Dao-AILab/flash-attention#installation-and-features'
+    )
+    pass
+
 
 class Exllamav2HF(PreTrainedModel):
     def __init__(self, config: ExLlamaV2Config):
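Both hunks add the same optional-dependency guard: try to import flash_attn and, if it is missing, log a warning through the webui's logger instead of failing. The sketch below shows the same pattern in standalone form as an illustration only; the warn_if_missing helper and the use of Python's standard logging module are assumptions for the example, not part of this commit.

import importlib.util
import logging

logger = logging.getLogger(__name__)

def warn_if_missing(package: str, hint: str) -> bool:
    # Return True if `package` is importable; otherwise log a warning with an install hint.
    if importlib.util.find_spec(package) is None:
        logger.warning('Optional dependency "%s" is not installed. %s', package, hint)
        return False
    return True

# Mirrors the intent of the commit: warn, but keep running, when flash-attn is absent.
warn_if_missing(
    'flash_attn',
    'Try installing flash-attention following the instructions here: '
    'https://github.com/Dao-AILab/flash-attention#installation-and-features'
)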