Release 8-bit models' memory

This commit is contained in:
oobabooga 2023-01-19 12:01:58 -03:00
parent f9faad4cfa
commit 759da435e3

View File

@ -11,6 +11,7 @@ import transformers
from html_generator import *
from transformers import AutoTokenizer, AutoModelForCausalLM
import warnings
import gc
transformers.logging.set_verbosity_error()
@ -151,6 +152,7 @@ def generate_reply(question, tokens, inference_settings, selected_model, eos_tok
model = None
tokenizer = None
if not args.cpu:
gc.collect()
torch.cuda.empty_cache()
model, tokenizer = load_model(model_name)
if inference_settings != loaded_preset: