mirror of
https://github.com/oobabooga/text-generation-webui.git
synced 2024-10-01 01:26:03 -04:00
Download optimizations (#2786)
* download_model_files metadata writing improvement
* line swap
* reduce line length
* safer download and greater block size
* Minor changes by pycodestyle

---------

Co-authored-by: oobabooga <112222186+oobabooga@users.noreply.github.com>
This commit is contained in:
parent
447569e31a
commit
b22c7199c9
@ -77,7 +77,6 @@ class ModelDownloader:
|
|||||||
if os.getenv('HF_USER') is not None and os.getenv('HF_PASS') is not None:
|
if os.getenv('HF_USER') is not None and os.getenv('HF_PASS') is not None:
|
||||||
self.s.auth = (os.getenv('HF_USER'), os.getenv('HF_PASS'))
|
self.s.auth = (os.getenv('HF_USER'), os.getenv('HF_PASS'))
|
||||||
|
|
||||||
|
|
||||||
def sanitize_model_and_branch_names(self, model, branch):
|
def sanitize_model_and_branch_names(self, model, branch):
|
||||||
if model[-1] == '/':
|
if model[-1] == '/':
|
||||||
model = model[:-1]
|
model = model[:-1]
|
||||||
@ -92,7 +91,6 @@ class ModelDownloader:
|
|||||||
|
|
||||||
return model, branch
|
return model, branch
|
||||||
|
|
||||||
|
|
||||||
def get_download_links_from_huggingface(self, model, branch, text_only=False):
|
def get_download_links_from_huggingface(self, model, branch, text_only=False):
|
||||||
base = "https://huggingface.co"
|
base = "https://huggingface.co"
|
||||||
page = f"/api/models/{model}/tree/{branch}"
|
page = f"/api/models/{model}/tree/{branch}"
|
||||||
@ -163,7 +161,6 @@ class ModelDownloader:
|
|||||||
|
|
||||||
return links, sha256, is_lora
|
return links, sha256, is_lora
|
||||||
|
|
||||||
|
|
||||||
def get_output_folder(self, model, branch, is_lora, base_folder=None):
|
def get_output_folder(self, model, branch, is_lora, base_folder=None):
|
||||||
if base_folder is None:
|
if base_folder is None:
|
||||||
base_folder = 'models' if not is_lora else 'loras'
|
base_folder = 'models' if not is_lora else 'loras'
|
||||||
@ -174,10 +171,11 @@ class ModelDownloader:
|
|||||||
output_folder = Path(base_folder) / output_folder
|
output_folder = Path(base_folder) / output_folder
|
||||||
return output_folder
|
return output_folder
|
||||||
|
|
||||||
|
|
||||||
def get_single_file(self, url, output_folder, start_from_scratch=False):
|
def get_single_file(self, url, output_folder, start_from_scratch=False):
|
||||||
filename = Path(url.rsplit('/', 1)[1])
|
filename = Path(url.rsplit('/', 1)[1])
|
||||||
output_path = output_folder / filename
|
output_path = output_folder / filename
|
||||||
|
headers = {}
|
||||||
|
mode = 'wb'
|
||||||
if output_path.exists() and not start_from_scratch:
|
if output_path.exists() and not start_from_scratch:
|
||||||
# Check if the file has already been downloaded completely
|
# Check if the file has already been downloaded completely
|
||||||
r = self.s.get(url, stream=True, timeout=20)
|
r = self.s.get(url, stream=True, timeout=20)
|
||||||
@ -187,50 +185,45 @@ class ModelDownloader:
|
|||||||
# Otherwise, resume the download from where it left off
|
# Otherwise, resume the download from where it left off
|
||||||
headers = {'Range': f'bytes={output_path.stat().st_size}-'}
|
headers = {'Range': f'bytes={output_path.stat().st_size}-'}
|
||||||
mode = 'ab'
|
mode = 'ab'
|
||||||
else:
|
|
||||||
headers = {}
|
|
||||||
mode = 'wb'
|
|
||||||
|
|
||||||
r = self.s.get(url, stream=True, headers=headers, timeout=20)
|
with self.s.get(url, stream=True, headers=headers, timeout=20) as r:
|
||||||
with open(output_path, mode) as f:
|
r.raise_for_status() # Do not continue the download if the request was unsuccessful
|
||||||
total_size = int(r.headers.get('content-length', 0))
|
total_size = int(r.headers.get('content-length', 0))
|
||||||
# Every 4MB we report an update
|
block_size = 1024 * 1024 # 1MB
|
||||||
block_size = 4*1024*1024
|
with open(output_path, mode) as f:
|
||||||
|
with tqdm.tqdm(total=total_size,
|
||||||
with tqdm.tqdm(total=total_size, unit='iB', unit_scale=True, bar_format='{l_bar}{bar}| {n_fmt:6}/{total_fmt:6} {rate_fmt:6}') as t:
|
unit='iB',
|
||||||
count = 0
|
unit_scale=True,
|
||||||
for data in r.iter_content(block_size):
|
bar_format='{l_bar}{bar}| {n_fmt:6}/{total_fmt:6} {rate_fmt:6}'
|
||||||
t.update(len(data))
|
) as t:
|
||||||
f.write(data)
|
count = 0
|
||||||
if self.progress_bar is not None:
|
for data in r.iter_content(block_size):
|
||||||
count += len(data)
|
t.update(len(data))
|
||||||
self.progress_bar(float(count)/float(total_size), f"Downloading {filename}")
|
f.write(data)
|
||||||
|
if self.progress_bar is not None:
|
||||||
|
count += len(data)
|
||||||
|
self.progress_bar(float(count) / float(total_size), f"Downloading {filename}")
|
||||||
|
|
||||||
def start_download_threads(self, file_list, output_folder, start_from_scratch=False, threads=1):
|
def start_download_threads(self, file_list, output_folder, start_from_scratch=False, threads=1):
|
||||||
thread_map(lambda url: self.get_single_file(url, output_folder, start_from_scratch=start_from_scratch), file_list, max_workers=threads, disable=True)
|
thread_map(lambda url: self.get_single_file(url, output_folder, start_from_scratch=start_from_scratch), file_list, max_workers=threads, disable=True)
|
||||||
|
|
||||||
|
def download_model_files(self, model, branch, links, sha256, output_folder, progress_bar=None, start_from_scratch=False, threads=1):
|
||||||
def download_model_files(self, model, branch, links, sha256, output_folder, progress_bar = None, start_from_scratch=False, threads=1):
|
|
||||||
self.progress_bar = progress_bar
|
self.progress_bar = progress_bar
|
||||||
# Creating the folder and writing the metadata
|
# Creating the folder and writing the metadata
|
||||||
if not output_folder.exists():
|
output_folder.mkdir(parents=True, exist_ok=True)
|
||||||
output_folder.mkdir(parents=True, exist_ok=True)
|
metadata = f'url: https://huggingface.co/{model}\n' \
|
||||||
with open(output_folder / 'huggingface-metadata.txt', 'w') as f:
|
f'branch: {branch}\n' \
|
||||||
f.write(f'url: https://huggingface.co/{model}\n')
|
f'download date: {datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")}\n'
|
||||||
f.write(f'branch: {branch}\n')
|
sha256_str = '\n'.join([f' {item[1]} {item[0]}' for item in sha256])
|
||||||
f.write(f'download date: {str(datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S"))}\n')
|
if sha256_str:
|
||||||
sha256_str = ''
|
metadata += f'sha256sum:\n{sha256_str}'
|
||||||
for i in range(len(sha256)):
|
metadata += '\n'
|
||||||
sha256_str += f' {sha256[i][1]} {sha256[i][0]}\n'
|
(output_folder / 'huggingface-metadata.txt').write_text(metadata)
|
||||||
if sha256_str != '':
|
|
||||||
f.write(f'sha256sum:\n{sha256_str}')
|
|
||||||
|
|
||||||
# Downloading the files
|
# Downloading the files
|
||||||
print(f"Downloading the model to {output_folder}")
|
print(f"Downloading the model to {output_folder}")
|
||||||
self.start_download_threads(links, output_folder, start_from_scratch=start_from_scratch, threads=threads)
|
self.start_download_threads(links, output_folder, start_from_scratch=start_from_scratch, threads=threads)
|
||||||
|
|
||||||
|
|
||||||
def check_model_files(self, model, branch, links, sha256, output_folder):
|
def check_model_files(self, model, branch, links, sha256, output_folder):
|
||||||
# Validate the checksums
|
# Validate the checksums
|
||||||
validated = True
|
validated = True
|
||||||
|
Loading…
Reference in New Issue
Block a user