From 36168db750fd1fa1aaa3e17f5fa3c3db4f3d3731 Mon Sep 17 00:00:00 2001
From: kali <>
Date: Mon, 28 Aug 2023 22:38:14 -0400
Subject: [PATCH] add calisuck.py and start howto

---
 README.md            |   4 +-
 calisuck/HowTo.md    |  21 ++
 calisuck/calisuck.py | 819 +++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 842 insertions(+), 2 deletions(-)
 create mode 100644 calisuck/HowTo.md
 create mode 100644 calisuck/calisuck.py

diff --git a/README.md b/README.md
index b17850c..eca0f42 100644
--- a/README.md
+++ b/README.md
@@ -2,10 +2,10 @@
 Mirror of Krazybug's calibre scripts
 
--
-ToDo
+## ToDo
 
 1. Create guide on how to find calibre instances and index them with calisuck
 2. Howto export as sqlite.db so calishot can use it to act as a search engine of said instances
 3. ????
 4. Profit
+5. How to pull calibre URLs from Shodan.
\ No newline at end of file
diff --git a/calisuck/HowTo.md b/calisuck/HowTo.md
new file mode 100644
index 0000000..a6fa13a
--- /dev/null
+++ b/calisuck/HowTo.md
@@ -0,0 +1,21 @@
+# Howto
+
+    // You need Python 3.6 at a minimum (the script uses f-strings); initialize and activate a venv
+    python -m venv .
+    // You might need to activate the venv manually
+    . bin/activate
+    // Prerequisites via pip
+    pip install requests fire humanize langid iso639 beautifultable
+    // Help commands
+    python calisuck.py --help
+    python calisuck.py index-ebooks --help
+    python calisuck.py download-ebooks --help
+    python calisuck.py download-covers --help
+
+# Where the hell do I find instances?
+### Shodan:
+Searching for "calibre" on Shodan gives you thousands of results. Unfortunately you can't filter them without making an account.
+There has to be a way to automatically pull the URLs from Shodan; it's outside my current expertise, but a sketch of one approach follows.
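+
+### Shodan API (sketch)
+The official `shodan` Python package should be able to do this, assuming you
+have an API key with query credits (the free tier won't let you run filtered
+searches). A minimal, untested sketch:
+
+    # pip install shodan
+    import shodan
+
+    api = shodan.Shodan("YOUR_API_KEY")  # hypothetical key
+    # search_cursor() pages through all matches; each banner carries ip/port
+    for banner in api.search_cursor("calibre"):
+        print("http://{}:{}/".format(banner["ip_str"], banner["port"]))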
diff --git a/calisuck/calisuck.py b/calisuck/calisuck.py
new file mode 100644
index 0000000..c8095ec
--- /dev/null
+++ b/calisuck/calisuck.py
@@ -0,0 +1,819 @@
+#!/usr/bin/env python3
+
+'''
+calisuck: index, smartly filter and download ebooks from Calibre open directories
+Installation:
+    You need python 3.6 installed (the script uses f-strings)
+    Download the file as a zip, unzip it and get into the dir
+    OR
+    > git clone https://gist.github.com/b7e814d7189db9ee1d6b9c1d1a1de95c.git
+    > mv b7e814d7189db9ee1d6b9c1d1a1de95c calisuck
+    > cd calisuck
+    >
+    THEN
+    > python3 -m venv .
+    > . bin/activate
+    > pip install requests fire humanize langid iso639 beautifultable
+    > python calisuck.py --help
+    > python calisuck.py index-ebooks --help
+    > python calisuck.py download-ebooks --help
+    > python calisuck.py download-covers --help
+'''
+
+'''
+            DO WHAT THE FUCK YOU WANT TO PUBLIC LICENSE
+                    Version 2, December 2004
+ Copyright (C) 2004 Sam Hocevar <sam@hocevar.net>
+ Everyone is permitted to copy and distribute verbatim or modified
+ copies of this license document, and changing it is allowed as long
+ as the name is changed.
+            DO WHAT THE FUCK YOU WANT TO PUBLIC LICENSE
+   TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION
+  0. You just DO WHAT THE FUCK YOU WANT TO.
+'''
+
+import sys
+import os
+import time
+import re
+import glob
+import shutil
+import json
+import urllib.parse
+
+import requests
+import urllib3
+import fire
+from humanize import naturalsize as hsize
+from langid.langid import LanguageIdentifier, model
+import iso639
+from beautifultable import BeautifulTable
+
+
+urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
+
+all_ordered_formats=['azw', 'azw3', 'cbr', 'chm', 'djvu', 'doc', 'docx', 'epub', 'kepub', 'lit', 'lrf', 'mobi', 'original_epub', 'pdf', 'ppt', 'prc', 'rar', 'rtf', 'txt', 'zip', 'fb2']
+identifier = LanguageIdentifier.from_modelstring(model, norm_probs=True)
+
+
+def load_metadata(path, uuid):
+    filepath=path+'/'+uuid+'/metadata.json'
+    if os.path.isfile(filepath):
+        try:
+            with open(filepath, 'r') as fd:
+                return json.load(fd)
+        except Exception:
+            print ("Error loading metadata for:", uuid, "from path:", path)
+            return 0
+    else:
+        return 0
+
+
+def save_metadata(path, book):
+    # write to a .tmp file first, then rename: a crash never leaves a
+    # half-written metadata.json behind
+    filepath=path+'/'+book['uuid']+'/metadata.json'
+    os.makedirs(os.path.dirname(filepath+".tmp"), exist_ok=True)
+    with open(filepath+".tmp", 'w') as fd:
+        json.dump(book, fd, indent=4, separators=(',', ': '))
+    try:
+        shutil.move(filepath+".tmp", filepath)
+    except Exception:
+        print("Unable to rename .tmp file:", filepath+".tmp")
+
+
+def get_cover_path(path, uuid):
+    filepath=path+'/'+uuid+'/cover.jpg'
+    if os.path.isfile(filepath): return filepath
+    else: return 0
+
+
+def get_file_path(path, uuid, fileformat):
+    files=os.listdir(path+'/'+uuid)
+    for f in files:
+        fname, ext=os.path.splitext(f)
+        if ext =='.'+fileformat:
+            return path+'/'+uuid+'/'+f
+    return 0
+
+
+def get_cover(path, book, map):
+    url=book['source']['cover']
+    if map:
+        # swap the host part of the URL when --map is given
+        pu=urllib.parse.urlparse(url)
+        pu=(pu[0], map, *pu[2:])
+        print(pu)
+        url=urllib.parse.urlunparse(pu)
+
+    print("Downloading cover from:", url)
+
+    r=requests.get(url, timeout=(20, 3), verify=False)
+    r.raise_for_status()
+
+    filepath=path+'/'+book['uuid']+'/cover.jpg'
+    os.makedirs(os.path.dirname(filepath+".tmp"), exist_ok=True)
+    with open(filepath+".tmp", 'wb') as fd:
+        fd.write(r.content)
+    shutil.move(filepath+".tmp", filepath)
+    print("Saved to:", filepath)
+
+
+def download_covers(dir='my_books', server='', map=""):
+    """ Download the missing cover of each book"""
+
+    for root, dirs, files in os.walk(dir, topdown=True):
+        for d in dirs:
+            book = load_metadata(root, d)
+            if not book:
+                print ("No ebook metadata found in:", root)
+                continue
+            if not get_cover_path(root, book['uuid']):
+                print()
+                print("-->", d)
+                print(book['uuid'])
+                try:
+                    get_cover(root, book, map)
+                except Exception:
+                    print ("Unable to get cover", book['uuid'])
+
+
+def get_file_size(url):
+    print("Downloading size:", url)
+    r = requests.head(url, verify=False)
+    r.raise_for_status()
+    size=r.headers['Content-Length']
+    print("Size received="+ hsize(size))
+    return int(size)
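+
+# --map and --map-lib (used by get_file below, and map by get_cover above)
+# rewrite the stored URLs right before downloading: --map swaps the host:port
+# part, --map-lib swaps the trailing library id. Useful when a site was
+# indexed under an address that has since changed.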
+def get_file(path, book, format, session, map, map_lib):
+    uuid = book['uuid']
+    url=book['source']['formats'][format]['url']
+    if map:
+        # swap the host part of the URL when --map is given
+        pu=urllib.parse.urlparse(url)
+        pu=(pu[0], map, *pu[2:])
+        print(pu)
+        url=urllib.parse.urlunparse(pu)
+
+    if map_lib:
+        # swap the trailing library id when --map-lib is given
+        url_s=url.split("/")
+        url_s=url_s[:-1]+[map_lib]
+        url='/'.join(url_s)
+
+    print()
+    print("Downloading ebook:", url)
+    print("Size expected (estimation):", hsize(book['source']['formats'][format]['size']))
+    r = session.get(url, timeout=(25,15), verify=False)
+    r.raise_for_status()
+    if('Content-Length' in r.headers ):
+        print("Size received="+hsize(r.headers['Content-Length']))
+    else:
+        print("Size received: unknown (no Content-Length header)")
+
+    filename=re.findall(r'filename="(.*)"', r.headers.get('Content-Disposition', ''))
+    if filename:
+        filepath=path+'/'+uuid+'/'+filename[0]
+    else:
+        filepath=path+'/'+uuid+'/'+uuid+"."+format
+
+    os.makedirs(os.path.dirname(filepath+".tmp"), exist_ok=True)
+    with open(filepath+".tmp", 'wb') as fd:
+        fd.write(r.content)
+    shutil.move(filepath+".tmp", filepath)
+    print("Saved to:", filepath)
+
+
+def set_status(uuid, status, dir='.'):
+    book = load_metadata(dir, uuid)
+    if book:
+        if book['source']['status'] != status:
+            book['source']['status'] = status
+            save_metadata(dir, book)
+            print("Status changed to", status+":", book['uuid'], "(", book['title'], ")")
+        else:
+            print("Status unchanged", status+":", book['uuid'])
+    else:
+        print ("No ebook metadata found for:", uuid)
+
+
+def remove_book(uuid, path='.'):
+    bookpath=path+'/'+uuid
+    if os.path.isdir(bookpath):
+        try:
+            shutil.rmtree(bookpath)
+            print(uuid, "removed")
+        except Exception:
+            print("Unable to remove:", bookpath)
+    else:
+        print(uuid, "not found")
+
+
+def update_done_status(book):
+    # a book is "done" when every format listed in its source has been downloaded
+    source=book['source']
+    if source['status']!='ignored':
+        if set(source['formats'].keys()) == set(book['formats']) & set(source['formats'].keys()):
+            book['source']['status']="done"
+        else:
+            book['source']['status']="todo"
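+
+# Typical session (hypothetical host):
+#   python calisuck.py index-ebooks --site=http://123.123.123.123/
+#   python calisuck.py download-ebooks --dry-run
+#   python calisuck.py download-ebooks --single-format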
+def index_ebooks(site, library="", start=0, stop=0, dir="my_books", inc=1000, force_refresh=False):
+    """
+    Index a remote Calibre library
+
+    You will get all the metadata (title, authors, isbn, ...) for each book.
+    They're stored as simple JSON files (metadata.json) so that you can easily visualize them or process them with the 'jq' program.
+    Each book gets a subdirectory named after its UUID. These directories group all
+    the different formats of the same book and eventually the cover file.
+    You can mix books from different sites without any (theoretical) collisions
+
+    Params:
+    --site=            : URL of the site to index (ex: http://123.123.123.123/)
+    --library=         : Id of the library to index. The script indexes the default library by default.
+                         The id is the string following '&library_id=' in the URL
+    --dir= (default=my_books)       : Directory where the metadata is stored
+    --force-refresh (default=False) : Force a refresh of the metadata. By default all the metadata
+                                      already gathered is skipped
+    --start= (default=0)
+    --stop= (default=0)             : Restrict indexing to a range of ebooks
+    --inc= (default=1000)           : Number of ebooks requested from the server per batch
+    """
+
+    os.makedirs(dir, exist_ok=True)
+
+    offset= 0 if not start else start-1
+    num=min(1000,inc)
+    server=site.rstrip('/')
+    api=server+'/ajax/'
+    library= '/'+library if library else library
+
+    print("Server:", server)
+    url=api+'search'+library+'?num=0'
+    print()
+    print("Getting ebooks count:", server)
+    try:
+        r = requests.get(url,verify=False)
+        r.raise_for_status()
+    except Exception:
+        print("Unable to open site:", url)
+        sys.exit(1)
+    print("Total count=",r.json()["total_num"])
+    total_num=int(r.json()["total_num"])
+    total_num= total_num if not stop else stop
+
+    print()
+    print("Start indexing")
+
+    count=offset+1
+    while offset < total_num:
+        remaining_num = min(num, total_num - offset)
+        # fetch a batch of book ids, newest first
+        url=api+'search'+library+'?num='+str(remaining_num)+'&offset='+str(offset)+'&sort=timestamp&sort_order=desc'
+        r=requests.get(url, verify=False)
+
+        # then fetch the metadata of the whole batch in one request
+        books_s=",".join(str(i) for i in r.json()['book_ids'])
+        url=api+'books'+library+'?ids='+books_s
+        r=requests.get(url, verify=False)
+
+        for id, r_book in r.json().items():
+            uuid=r_book['uuid']
+            if not uuid:
+                print ("No uuid for ebook: ignored")
+                continue
+
+            if r_book['authors']:
+                desc= f"uuid={uuid} ({r_book['title']} / {r_book['authors'][0]})"
+            else:
+                desc= f"uuid={uuid} ({r_book['title']})"
+            s=f"\r--> {count}/{total_num} - {desc}"
+            s='{:140.140}'.format(s)
+            print (s, end='')
+
+            if not force_refresh:
+                try:
+                    book = load_metadata(dir, uuid)
+                except Exception:
+                    print()
+                    print("Unable to get metadata from:", uuid)
+                    count+=1
+                    continue
+                if book:
+                    # metadata already present
+                    count+=1
+                    continue
+
+            if not r_book['formats']:
+                print()
+                print("No format found for {}".format(r_book['uuid']))
+                count+=1
+                continue
+
+            book={}
+            url=api+'book/'+id
+            book['title']=r_book['title']
+            book['authors']=r_book['authors']
+            book['series']=r_book['series']
+            book['series_index']=r_book['series_index']
+            book['edition']=0
+            book['uuid']=r_book['uuid']
+            book['identifiers']=r_book['identifiers']
+            book['comments']=r_book['comments']
+            book['pubdate']=r_book['pubdate']
+            book['publisher']=r_book['publisher']
+            languages=r_book['languages']
+            if not languages:
+                # no language in the metadata: guess it from the comments or the title
+                if book['comments']:
+                    text=book['comments']
+                else:
+                    text=book['title']
+                s_language, prob=identifier.classify(text)
+                if prob >= 0.85:
+                    language = iso639.to_iso639_2(s_language)
+                    book['languages']=[language]
+                else:
+                    book['languages']=[]
+            else:
+                book['languages']=[]
+                for l in languages:
+                    book['languages'].append(iso639.to_iso639_2(l))
+
+            book['tags']=r_book['tags']
+            book['formats']=[]
+            book['metadata_version']=0.1
+            source={}
+            source['url']=url+library
+            source['id']=id
+            try:
+                tmpbook = load_metadata(dir, uuid)
+            except Exception:
+                print("Unable to get metadata from:", uuid)
+                count+=1
+                continue
+            if tmpbook and tmpbook['source']['status']=="ignored":
+                # keep the 'ignored' flag across refreshes
+                source['status']="ignored"
+            else:
+                source['status']="todo"
+            source['cover']=server+r_book['cover']
+            source['timestamp']=r_book['timestamp']
+
+            format_sources={}
+            formats=r_book['formats']
+            for f in formats:
+                s={}
+                url=''
+                if f in r_book['main_format']:
+                    url=r_book['main_format'][f]
+                else:
+                    url=r_book['other_formats'][f]
+                s['url']=server+url
+
+                if 'size' in r_book['format_metadata'][f]:
+                    s['size']=int(r_book['format_metadata'][f]['size'])
+                else:
+                    print()
+                    print("Size not found for format '{}' : {}".format(f, uuid))
+                    print("Trying to get size online: {}".format(s['url']))
+                    try:
+                        s['size']=get_file_size(s['url'])
+                    except Exception:
+                        print("Unable to access format '{}' : {} skipped".format(f, uuid))
+                        continue
+                s['status']='todo'
+                format_sources[f]=s
+
+            source['formats']=format_sources
+            book['source']=source
+
+            if not source['formats']:
+                print("No format found for {}".format(r_book['uuid']))
+                count+=1
+                continue
+            update_done_status(book)
+            try:
+                save_metadata(dir, book)
+            except Exception:
+                print()
+                print("Unable to save book metadata", book['uuid'])
+            count+=1
+        offset=offset+num
+    print()
+    print("Done")
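+
+# A metadata.json written by index_ebooks looks roughly like this (abridged):
+#   {
+#     "title": "...", "authors": ["..."], "uuid": "...",
+#     "languages": ["eng"], "identifiers": {"isbn": "..."},
+#     "formats": [],
+#     "source": {"id": "42", "status": "todo",
+#                "formats": {"epub": {"url": "...", "size": 123456, "status": "todo"}}}
+#   }
+# The top-level "formats" lists what has already been downloaded; each source
+# format carries its own todo/done status.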
+def has_languages(book, languages=[], ignore_empty_language=False):
+
+    # band-aid: old metadata files may lack the 'languages' key
+    if not 'languages' in book:
+        book['languages']=[]
+
+    if ignore_empty_language and not book['languages']:
+        return False
+
+    if not ignore_empty_language and not book['languages']:
+        # unknown language is accepted
+        return True
+
+    expected_languages=list(set(book['languages']) & set(languages))
+    if languages and not expected_languages:
+        return False
+
+    return True
+
+
+def has_identifiers(book, identifiers=[], ignore_empty_identifiers=False):
+
+    if ignore_empty_identifiers and not book['identifiers']:
+        return False
+
+    if not ignore_empty_identifiers and not book['identifiers']:
+        # books without identifiers are accepted
+        return True
+
+    expected_identifiers=list(set(book['identifiers'].keys()) & set(identifiers))
+    if identifiers and not expected_identifiers:
+        return False
+
+    return True
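+
+# Filter semantics: with languages=['eng'] a book tagged ['eng', 'fre'] passes,
+# and books with no language at all also pass unless ignore_empty_language is
+# set. Identifiers (isbn, asin, ...) behave the same way.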
+def download_ebooks(dir= 'my_books', formats=[], single_format=False, ignored_formats=[], languages=[], identifiers=[], min_size=0, max_size=0, ignore_empty_language=False, ignore_empty_identifiers=False, dry_run=False, timer=0, map="", map_lib=""):
+    '''
+    Download ebooks into matching subdirs:
+
+    The different formats of the same book are grouped in the same directory,
+    named after a UUID, next to the metadata file (metadata.json).
+    The status of the formats for a book and its global status are initially set to 'todo'.
+    They move to 'done' after their download. This allows you to rerun the download and progressively collect books.
+    You can use different options to filter the formats for the download
+    by language, size, format and identifiers (isbn, ...).
+    A report of the download is displayed at the end of the process.
+    You can run this command in dry mode (--dry-run) with different settings
+    to only display the report and prepare your effective download.
+
+    Params:
+    --min-size= (default=0)
+    --max-size= (default=infinity)  : Delimit the size in MB for the accepted formats
+    --dry-run (default=False)       : Simulate the download and only display the report
+    --languages=                    : Restrict the download to a list of specific languages
+                                      (Ex: --languages='["eng","ita"]')
+    --ignore-empty-language (default=False) : Ignore books with an unidentified language
+    --formats=                      : Restrict the download to a list of specific formats
+                                      (Ex: --formats='["epub", "mobi", "pdf"]')
+    --ignored-formats=              : Ignore a list of specific formats.
+                                      Can be combined with --formats.
+                                      (Ex: --ignored-formats='["mp3", "rar", "zip"]')
+    --single-format (default=False) : Limit the download to 1 format per book with this preference order:
+                                      'azw', 'azw3', 'cbr', 'chm', 'djvu', 'doc', 'docx', 'epub', 'kepub',
+                                      'lit', 'lrf', 'mobi', 'original_epub', 'pdf', 'ppt', 'prc', 'rar',
+                                      'rtf', 'txt', 'zip', 'fb2'
+    --identifiers=                  : Restrict the download to a list of specific identifiers
+                                      (Ex: --identifiers='["isbn","asin"]')
+    --ignore-empty-identifiers (default=False) : Ignore books without identifiers (often OCR)
+    --timer= (default=0)            : Seconds to wait between two downloads
+    --map=                          : Replace the host:port part of the stored URLs
+    --map-lib=                      : Replace the library id in the stored URLs
+    '''
+
+    print()
+
+    if single_format: my_formats = formats if formats else all_ordered_formats
+    else: my_formats=formats
+
+    min_size=int(min_size)*1024*1024
+    max_size=int(max_size)*1024*1024
+    print ("Formats expected between {} and {}".format(hsize(min_size), hsize(max_size) if max_size else "infinity"))
+
+    total_size=0
+    total_size_by_format={}
+    total_ebook_count=0
+    total_format_count=0
+    total_count_by_format={}
+    size_max=0
+    size_min=0
+    language_count={}
+    identifiers_count={}
+
+    s = requests.Session()
+
+    for root, dirs, files in os.walk(dir, topdown=True):
+        for counter, uuid in enumerate(dirs):
+            book = load_metadata(root, uuid)
+            if book:
+                status=book['source']['status']
+                if status=="todo":
+
+                    if not has_languages(book, languages=languages, ignore_empty_language=ignore_empty_language):
+                        continue
+
+                    if not has_identifiers(book, identifiers=identifiers, ignore_empty_identifiers=ignore_empty_identifiers):
+                        continue
+
+                    source=book['source']
+                    download_formats = get_formats_to_download(book, accepted_formats=my_formats, single_format=single_format, ignored_formats=ignored_formats, max_size=max_size, min_size=min_size)
+                    if not len(download_formats):
+                        # no format left after filtering
+                        pass
+                    else:
+                        ebook_kept=False
+                        for f in download_formats:
+                            url = source['formats'][f]['url']
+                            if url:
+                                if get_file_path(dir, uuid, f):
+                                    # format already on disk: retry the download anyway
+                                    pass
+
+                                # print(f"--> format '{f}' for ({book['title']} / {book['authors'][0]} / {str(book['series'])})")
+                                if not dry_run:
+                                    try:
+                                        get_file(dir, book, f, s, map, map_lib)
+                                        book['formats'].append(f)
+                                        book['source']['formats'][f]['status']="done"
+                                        if timer:
+                                            print(f"Waiting {timer} seconds")
+                                            time.sleep(timer)
+                                    except Exception as msg:
+                                        print("Unable to get book:", url)
+                                        print(msg)
+                                        time.sleep(5)
+                                        continue
+                                    save_metadata(dir, book)
+
+                                ebook_kept=True
+                                size=source['formats'][f]['size']
+                                total_size += size
+                                size_max = size if size>size_max else size_max
+                                if not size_min:
+                                    size_min = size
+                                else:
+                                    size_min = size if size<size_min else size_min
+                                total_format_count+=1
+                                if not f in total_count_by_format:
+                                    total_count_by_format[f] = 1
+                                else:
+                                    total_count_by_format[f]+=1
+                                if not f in total_size_by_format:
+                                    total_size_by_format[f] = size
+                                else:
+                                    total_size_by_format[f]+=size
+                                if not book['languages']:
+                                    if not '' in language_count:
+                                        language_count[''] = 1
+                                    else:
+                                        language_count['']+=1
+                                else:
+                                    for l in book['languages']:
+                                        if not l in language_count:
+                                            language_count[l] = 1
+                                        else:
+                                            language_count[l]+=1
+                                if not book['identifiers']:
+                                    if not '' in identifiers_count:
+                                        identifiers_count[''] = 1
+                                    else:
+                                        identifiers_count['']+=1
+                                else:
+                                    for l in book['identifiers'].keys():
+                                        if not l in identifiers_count:
+                                            identifiers_count[l] = 1
+                                        else:
+                                            identifiers_count[l]+=1
+
+                        if ebook_kept:
+                            total_ebook_count+=1
+
+                        if not dry_run:
+                            update_done_status(book)
+                            if book['source']['status']=="done":
+                                save_metadata(dir, book)
+                                print("Book done:", book['uuid'])
+                                print()
+                else:
+                    print(f'--> {counter} books handled', end="\r")
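+
+    # ---- Reporting ----
+    # The tables below summarize what was downloaded (or, with --dry-run, what
+    # would have been) so you can tune the filters before an effective run.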
+    print()
+    print("Reporting ...")
+
+    table_l = BeautifulTable()
+    table_l.column_headers = ["Language", "Ebooks count"]
+    for l, c in language_count.items():
+        table_l.append_row([l, c])
+    table_l.sort("Ebooks count", reverse=True)
+    table_l=table_l[0:10]
+
+    table_i = BeautifulTable()
+    table_i.column_headers = ["Identifier", "Ebooks count"]
+    for i, c in identifiers_count.items():
+        table_i.append_row([i, c])
+    table_i.sort("Ebooks count", reverse=True)
+    table_i=table_i[0:10]
+
+    print()
+    print("Top 10 ebooks by language/identifier:")
+    table = BeautifulTable()
+    table.column_headers = ["Languages", "Identifiers"]
+    table.append_row([table_l, table_i])
+    print(table)
+
+    print()
+    print("Total count of ebooks by format:")
+    table = BeautifulTable()
+    table.column_headers = ["Format", "Size", "Ebooks count"]
+    for f in total_count_by_format.keys():
+        table.append_row([f, hsize(total_size_by_format[f]), total_count_by_format[f]])
+    table.sort("Ebooks count", reverse=True)
+    print(table)
+
+    table_c = BeautifulTable()
+    table_c.column_headers = ["", "Total count"]
+    table_c.append_row(["Formats", total_format_count])
+    table_c.append_row(["Ebooks", total_ebook_count])
+
+    table_s = BeautifulTable()
+    table_s.column_headers = ["", "Size"]
+    table_s.append_row(["Biggest File", hsize(size_max)])
+    table_s.append_row(["Total", hsize(total_size)])
+
+    print()
+    print("Summary:")
+    table = BeautifulTable()
+    table.column_headers = ["Total Count", "Total Size"]
+    table.append_row([table_c, table_s])
+    print(table)
+
+    print()
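+
+# Format selection: with --single-format the first match in the preference
+# order wins; otherwise --formats acts as an allow-list and --ignored-formats
+# as a deny-list, then the size limits are applied.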
+def get_formats_to_download(book, accepted_formats=[], ignored_formats=[], single_format=False, min_size=0, max_size=0):
+    # print("Accepted formats", accepted_formats)
+    source=book['source']
+    my_formats=[]
+    for f,v in source['formats'].items():
+        if v['status']=='todo':
+            my_formats.append(f)
+
+    formats=[]
+    if single_format:
+        if accepted_formats:
+            for f in accepted_formats:
+                if f in my_formats:
+                    formats=[f]
+                    break
+        else:
+            print("need at least 1 format for ordering")
+    else:
+        if accepted_formats:
+            formats=list(set(accepted_formats) & set(my_formats))
+        elif ignored_formats:
+            formats = list(set(my_formats) - set(ignored_formats))
+        else:
+            formats=my_formats
+
+    download_formats=formats[:]
+    for f in formats:
+        if not 'size' in source['formats'][f]:
+            if max_size:
+                # size unknown: the limit can't be checked, skip the format
+                download_formats.remove(f)
+        else:
+            size = source['formats'][f]['size']
+            if size < min_size or (max_size and size > max_size):
+                download_formats.remove(f)
+    return download_formats
+
+
+def update_format_statuses(book, refresh_ignored):
+    formats=book['source']['formats']
+    for f, v in formats.items():
+        if v['status']!='ignored' or refresh_ignored:
+            book['source']['formats'][f]['status']='todo'
+
+
+def check_ebooks(dir= 'my_books', dry_run=True):
+    '''
+    Check the ebooks already on disk and mark their formats as 'done'
+    '''
+
+    print("Checking ...")
+
+    for root, dirs, files in os.walk(dir, topdown=True):
+        for counter, uuid in enumerate(dirs):
+            book = load_metadata(root, uuid)
+            if book:
+                status=book['source']['status']
+                if status=="todo":
+                    source=book['source']
+                    update=False
+                    for f, v in source["formats"].items():
+                        print(uuid, f, v['status'])
+                        if v['status']=="todo":
+                            formats= glob.glob(root+"/"+uuid+"/*."+f)
+                            if formats:
+                                print(book['uuid'], formats[0])
+                                book['source']['formats'][f]['status']="done"
+                                update=True
+
+                    if not dry_run and update:
+                        update_done_status(book)
+                        save_metadata(dir, book)
+                        print("Book done", book['uuid'])
+                        print()
+    print()
+
+
+if __name__ == "__main__":
+    fire.Fire({
+        "index_ebooks": index_ebooks,
+        "download_ebooks": download_ebooks,
+        "download_covers": download_covers,
+        "set_status": set_status,
+        "check_ebooks": check_ebooks
+    })
\ No newline at end of file