add calisuck.py and start howto
This commit is contained in:
parent
68867b6316
commit
36168db750
@ -2,10 +2,10 @@
|
|||||||
|
|
||||||
Mirror of Krazybug's calibre scripts
|
Mirror of Krazybug's calibre scripts
|
||||||
|
|
||||||
-
|
## ToDo
|
||||||
ToDo
|
|
||||||
|
|
||||||
1. Create guide on how to find calibre instances and index them with calisuck
|
1. Create guide on how to find calibre instances and index them with calisuck
|
||||||
2. Howto export as sqlite.db so calishot can use it to act as a search engine of said instances
|
2. Howto export as sqlite.db so calishot can use it to act as a search engine of said instances
|
||||||
3. ????
|
3. ????
|
||||||
4. Profit
|
4. Profit
|
||||||
|
5. How to pull calibre URLs from Shodan.
|
21
calisuck/HowTo.md
Normal file
21
calisuck/HowTo.md
Normal file
@ -0,0 +1,21 @@
|
|||||||
|
# Howto
|
||||||
|
|
||||||
|
// You need Python 3.5 at a minimum; initialize and activate a venv
|
||||||
|
python -m venv .
|
||||||
|
// Might need to activate the venv manually
|
||||||
|
. bin/activate
|
||||||
|
// Pre-reqs via pip
|
||||||
|
pip install requests fire humanize langid iso639 beautifultable
|
||||||
|
// help commands
|
||||||
|
python calisuck.py --help
|
||||||
|
python calisuck.py index-ebooks --help
|
||||||
|
python calisuck.py download-ebooks --help
|
||||||
|
python calisuck.py download-covers --help
|
||||||
|
|
||||||
|
# Where the hell do I find instances?
|
||||||
|
### Shodan :
|
||||||
|
Apparently searching for "calibre" in Shodan gives you thousands of results. Unfortunately you can't filter without making an account.
|
||||||
|
There has to be a way to automatically pull the URLs from Shodan but it's out of my current expertise.
|
||||||
|
|
||||||
|
###
|
||||||
|
|
819
calisuck/calisuck.py
Normal file
819
calisuck/calisuck.py
Normal file
@ -0,0 +1,819 @@
|
|||||||
|
#!/usr/bin/env python3
|
||||||
|
|
||||||
|
'''
|
||||||
|
calisuck: index, filter-out smartly and download ebooks from Calibre open directories
|
||||||
|
Installation:
|
||||||
|
You need python 3.5 installed
|
||||||
|
Download the file as a zip and unzip-it and get into the dir
|
||||||
|
OR
|
||||||
|
> git clone https://gist.github.com/b7e814d7189db9ee1d6b9c1d1a1de95c.git
|
||||||
|
> mv b7e814d7189db9ee1d6b9c1d1a1de95c calisuck
|
||||||
|
> cd calisuck
|
||||||
|
>
|
||||||
|
THEN
|
||||||
|
> python3 -m venv .
|
||||||
|
> . bin/activate
|
||||||
|
> pip install requests fire humanize langid iso639 beautifultable
|
||||||
|
> python calisuck.py --help
|
||||||
|
> python calisuck.py index-ebooks --help
|
||||||
|
> python calisuck.py download-ebooks --help
|
||||||
|
> python calisuck.py download-covers --help
|
||||||
|
'''
|
||||||
|
|
||||||
|
'''
|
||||||
|
DO WHAT THE FUCK YOU WANT TO PUBLIC LICENSE
|
||||||
|
Version 2, December 2004
|
||||||
|
Copyright (C) 2004 Sam Hocevar <sam@hocevar.net>
|
||||||
|
Everyone is permitted to copy and distribute verbatim or modified
|
||||||
|
copies of this license document, and changing it is allowed as long
|
||||||
|
as the name is changed.
|
||||||
|
DO WHAT THE FUCK YOU WANT TO PUBLIC LICENSE
|
||||||
|
TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION
|
||||||
|
0. You just DO WHAT THE FUCK YOU WANT TO.
|
||||||
|
'''
|
||||||
|
|
||||||
|
import sys
|
||||||
|
import os
|
||||||
|
import time
|
||||||
|
import re
|
||||||
|
import shutil
|
||||||
|
import requests
|
||||||
|
import json
|
||||||
|
import fire
|
||||||
|
from humanize import naturalsize as hsize
|
||||||
|
from langid.langid import LanguageIdentifier, model
|
||||||
|
import iso639
|
||||||
|
import time
|
||||||
|
from requests.adapters import HTTPAdapter
|
||||||
|
import urllib.parse
|
||||||
|
import urllib3
|
||||||
|
from beautifultable import BeautifulTable
|
||||||
|
|
||||||
|
|
||||||
|
# Open Calibre servers commonly run with self-signed/invalid TLS certificates;
# every request below passes verify=False, so silence the per-call warnings.
urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)

# Preference order used when --single-format must pick "the best" format.
all_ordered_formats=['azw', 'azw3', 'cbr', 'chm', 'djvu', 'doc', 'docx', 'epub', 'kepub', 'lit', 'lrf', 'mobi', 'original_epub', 'pdf', 'ppt', 'prc', 'rar', 'rtf', 'txt', 'zip', 'fb2']
# langid detector (normalized probabilities) used to guess a language when the
# remote metadata has none.
identifier = LanguageIdentifier.from_modelstring(model, norm_probs=True)
|
||||||
|
|
||||||
|
|
||||||
|
def load_metadata(path, uuid):
    """Load <path>/<uuid>/metadata.json and return it as a dict.

    Returns 0 (the project's falsy sentinel, kept for caller compatibility)
    when the file is absent or unreadable/corrupt.
    """
    filepath = path + '/' + uuid + '/metadata.json'
    if os.path.isfile(filepath):
        try:
            with open(filepath, 'r') as fd:
                return json.load(fd)
        # was a bare except: catch only I/O and JSON parsing failures so
        # real programming errors (e.g. KeyboardInterrupt) still surface
        except (OSError, json.JSONDecodeError):
            print ("Error loading metadata for:", uuid, "from path:", path)
            return 0
    else:
        # No metadata yet for this uuid: not an error, just "not indexed"
        return 0
|
||||||
|
|
||||||
|
|
||||||
|
def save_metadata(path, book):
    """Write *book* to <path>/<uuid>/metadata.json atomically.

    The JSON is first written to metadata.json.tmp and then renamed into
    place so a crash mid-write never leaves a truncated metadata file.
    """
    filepath = path + '/' + book['uuid'] + '/metadata.json'
    os.makedirs(os.path.dirname(filepath + ".tmp"), exist_ok=True)
    with open(filepath + ".tmp", 'w') as fd:
        json.dump(book, fd, indent=4, separators=(',', ': '))
    try:
        # atomic-ish replace: readers never see a half-written file
        shutil.move(filepath + ".tmp", filepath)
    # was a bare except: only filesystem errors are expected here
    except OSError:
        print("Unable to rename .tmp file:", filepath + ".tmp")
|
||||||
|
|
||||||
|
|
||||||
|
def get_cover_path(path, uuid):
    """Return the path of <path>/<uuid>/cover.jpg, or 0 when no cover exists."""
    candidate = path + '/' + uuid + '/cover.jpg'
    return candidate if os.path.isfile(candidate) else 0
|
||||||
|
|
||||||
|
|
||||||
|
def get_file_path(path, uuid, fileformat):
    """Return the path of the first file under <path>/<uuid> whose extension
    is '.<fileformat>', or 0 when none exists.

    Fix: the original returned 0 as soon as the first directory entry did not
    match (os.listdir order is arbitrary), so a matching file could be missed
    when the book directory also held a cover or metadata file. Now every
    entry is examined before giving up.
    """
    bookdir = path + '/' + uuid
    for entry in os.listdir(bookdir):
        _, ext = os.path.splitext(entry)
        if ext == '.' + fileformat:
            return bookdir + '/' + entry
    return 0
|
||||||
|
|
||||||
|
|
||||||
|
def get_cover(path, book, map):
    """Fetch the cover image for *book* and store it as <path>/<uuid>/cover.jpg.

    When *map* is given, the url's network location is replaced by it
    (useful when the indexed host has moved). The image is written to a
    .tmp file first and renamed into place.
    """
    url = book['source']['cover']
    if map:
        parts = urllib.parse.urlparse(url)
        parts = (parts[0], map, *parts[2:])
        print(parts)
        url = urllib.parse.urlunparse(parts)

    print("Downloading cover from:", url)

    resp = requests.get(url, timeout=(20, 3), verify=False)
    resp.raise_for_status()

    target = path + '/' + book['uuid'] + '/cover.jpg'
    tmp_target = target + ".tmp"
    os.makedirs(os.path.dirname(tmp_target), exist_ok=True)
    with open(tmp_target, 'wb') as fd:
        fd.write(resp.content)
    shutil.move(tmp_target, target)
    print("Saved to:", target)
|
||||||
|
|
||||||
|
|
||||||
|
def download_covers(dir='my_books', server='', map=""):
    """ Download covers for each books"""

    # Walk every indexed book directory (one subdirectory per uuid).
    for root, dirs, files in os.walk(dir, topdown=True):
        for d in dirs:
            book = load_metadata(root, d)
            if book:
                # if book['source']['status'] != "ignored":
                # NOTE(review): the status filter above was disabled with a
                # constant 'if True:', which makes the 'else' branch below
                # unreachable — kept as-is to preserve the author's intent.
                if True:
                    # Only fetch covers that are not already on disk.
                    if not get_cover_path(root, book['uuid']):
                        print()
                        print("-->", d)
                        print(book['uuid'])
                        try:
                            get_cover(root, book, map)
                        except:
                            # best-effort: a failed cover never aborts the walk
                            print ("Unable to get cover", book['uuid'])
                    else:
                        pass
                        # print ("Cover already present:", book['uuid'])
                else:
                    print ('book {} in status {}: ignored'.format(book['uuid'], book['source']['status']))
            else:
                print ("No ebook metadata found in:", root)
|
||||||
|
|
||||||
|
|
||||||
|
def get_file_size(url):
    """Issue a HEAD request for *url* and return its Content-Length as an int."""
    print("Downloading size:", url)
    response = requests.head(url, verify=False)
    response.raise_for_status()
    length = response.headers['Content-Length']
    print("Size received="+ hsize(length))
    return int(length)
|
||||||
|
|
||||||
|
|
||||||
|
def get_file(path, book, format, session, map, map_lib):
    """Download one *format* of *book* into <path>/<uuid>/.

    Params:
        session  : shared requests.Session (connection reuse)
        map      : optional replacement for the url's network location
        map_lib  : optional replacement for the trailing library id of the url

    The file is written to a .tmp file then renamed into place. The name is
    taken from the Content-Disposition header when present, otherwise
    <uuid>.<format> is used.
    """
    uuid = book['uuid']
    url = book['source']['formats'][format]['url']
    if map:
        # swap the host part (server moved since indexing)
        pu = urllib.parse.urlparse(url)
        pu = (pu[0], map, *pu[2:])
        print(pu)
        url = urllib.parse.urlunparse(pu)

    if map_lib:
        # swap the trailing library id segment of the path
        url_s = url.split("/")
        url_s = url_s[:-1] + [map_lib]
        url = '/'.join(url_s)

    print()
    print("Downloading ebook:", url)
    print("Size expected (estimation):", hsize(book['source']['formats'][format]['size']))
    r = session.get(url, timeout=(25, 15), verify=False)
    r.raise_for_status()
    if 'Content-Length' in r.headers:
        print("Size received="+hsize(r.headers['Content-Length']))
    else:
        # fixed typo: was "Fize received"
        print("Size received")

    # fix: Content-Disposition may be absent; .get avoids a KeyError and
    # falls back to the <uuid>.<format> name below
    filename = re.findall(r'filename="(.*)"', r.headers.get('Content-Disposition', ''))
    if len(filename):
        filepath = path + '/' + uuid + '/' + filename[0]
    else:
        filepath = path + '/' + uuid + '/' + uuid + "." + format

    os.makedirs(os.path.dirname(filepath + ".tmp"), exist_ok=True)
    with open(filepath + ".tmp", 'wb') as fd:
        fd.write(r.content)
    shutil.move(filepath + ".tmp", filepath)
    print("Saved to:", filepath)
|
||||||
|
|
||||||
|
|
||||||
|
def set_status(uuid, status, dir='.'):
    """Set the source status of one indexed book ('todo'/'done'/'ignored')
    and persist it, printing what happened.

    No-op (with a message) when the book already has *status* or when no
    metadata exists for *uuid* under *dir*.
    """
    book = load_metadata(dir, uuid)
    if book:
        if book['source']['status'] != status:
            book['source']['status'] = status
            save_metadata(dir, book)
            print("Status changed to", status+":", book['uuid'], "(", book['title'], ")")
        else:
            # fixed garbled message: was "Status unchanged changed "
            print("Status unchanged:", status+":", book['uuid'])
    else:
        print ("No ebook metadata found for:", uuid)
|
||||||
|
|
||||||
|
|
||||||
|
def remove_book(uuid, path='.'):
    """Delete the directory <path>/<uuid> (one indexed book and all its files)."""
    print(os.getcwd())
    bookpath = path + '/' + uuid
    if os.path.isdir(bookpath):
        try:
            shutil.rmtree(bookpath)
            print(uuid, "removed")
        # was a bare except printing just "problem"; report the real cause
        except OSError as e:
            print("Unable to remove", bookpath, ":", e)
    else:
        print(uuid, "not found")
|
||||||
|
|
||||||
|
|
||||||
|
def update_done_status(book):
    """Recompute *book*'s global source status in place.

    A book is 'done' once every format offered by the source appears in the
    locally downloaded formats list; otherwise it goes back to 'todo'.
    Books marked 'ignored' are left untouched.
    """
    src = book['source']
    if src['status'] == 'ignored':
        return
    offered = set(src['formats'])
    src['status'] = "done" if offered <= set(book['formats']) else "todo"
|
||||||
|
|
||||||
|
|
||||||
|
def index_ebooks(site, library="", start=0, stop=0, dir="my_books", inc=1000, force_refresh=False):
    """
    Index a remote Calibre library

    You will get in your <dir> all the metadata (title, authors, isbn, ...) for each book.
    They're stored as simple JSON files (metadata.json) so that you can easily visualize them or process them with 'jq' program.
    They are stored in subdirectories with a UUID as a name. These directories do match different books and allow you to group all
    the different formats of the same book and eventually the cover file.
    You can mix books from different sites without any (theoretic) collisions

    Params:
    --site=<string>  : Url of the site to index (ex: http://123.123.123.123/)
    --library=<string> : Id of library to index. The script indexes the default library by default.
                      The id is the string following '&library_id=' in the url
    --force-refresh (default=False) : Force a refresh of the metadata. By default all the metadata
                      already gathered are ignored
    --start=<int> (default=0)
    --stop=<int> (default=0) : Allow indexing between a range of ebooks

    --inc=<int> (default=1000) : Fix the number of ebooks for each request to the server
    """

    os.makedirs(dir, exist_ok=True)

    # 'start' is 1-based on the command line; the server offset is 0-based.
    offset= 0 if not start else start-1
    num=min(1000,inc)  # page size for each server request, capped at 1000
    server=site.rstrip('/')
    api=server+'/ajax/'
    # non-default libraries are addressed as /ajax/search/<library_id>
    library= '/'+library if library else library

    print("Server:", server)
    url=api+'search'+library+'?num=0'
    print()
    print("Getting ebooks count:", server)
    try:
        r = requests.get(url,verify=False)
        r.raise_for_status()
    except:
        # unreachable server: nothing else to do
        print("Unable to open site:", url)
        sys.exit(1)
    print("Total count=",r.json()["total_num"])
    total_num=int(r.json()["total_num"])
    total_num= total_num if not stop else stop

    print()
    print("Start indexing")

    # NOTE(review): 'range' shadows the builtin; it is the 1-based progress counter.
    range=offset+1
    while offset < total_num:
        remaining_num = min(num, total_num - offset)
        # 1st request: one page of book ids, newest first
        url=api+'search'+library+'?num='+str(remaining_num)+'&offset='+str(offset)+'&sort=timestamp&sort_order=desc'
        r=requests.get(url, verify=False)

        # 2nd request: full metadata for the whole page of ids at once
        books_s=",".join(str(i) for i in r.json()['book_ids'])
        url=api+'books'+library+'?ids='+books_s
        r=requests.get(url, verify=False)

        for id, r_book in r.json().items():
            uuid=r_book['uuid']
            if not uuid:
                # the uuid is our on-disk directory name; useless without it
                print ("No uuid for ebook: ignored")
                continue

            if r_book['authors']:
                desc= f"uuid={uuid} ({r_book['title']} / {r_book['authors'][0]})"
            else:
                desc= f"uuid={uuid} ({r_book['title']})"
            # single-line progress display, padded/truncated to 140 columns
            s=f"\r--> {range}/{total_num} - {desc}"
            s='{:140.140}'.format(s)
            print (s, end='')

            if not force_refresh:
                # skip books whose metadata is already on disk
                try:
                    book = load_metadata(dir, uuid)
                except:
                    print()
                    print("Unable to get metadata from:", uuid)
                    range+=1
                    continue
                if book:
                    range+=1
                    continue

            if not r_book['formats']:
                # nothing downloadable: do not index at all
                print()
                print("No format found for {}".format(r_book['uuid']))
                range+=1
                continue

            # Build our own metadata record from the server's answer.
            book={}
            url=api+'book/'+id
            book['title']=r_book['title']
            book['authors']=r_book['authors']
            book['series']=r_book['series']
            book['series_index']=r_book['series_index']
            book['edition']=0
            book['uuid']=r_book['uuid']
            book['identifiers']=r_book['identifiers']
            book['comments']=r_book['comments']
            book['pubdate']=r_book['pubdate']
            book['publisher']=r_book['publisher']
            languages=r_book['languages']
            if not languages:
                # No language in the remote metadata: guess it with langid
                # from the comments (preferred) or the title.
                if book['comments']:
                    text=book['comments']
                else:
                    text=book['title']
                s_language, prob=identifier.classify(text)
                # only trust confident guesses
                if prob >= 0.85:
                    language = iso639.to_iso639_2(s_language)
                    book['languages']=[language]
                else:
                    book['languages']=[]
            else:
                # normalize declared languages to iso639-2 codes
                book['languages']=[]
                for l in languages:
                    book['languages'].append(iso639.to_iso639_2(l))

            book['tags']=r_book['tags']
            book['formats']=[]
            book['metadata_version']=0.1
            source={}
            source['url']=url+library
            source['id']=id
            try:
                tmpbook = load_metadata(dir, uuid)
            except:
                print("Unable to get metadata from:", uuid)
                range+=1
                continue
            # preserve a manual 'ignored' decision across re-indexing runs
            if tmpbook and tmpbook['source']['status']=="ignored":
                source['status']="ignored"
            else:
                source['status']="todo"
            source['cover']=server+r_book['cover']
            source['timestamp']=r_book['timestamp']

            # Record the download url and size of every available format.
            format_sources={}
            formats=r_book['formats']
            for f in formats:
                s={}
                url=''
                if f in r_book['main_format']:
                    url=r_book['main_format'][f]
                else:
                    url=r_book['other_formats'][f]
                s['url']=server+url

                if 'size' in r_book['format_metadata'][f]:
                    s['size']=int(r_book['format_metadata'][f]['size'])
                else:
                    # size missing from metadata: ask the server directly
                    print()
                    print("Size not found for format '{}' : {}".format(f, uuid))
                    print("Trying to get size online: {}".format(s['url']))
                    try:
                        s['size']=get_file_size(s['url'])
                    except:
                        print("Unable to access format '{}' : {} skipped".format(f, uuid))
                        continue
                s['status']='todo'
                format_sources[f]=s

            source['formats']=format_sources
            book['source']=source

            if not source['formats']:
                # every format was skipped above
                print("No format found for {}".format(r_book['uuid']))
                range+=1
                continue
            update_done_status(book)
            try:
                save_metadata(dir, book)
            except:
                print()
                print("Unable to save book metadata", book['uuid'])
            range+=1
        offset=offset+num
    print()
    print("Done")
|
||||||
|
|
||||||
|
|
||||||
|
def has_languages(book, languages=None, ignore_empty_language=False):
    """Return True when *book* passes the language filter.

    Params:
        languages             : accepted language codes; empty/None accepts all
        ignore_empty_language : when True, books with no language are rejected

    Note: mutates *book* by adding a missing 'languages' key (a "rustine" /
    quick patch for older metadata files).
    Fix: the mutable default argument `languages=[]` was replaced by None.
    """
    languages = languages or []

    # rustine: older metadata may lack the key entirely
    if 'languages' not in book:
        book['languages'] = []

    if not book['languages']:
        # unknown language: accepted unless explicitly ignored
        return not ignore_empty_language

    # reject when a filter is set and no language matches it
    if languages and not set(book['languages']) & set(languages):
        return False

    return True
|
||||||
|
|
||||||
|
def has_identifiers(book, identifiers=None, ignore_empty_identifiers=False):
    """Return True when *book* passes the identifier (isbn, asin, ...) filter.

    Params:
        identifiers              : accepted identifier kinds; empty/None accepts all
        ignore_empty_identifiers : when True, books without identifiers are rejected
                                   (often OCR scans)

    Fix: the mutable default argument `identifiers=[]` was replaced by None.
    """
    identifiers = identifiers or []

    if not book['identifiers']:
        # no identifiers at all: accepted unless explicitly ignored
        return not ignore_empty_identifiers

    # reject when a filter is set and no identifier kind matches it
    if identifiers and not set(book['identifiers'].keys()) & set(identifiers):
        return False

    return True
|
||||||
|
|
||||||
|
def download_ebooks(dir= 'my_books', formats=[], single_format=False, ignored_formats=[], languages=[], identifiers=[], min_size=0, max_size=0, ignore_empty_language=False, ignore_empty_identifiers=False, dry_run=False, timer=0, map="", map_lib=""):
    '''
    Download ebooks in matching subdirs:

    The different formats of the same book are grouped in the same directory
    with an UUID name close to the metadata file (metadata.json).
    The status of the formats for a book and its global status are initially set to 'todo'.
    They move to 'done' after their download. This allows you to rerun the download and progressively collect books.
    You can use different options to filter the formats for the download
    by language, size, format and identifiers(isbn, ...).
    A report of the download is displayed at the end of the process.
    You can run this command in dry mode (--dry-run) with different settings
    to only display the report and prepare your effective run.

    Params:
    --min-size=<int> (default=0)
    --max-size=<int> (default=infinity) : Delimit the size in MB for the accepted formats
    --dry-run (default=False) : Run the command to simulate the download
    --languages=<string> : Restrict the download to a list of specific languages
                    (Ex: --languages='["eng","ita"]')
    --ignore-empty-language (default=False) : Ignore books with unidentified language
    --formats=<string> : Restrict the download to a list of specific formats
                    (Ex: --formats='["epub", "mobi", "pdf"]')
    --ignored-formats=<string> : Ignore the formats of a specific list.
                    Compliant with --formats.
                    (Ex: --ignored-formats='["mp3", "rar", "zip"]')
    --single-format (default=False) : Limit the download to 1 format per book with this preference order
                    'azw', 'azw3', 'cbr', 'chm', 'djvu', 'doc', 'docx', 'epub', 'kepub',
                    'lit', 'lrf', 'mobi', 'original_epub', 'pdf', 'ppt', 'prc', 'rar',
                    'rtf', 'txt', 'zip', 'fb2'
    --identifiers=<string> : Restrict the download to a list of specific identifiers
                    (Ex: --identifiers='["isbn","asin"]')
    --ignore-empty-identifiers (default=False) : Ignore books without identifiers (often OCR)
    '''

    print()

    # With --single-format and no explicit list, fall back to the module-level
    # preference order.
    if single_format: my_formats = formats if formats else all_ordered_formats
    else: my_formats=formats

    # Sizes are given in MB on the command line; convert to bytes.
    min_size=int(min_size)*1024*1024
    max_size=int(max_size)*1024*1024
    print ("Format expected between {} and {}".format(hsize(min_size), hsize(max_size) if max_size else "infinity"))

    # Counters for the final report.
    total_size=0
    total_size_by_format={}
    total_ebook_count=0
    total_format_count=0
    total_count_by_format={}
    size_max=0
    size_min=0
    language_count={}
    identifiers_count={}

    # One shared HTTP session for every download (connection reuse).
    s = requests.Session()

    for root, dirs, files in os.walk(dir, topdown=True):
        for counter, uuid in enumerate(dirs):
            book = load_metadata(root, uuid)
            if book:
                status=book['source']['status']
                if status=="todo":

                    # Apply the language / identifier filters first.
                    if not has_languages(book, languages=languages, ignore_empty_language=ignore_empty_language):
                        continue

                    if not has_identifiers(book, identifiers=identifiers, ignore_empty_identifiers=ignore_empty_identifiers):
                        continue

                    source=book['source']
                    download_formats = get_formats_to_download(book, accepted_formats=my_formats, single_format=single_format, ignored_formats=ignored_formats, max_size=max_size, min_size=min_size)
                    if not len(download_formats):
                        # print ("'{}' ignored: no more format available in formats expected {}".format(uuid, download_formats))
                        pass
                    else:
                        ebook_kept=False
                        for f in download_formats:
                            url = source['formats'][f]['url']
                            if url:
                                # A file already on disk for a 'todo' format
                                # shouldn't occur; it is retried anyway.
                                if get_file_path(dir, uuid, f):
                                    pass

                                if not dry_run:
                                    try:
                                        get_file(dir, book, f, s, map, map_lib)
                                        book['formats'].append(f)
                                        book['source']['formats'][f]['status']="done"
                                        if timer:
                                            # polite delay between downloads
                                            print(f"Waiting {timer} seconds")
                                            time.sleep(timer)
                                    except Exception as msg:
                                        print("Unable to get book:", url)
                                        print(msg)
                                        time.sleep(5)
                                        continue
                                    save_metadata(dir, book)

                                # Update report counters (also in --dry-run).
                                ebook_kept=True
                                size=source['formats'][f]['size']
                                total_size += size
                                size_max = size if size>size_max else size_max
                                if not size_min:
                                    size_min = size
                                else:
                                    size_min = size if size<size_min else size_min

                                if not f in total_size_by_format:
                                    total_size_by_format[f] = size
                                else: total_size_by_format[f] +=size
                                if not f in total_count_by_format:
                                    total_count_by_format[f] = 1
                                else:
                                    total_count_by_format[f]+=1
                                total_format_count +=1
                            else:
                                # print ("Format '{}' ignored for {} ({}): No url)".format(f, uuid, book['title']))
                                pass
                        if ebook_kept:
                            total_ebook_count+=1
                            # Per-language counters ('<unknown>' when empty).
                            if not book['languages']:
                                if not '<unknown>' in language_count:
                                    language_count['<unknown>'] = 1
                                else:
                                    language_count['<unknown>']+=1
                            else:
                                for l in book['languages']:
                                    if not l in language_count:
                                        language_count[l] = 1
                                    else:
                                        language_count[l]+=1
                            # Per-identifier counters (isbn, asin, ...).
                            if not book['identifiers']:
                                if not '<unknown>' in identifiers_count:
                                    identifiers_count['<unknown>'] = 1
                                else:
                                    identifiers_count['<unknown>']+=1
                            else:
                                for l in book['identifiers'].keys():
                                    if not l in identifiers_count:
                                        identifiers_count[l] = 1
                                    else:
                                        identifiers_count[l]+=1

                            if not dry_run:
                                # Persist the 'done' transition when every
                                # wanted format has been fetched.
                                update_done_status(book)
                                if book['source']['status']=="done":
                                    save_metadata(dir, book)
                                    print("Book done:", book['uuid'])
                                    print()
                else:
                    # already done/ignored: just refresh the progress line
                    print(f'--> {counter} books handled', end="\r")

    print()
    print("Reporting ...")

    # Top-10 languages table.
    table_l = BeautifulTable()
    table_l.column_headers = ["Language", "Ebooks count"]
    for l, c in language_count.items():
        table_l.append_row([l, c])
    table_l.sort("Ebooks count", reverse=True)
    table_l=table_l[0:10]

    # Top-10 identifiers table.
    table_i = BeautifulTable()
    table_i.column_headers = ["Identifier", "Ebooks count"]
    for i, c in identifiers_count.items():
        table_i.append_row([i, c])
    table_i.sort("Ebooks count", reverse=True)
    table_i=table_i[0:10]

    print()
    print("Top 10 ebooks by language/identifier:")
    table = BeautifulTable()
    table.column_headers = ["Languages", "Identifiers"]
    table.append_row([table_l, table_i])
    # table.set_style(BeautifulTable.STYLE_MARKDOWN)
    print(table)


    print()
    print("Total count of ebooks by format:")
    table = BeautifulTable()
    table.column_headers = ["Format", "Size", "Ebooks count"]
    for f in total_count_by_format.keys():
        table.append_row([f, hsize(total_size_by_format[f]),total_count_by_format[f]])
    table.sort("Ebooks count", reverse=True)
    # table.set_style(BeautifulTable.STYLE_MARKDOWN)
    print(table)


    table_c = BeautifulTable()
    table_c.column_headers = ["", "Total count"]
    table_c.append_row(["Formats", total_format_count])
    table_c.append_row(["Ebooks", total_ebook_count])

    table_s = BeautifulTable()
    table_s.column_headers = ["", "Size"]
    # table.append_row(["Min", hsize(size_min)])
    table_s.append_row(["Biggest File", hsize(size_max)])
    table_s.append_row(["Total", hsize(total_size)])

    print()
    print("Summary:")
    table = BeautifulTable()
    table.column_headers = ["Total Count", "Total Size"]
    table.append_row([table_c, table_s])
    # table.set_style(BeautifulTable.STYLE_MARKDOWN)
    print(table)

    print()
|
||||||
|
|
||||||
|
|
||||||
|
def get_formats_to_download(book, accepted_formats=None, ignored_formats=None, single_format=False, min_size=0, max_size=0):
    """Select which formats of *book* should be downloaded.

    Only formats whose source status is 'todo' are considered, then the
    accepted/ignored lists and the [min_size, max_size] byte window are
    applied. With single_format, the first match in accepted_formats'
    preference order wins.

    Fixes:
    - a format with no recorded 'size' raised KeyError when max_size == 0;
      it is now kept (unknown size is only rejected under an upper bound)
    - mutable default arguments `[]` replaced by None
    """
    accepted_formats = accepted_formats or []
    ignored_formats = ignored_formats or []
    source = book['source']

    # Formats still waiting for download.
    my_formats = [f for f, v in source['formats'].items() if v['status'] == 'todo']

    formats = []
    if single_format:
        if accepted_formats:
            # accepted_formats is in preference order: first available wins
            for f in accepted_formats:
                if f in my_formats:
                    formats = [f]
                    break
        else:
            print("need at least 1 format for ordering")
    else:
        if accepted_formats:
            formats = list(set(accepted_formats) & set(my_formats))
        elif ignored_formats:
            formats = list(set(my_formats) - set(ignored_formats))
        else:
            formats = my_formats

    # Apply the size window on a copy so we can remove while iterating.
    download_formats = formats[:]
    for f in formats:
        fmt = source['formats'][f]
        if 'size' not in fmt:
            # unknown size: reject only when an upper bound was requested
            if max_size:
                download_formats.remove(f)
            continue
        size = fmt['size']
        if size < min_size or (max_size and size > max_size):
            download_formats.remove(f)
    return download_formats
|
||||||
|
|
||||||
|
|
||||||
|
def update_format_statuses(book,refresh_ignored):
    """Reset every format of *book* back to 'todo' in place.

    Formats marked 'ignored' keep their status unless *refresh_ignored*
    is true, in which case they are reset as well.
    """
    for name, info in book['source']['formats'].items():
        if info['status'] != 'ignored' or refresh_ignored:
            book['source']['formats'][name]['status'] = 'todo'
|
||||||
|
|
||||||
|
import glob
def check_ebooks(dir= 'my_books', dry_run=True):
    '''
    Check ebooks:

    Reconcile on-disk files with the metadata: any 'todo' format whose file
    already exists in the book directory is flipped to 'done' (and persisted
    unless --dry-run, the default).
    '''

    print("Checking ...")

    for root, dirs, files in os.walk(dir, topdown=True):
        for counter, uuid in enumerate(dirs):
            book = load_metadata(root, uuid)
            if book:
                status=book['source']['status']
                if status=="todo":
                    print(status)
                    source=book['source']
                    update=False
                    for f, v in source["formats"].items():
                        print(uuid, f, v['status'])
                        if v['status']=="todo":
                            # look for an already-downloaded file of this format
                            formats= glob.glob(root+"/"+uuid+"/*."+f)
                            print(formats)
                            if formats:
                                print(book['uuid'], formats[0])
                                book['source']['formats'][f]['status']="done"
                                update=True

                    if not dry_run and update:
                        update_done_status(book)
                        save_metadata(dir, book)
                        print("Book done", book['uuid'])
                        print()
    print()
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == "__main__":
    # Expose the sub-commands through python-fire's CLI dispatcher.
    commands = {
        "index_ebooks": index_ebooks,
        "download_ebooks": download_ebooks,
        "download_covers": download_covers,
        "set_status": set_status,
        "check_ebooks": check_ebooks,
    }
    fire.Fire(commands)
|
Loading…
x
Reference in New Issue
Block a user