From 0aa5a89adedb1d8aaecac3e9561e3bfb4c6678fb Mon Sep 17 00:00:00 2001
From: Micah Lee <micah@micahflee.com>
Date: Mon, 2 Sep 2019 19:45:14 -0700
Subject: [PATCH] When downloading individual files in either share or website
 mode, gzip the file if needed, and stream the file in such a way that a
 progress bar is possible

---
 onionshare/web/send_base_mode.py | 112 +++++++++++++++++++++++++++++++
 onionshare/web/share_mode.py     |  36 ++--------
 onionshare/web/website_mode.py   |  17 ++---
 3 files changed, 121 insertions(+), 44 deletions(-)

diff --git a/onionshare/web/send_base_mode.py b/onionshare/web/send_base_mode.py
index 6468258a..88dbd008 100644
--- a/onionshare/web/send_base_mode.py
+++ b/onionshare/web/send_base_mode.py
@@ -2,6 +2,7 @@ import os
 import sys
 import tempfile
 import mimetypes
+import gzip
 from flask import Response, request, render_template, make_response
 
 from .. import strings
@@ -148,3 +149,114 @@ class SendBaseModeWeb:
         Inherited class will implement this.
         """
         pass
+
+    def stream_individual_file(self, filesystem_path):
+        """
+        Return a flask response that streams the download of the individual file at
+        filesystem_path, gzip compressing it first if should_use_gzip() allows it. The
+        Content-Length header is set to the size of the file that actually gets sent.
+        """
+        use_gzip = self.should_use_gzip()
+
+        # gzip compress the individual file, if it hasn't already been compressed
+        if use_gzip:
+            if filesystem_path not in self.gzip_individual_files:
+                # mkstemp() returns an open file descriptor along with the path. Only
+                # the path is needed, so close the descriptor instead of leaking it
+                fd, gzip_filename = tempfile.mkstemp()
+                os.close(fd)
+
+                # Make sure the gzip file gets cleaned up when onionshare stops. It is
+                # registered before compressing so a failed compression is cleaned up too
+                self.cleanup_filenames.append(gzip_filename)
+
+                self._gzip_compress(filesystem_path, gzip_filename, 6, None)
+                self.gzip_individual_files[filesystem_path] = gzip_filename
+
+            file_to_download = self.gzip_individual_files[filesystem_path]
+        else:
+            file_to_download = filesystem_path
+        filesize = os.path.getsize(file_to_download)
+
+        # TODO: Tell GUI the download started
+        #self.web.add_request(self.web.REQUEST_STARTED, path, {
+        #    'id': download_id,
+        #    'use_gzip': use_gzip
+        #})
+
+        def generate():
+            """Yield the file in chunks, writing the progress to stdout as it goes."""
+            chunk_size = 102400  # 100kb
+
+            try:
+                # The with block closes the file however the generator ends
+                with open(file_to_download, 'rb') as fp:
+                    while True:
+                        chunk = fp.read(chunk_size)
+                        if not chunk:
+                            break
+                        yield chunk
+
+                        # TODO: Tell GUI the progress
+                        downloaded_bytes = fp.tell()
+                        percent = (1.0 * downloaded_bytes / filesize) * 100
+                        if not self.web.is_gui or self.common.platform == 'Linux' or self.common.platform == 'BSD':
+                            sys.stdout.write(
+                                "\r{0:s}, {1:.2f}%          ".format(self.common.human_readable_filesize(downloaded_bytes), percent))
+                            sys.stdout.flush()
+
+                        #self.web.add_request(self.web.REQUEST_PROGRESS, path, {
+                        #    'id': download_id,
+                        #    'bytes': downloaded_bytes
+                        #    })
+            except GeneratorExit:
+                # The generator was closed before the whole file was sent, so it looks
+                # like the download was canceled
+                # TODO: Tell the GUI the download has canceled
+                #self.web.add_request(self.web.REQUEST_CANCELED, path, {
+                #    'id': download_id
+                #})
+                pass
+
+            if self.common.platform != 'Darwin':
+                sys.stdout.write("\n")
+
+        basename = os.path.basename(filesystem_path)
+
+        r = Response(generate())
+        if use_gzip:
+            r.headers.set('Content-Encoding', 'gzip')
+        r.headers.set('Content-Length', filesize)
+        r.headers.set('Content-Disposition', 'inline', filename=basename)
+        r = self.web.add_security_headers(r)
+        (content_type, _) = mimetypes.guess_type(basename, strict=False)
+        if content_type is not None:
+            r.headers.set('Content-Type', content_type)
+        return r
+
+    def should_use_gzip(self):
+        """Tell whether the response may be gzipped: not zipped, and gzip is accepted."""
+        if self.is_zipped:
+            return False
+        return 'gzip' in request.headers.get('Accept-Encoding', '').lower()
+
+    def _gzip_compress(self, input_filename, output_filename, level, processed_size_callback=None):
+        """
+        Compress a file with gzip, without loading the whole thing into memory. If
+        processed_size_callback is given, it is called before every read with the
+        number of input bytes compressed so far.
+        Thanks: https://stackoverflow.com/questions/27035296/python-how-to-gzip-a-large-text-file-without-memoryerror
+        """
+        bytes_processed = 0
+        blocksize = 1 << 16 # 64kB
+        # Both files are context managed, so they get closed even if an error occurs
+        with open(input_filename, 'rb') as input_file, gzip.open(output_filename, 'wb', level) as output_file:
+            while True:
+                if processed_size_callback is not None:
+                    processed_size_callback(bytes_processed)
+                block = input_file.read(blocksize)
+                if not block:
+                    break
+                output_file.write(block)
+                # Count what was actually read, since the last block is usually short
+                bytes_processed += len(block)
diff --git a/onionshare/web/share_mode.py b/onionshare/web/share_mode.py
index b478fbd4..07cf0548 100644
--- a/onionshare/web/share_mode.py
+++ b/onionshare/web/share_mode.py
@@ -3,8 +3,7 @@ import sys
 import tempfile
 import zipfile
 import mimetypes
-import gzip
-from flask import Response, request, render_template, make_response, send_from_directory
+from flask import Response, request, render_template, make_response
 
 from .send_base_mode import SendBaseModeWeb
 from .. import strings
@@ -16,8 +15,10 @@ class ShareModeWeb(SendBaseModeWeb):
     """
     def init(self):
         self.common.log('ShareModeWeb', 'init')
+
         # Allow downloading individual files if "Stop sharing after files have been sent" is unchecked
         self.download_individual_files = not self.common.settings.get('close_after_first_download')
+        self.gzip_individual_files = {}
 
     def define_routes(self):
         """
@@ -207,9 +208,7 @@ class ShareModeWeb(SendBaseModeWeb):
             # If it's a file
             elif os.path.isfile(filesystem_path):
                 if self.download_individual_files:
-                    dirname = os.path.dirname(filesystem_path)
-                    basename = os.path.basename(filesystem_path)
-                    return send_from_directory(dirname, basename)
+                    return self.stream_individual_file(filesystem_path)
                 else:
                     return self.web.error404()
 
@@ -287,33 +286,6 @@ class ShareModeWeb(SendBaseModeWeb):
 
         return True
 
-    def should_use_gzip(self):
-        """
-        Should we use gzip for this browser?
-        """
-        return (not self.is_zipped) and ('gzip' in request.headers.get('Accept-Encoding', '').lower())
-
-    def _gzip_compress(self, input_filename, output_filename, level, processed_size_callback=None):
-        """
-        Compress a file with gzip, without loading the whole thing into memory
-        Thanks: https://stackoverflow.com/questions/27035296/python-how-to-gzip-a-large-text-file-without-memoryerror
-        """
-        bytes_processed = 0
-        blocksize = 1 << 16 # 64kB
-        with open(input_filename, 'rb') as input_file:
-            output_file = gzip.open(output_filename, 'wb', level)
-            while True:
-                if processed_size_callback is not None:
-                    processed_size_callback(bytes_processed)
-
-                block = input_file.read(blocksize)
-                if len(block) == 0:
-                    break
-                output_file.write(block)
-                bytes_processed += blocksize
-
-            output_file.close()
-
 
 class ZipWriter(object):
     """
diff --git a/onionshare/web/website_mode.py b/onionshare/web/website_mode.py
index 82cebdb7..e409e7be 100644
--- a/onionshare/web/website_mode.py
+++ b/onionshare/web/website_mode.py
@@ -2,7 +2,7 @@ import os
 import sys
 import tempfile
 import mimetypes
-from flask import Response, request, render_template, make_response, send_from_directory
+from flask import Response, request, render_template, make_response
 
 from .send_base_mode import SendBaseModeWeb
 from .. import strings
@@ -13,7 +13,7 @@ class WebsiteModeWeb(SendBaseModeWeb):
     All of the web logic for website mode
     """
     def init(self):
-        pass
+        self.gzip_individual_files = {}
 
     def define_routes(self):
         """
@@ -62,10 +62,7 @@ class WebsiteModeWeb(SendBaseModeWeb):
                 index_path = os.path.join(path, 'index.html')
                 if index_path in self.files:
                     # Render it
-                    dirname = os.path.dirname(self.files[index_path])
-                    basename = os.path.basename(self.files[index_path])
-
-                    return send_from_directory(dirname, basename)
+                    return self.stream_individual_file(self.files[index_path])
 
                 else:
                     # Otherwise, render directory listing
@@ -80,9 +77,7 @@ class WebsiteModeWeb(SendBaseModeWeb):
 
             # If it's a file
             elif os.path.isfile(filesystem_path):
-                dirname = os.path.dirname(filesystem_path)
-                basename = os.path.basename(filesystem_path)
-                return send_from_directory(dirname, basename)
+                return self.stream_individual_file(filesystem_path)
 
             # If it's not a directory or file, throw a 404
             else:
@@ -94,9 +89,7 @@ class WebsiteModeWeb(SendBaseModeWeb):
                 index_path = 'index.html'
                 if index_path in self.files:
                     # Render it
-                    dirname = os.path.dirname(self.files[index_path])
-                    basename = os.path.basename(self.files[index_path])
-                    return send_from_directory(dirname, basename)
+                    return self.stream_individual_file(self.files[index_path])
                 else:
                     # Root directory listing
                     filenames = list(self.root_files)