diff --git a/README.md b/README.md index e8dfe03..22ff44d 100644 --- a/README.md +++ b/README.md @@ -52,7 +52,11 @@ Note that you can add multiple hosts from which you want to accept API requests. a space. **IMPORTANT:** The default value if the variable is not set is: `Access-Control-Allow-Origin: *` -Configure another environment variable: `MAT2_MAX_FILES_BULK_DOWNLOAD=10` +Configure the following environment variables: + + - `MAT2_MAX_FILES_BULK_DOWNLOAD=10` Max number of files that can be grouped for a bulk download. + - `MAT2_MAX_FILE_AGE_FOR_REMOVAL=900` Seconds a file in the upload folder is kept. + After that it will be deleted. Default `15 * 60` This specifies the max number of files that can be bulk downloaded using the api. Note: Each file has a max file size of 16mb @@ -66,10 +70,6 @@ systemctl restart nginx/apache/… It should now be working. -You should add `find /var/www/mat2-web/uploads/ -type f -mtime +1 -exec rm {} \;` -in a crontab to remove files that people might have uploaded but never -downloaded. - # Deploy via Ansible If you happen to be using [Ansible](https://www.ansible.com/), there's an @@ -92,10 +92,6 @@ https://0xacab.org/jvoisin/mat2-web/container_registry Example: `docker run -p 80:80 -d -e MAT2_ALLOW_ORIGIN_WHITELIST='https://myhost1.org' registry.0xacab.org/jvoisin/mat2-web:latest` -Make sure to add -`find /var/www/mat2-web/uploads/ -type f -mtime +1 -exec rm {} \;` as cron job -run inside the container. - # Development Install docker and docker-compose and then run `docker-compose up` to setup the docker dev environment. Mat2-web is now accessible on your host machine at `localhost:5000`. diff --git a/docker-compose.yml b/docker-compose.yml index e925447..36678c0 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -9,6 +9,7 @@ services: - FLASK_ENV=development - MAT2_ALLOW_ORIGIN_WHITELIST=* - MAT2_MAX_FILES_BULK_DOWNLOAD=10 + - MAT2_MAX_FILE_AGE_FOR_REMOVAL=60 ports: - "5000:5000" volumes: diff --git a/file_removal_scheduler.py b/file_removal_scheduler.py new file mode 100644 index 0000000..2ce7912 --- /dev/null +++ b/file_removal_scheduler.py @@ -0,0 +1,26 @@ +import glob +import time +import sys +import os +import random + + +def run_file_removal_job(upload_folder_path): + if random.randint(0, 10) == 0: + for file in glob.glob(upload_folder_path + '/*'): + delete_file_when_too_old(file) + + +def delete_file_when_too_old(filepath): + file_mod_time = os.stat(filepath).st_mtime + + # time in second since last modification of file + last_time = time.time() - file_mod_time + + # if file is older than our configured max timeframe, delete it + if last_time > int(os.environ.get('MAT2_MAX_FILE_AGE_FOR_REMOVAL', 15 * 60)): + try: + os.remove(filepath) + except OSError: + print('Automatic File Removal failed on file: ' + str(filepath)) + sys.exit(1) diff --git a/main.py b/main.py index bd1a0c3..4cdb43e 100644 --- a/main.py +++ b/main.py @@ -10,6 +10,7 @@ import zipfile from cerberus import Validator import utils +import file_removal_scheduler from libmat2 import parser_factory from flask import Flask, flash, request, redirect, url_for, render_template, send_from_directory, after_this_request from flask_restful import Resource, Api, reqparse, abort @@ -25,31 +26,36 @@ def create_app(test_config=None): app.config['UPLOAD_FOLDER'] = './uploads/' app.config['MAX_CONTENT_LENGTH'] = 16 * 1024 * 1024 # 16MB app.config['CUSTOM_TEMPLATES_DIR'] = 'custom_templates' - app.config.from_object('config') # optionally load settings from config.py + # optionally load settings from config.py + app.config.from_object('config') + + if test_config is not None: + app.config.update(test_config) app.jinja_loader = jinja2.ChoiceLoader([ # type: ignore jinja2.FileSystemLoader(app.config['CUSTOM_TEMPLATES_DIR']), app.jinja_loader, - ]) + ]) api = Api(app) CORS(app, resources={r"/api/*": {"origins": utils.get_allow_origin_header_value()}}) @app.route('/download//') - def download_file(key: str, filename:str): + def download_file(key: str, filename: str): if filename != secure_filename(filename): return redirect(url_for('upload_file')) complete_path, filepath = get_file_paths(filename) + file_removal_scheduler.run_file_removal_job(app.config['UPLOAD_FOLDER']) if not os.path.exists(complete_path): return redirect(url_for('upload_file')) if hmac.compare_digest(utils.hash_file(complete_path), key) is False: return redirect(url_for('upload_file')) - @after_this_request def remove_file(response): - os.remove(complete_path) + if os.path.exists(complete_path): + os.remove(complete_path) return response return send_from_directory(app.config['UPLOAD_FOLDER'], filepath, as_attachment=True) @@ -176,9 +182,11 @@ def create_app(test_config=None): complete_path, filepath = is_valid_api_download_file(filename, key) # Make sure the file is NOT deleted on HEAD requests if request.method == 'GET': + file_removal_scheduler.run_file_removal_job(app.config['UPLOAD_FOLDER']) @after_this_request def remove_file(response): - os.remove(complete_path) + if os.path.exists(complete_path): + os.remove(complete_path) return response return send_from_directory(app.config['UPLOAD_FOLDER'], filepath, as_attachment=True) diff --git a/requirements.txt b/requirements.txt index 42b75e2..61f9711 100644 --- a/requirements.txt +++ b/requirements.txt @@ -5,4 +5,4 @@ mat2==0.9.0 flask==1.0.3 Flask-RESTful==0.3.7 Flask-Cors==3.0.8 -Cerberus==1.3.1 \ No newline at end of file +Cerberus==1.3.1 diff --git a/templates/download.html b/templates/download.html index c6a4f07..e39b16b 100644 --- a/templates/download.html +++ b/templates/download.html @@ -14,7 +14,7 @@ mat2 could not remove all the metadata from
{{ filename }}
, th {%endif %}

-⇩ Download cleaned file +⇩ Download cleaned file
diff --git a/test/test.py b/test/test.py index 34245d9..a7e6627 100644 --- a/test/test.py +++ b/test/test.py @@ -1,9 +1,12 @@ +import base64 import unittest import tempfile import shutil import io import os +from unittest.mock import patch + import main @@ -62,6 +65,33 @@ class Mat2WebTestCase(unittest.TestCase): rv.data) self.assertEqual(rv.status_code, 200) + def test_get_upload_no_selected_file(self): + rv = self.app.post('/', + data=dict( + file=(io.BytesIO(b""), ''), + ), follow_redirects=True) + self.assertIn(b'No selected file', + rv.data) + self.assertEqual(rv.status_code, 200) + + def test_failed_cleaning(self): + zip_file_bytes = base64.b64decode( + 'UEsDBBQACAAIAPicPE8AAAAAAAAAAAAAAAAXACAAZmFpbGluZy5ub3Qtd29ya2luZy1le' + 'HRVVA0AB+Saj13kmo9d5JqPXXV4CwABBOkDAAAE6QMAAAMAUEsHCAAAAAACAAAAAAAAAFBL' + 'AwQUAAgACAD6nDxPAAAAAAAAAAAAAAAACQAgAHRlc3QuanNvblVUDQAH6JqPXeiaj13omo9d' + 'dXgLAAEE6QMAAATpAwAAAwBQSwcIAAAAAAIAAAAAAAAAUEsBAhQDFAAIAAgA+Jw8TwAAAAACA' + 'AAAAAAAABcAIAAAAAAAAAAAAKSBAAAAAGZhaWxpbmcubm90LXdvcmtpbmctZXh0VVQNAAfkmo9' + 'd5JqPXeSaj111eAsAAQTpAwAABOkDAABQSwECFAMUAAgACAD6nDxPAAAAAAIAAAAAAAAACQAgA' + 'AAAAAAAAAAApIFnAAAAdGVzdC5qc29uVVQNAAfomo9d6JqPXeiaj111eAsAAQTpAwAABOkDAAB' + 'QSwUGAAAAAAIAAgC8AAAAwAAAAAAA' + ) + rv = self.app.post('/', + data=dict( + file=(io.BytesIO(zip_file_bytes), 'test.zip'), + ), follow_redirects=True) + self.assertIn(b'Unable to clean',rv.data) + self.assertEqual(rv.status_code, 200) + def test_get_upload_no_file_name(self): rv = self.app.post('/', data=dict( @@ -97,6 +127,29 @@ class Mat2WebTestCase(unittest.TestCase): rv = self.app.get('/download/70623619c449a040968cdbea85945bf384fa30ed2d5d24fa3/test.cleaned.txt') self.assertEqual(rv.status_code, 302) + @patch('file_removal_scheduler.random.randint') + def test_upload_leftover(self, randint_mock): + randint_mock.return_value = 0 + os.environ['MAT2_MAX_FILE_AGE_FOR_REMOVAL'] = '0' + app = main.create_app() + self.upload_folder = tempfile.mkdtemp() + app.config.update( + TESTING=True, + UPLOAD_FOLDER=self.upload_folder + ) + app = app.test_client() + + request = self.app.post('/', + data=dict( + file=(io.BytesIO(b"Some text"), 'test.txt'), + ), follow_redirects=True) + self.assertEqual(request.status_code, 200) + request = app.get( + b'/download/4c2e9e6da31a64c70623619c449a040968cdbea85945bf384fa30ed2d5d24fa3/test.cleaned.txt' + ) + self.assertEqual(302, request.status_code) + os.environ['MAT2_MAX_FILE_AGE_FOR_REMOVAL'] = '9999' + if __name__ == '__main__': unittest.main() diff --git a/test/test_api.py b/test/test_api.py index de297c4..3074bd5 100644 --- a/test/test_api.py +++ b/test/test_api.py @@ -4,9 +4,10 @@ import json import os import shutil import zipfile - from six import BytesIO +from unittest.mock import patch + import main @@ -122,6 +123,23 @@ class Mat2APITestCase(unittest.TestCase): rv = self.app.get('/api/extension', headers={'Origin': 'origin1.gnu'}) self.assertEqual(rv.headers['Access-Control-Allow-Origin'], 'origin1.gnu') + def test_api_cleaning_failed(self): + request = self.app.post('/api/upload', + data='{"file_name": "test_name.zip", ' + '"file": "UEsDBBQACAAIAPicPE8AAAAAAAAAAAAAAAAXACAAZmFpbGluZy5ub3Qt' + 'd29ya2luZy1leHRVVA0AB+Saj13kmo9d5JqPXXV4CwABBOkDAAAE6QMAAAMAUEsHCAAA' + 'AAACAAAAAAAAAFBLAwQUAAgACAD6nDxPAAAAAAAAAAAAAAAACQAgAHRlc3QuanNvblVUD' + 'QAH6JqPXeiaj13omo9ddXgLAAEE6QMAAATpAwAAAwBQSwcIAAAAAAIAAAAAAAAAUEsBAhQD' + 'FAAIAAgA+Jw8TwAAAAACAAAAAAAAABcAIAAAAAAAAAAAAKSBAAAAAGZhaWxpbmcubm90LXd' + 'vcmtpbmctZXh0VVQNAAfkmo9d5JqPXeSaj111eAsAAQTpAwAABOkDAABQSwECFAMUAAgACAD6' + 'nDxPAAAAAAIAAAAAAAAACQAgAAAAAAAAAAAApIFnAAAAdGVzdC5qc29uVVQNAAfomo9d6JqPXe' + 'iaj111eAsAAQTpAwAABOkDAABQSwUGAAAAAAIAAgC8AAAAwAAAAAAA"}', + headers={'content-type': 'application/json'} + ) + error = json.loads(request.data.decode('utf-8'))['message'] + self.assertEqual(error, 'Unable to clean application/zip') + + def test_api_download(self): request = self.app.post('/api/upload', data='{"file_name": "test_name.jpg", ' @@ -263,7 +281,6 @@ class Mat2APITestCase(unittest.TestCase): ) response = json.loads(request.data.decode('utf-8')) - print(response) self.assertEqual(response['message']['download_list'][0]['0'][0]['file_name'][0], 'required field') self.assertEqual(response['message']['download_list'][0]['0'][0]['key'][0], 'required field') self.assertEqual(request.status_code, 400) @@ -344,6 +361,34 @@ class Mat2APITestCase(unittest.TestCase): response = json.loads(request.data.decode('utf-8')) self.assertEqual('File not found', response['message']) + @patch('file_removal_scheduler.random.randint') + def test_api_upload_leftover(self, randint_mock): + os.environ['MAT2_MAX_FILE_AGE_FOR_REMOVAL'] = '0' + app = main.create_app() + self.upload_folder = tempfile.mkdtemp() + app.config.update( + TESTING=True, + UPLOAD_FOLDER=self.upload_folder + ) + app = app.test_client() + randint_mock.return_value = 1 + self.upload_download_test_jpg_and_assert_response_code(app, 200) + randint_mock.return_value = 0 + self.upload_download_test_jpg_and_assert_response_code(app, 404) + + os.environ['MAT2_MAX_FILE_AGE_FOR_REMOVAL'] = '9999' + + def upload_download_test_jpg_and_assert_response_code(self, app, code): + request = app.post('/api/upload', + data='{"file_name": "test_name.jpg", ' + '"file": "iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAYAAAAf' + 'FcSJAAAADUlEQVR42mNk+M9QDwADhgGAWjR9awAAAABJRU5ErkJggg=="}', + headers={'content-type': 'application/json'} + ) + download_link = json.loads(request.data.decode('utf-8'))['download_link'] + request = app.get(download_link) + self.assertEqual(code, request.status_code) + if __name__ == '__main__': unittest.main() diff --git a/test/test_file_removal_scheduler.py b/test/test_file_removal_scheduler.py new file mode 100644 index 0000000..7f6771a --- /dev/null +++ b/test/test_file_removal_scheduler.py @@ -0,0 +1,48 @@ +import unittest +import tempfile +from os import path, environ +import shutil + +import file_removal_scheduler +import main + + +class Mat2WebTestCase(unittest.TestCase): + def setUp(self): + self.upload_folder = tempfile.mkdtemp() + app = main.create_app() + app.config.update( + TESTING=True, + UPLOAD_FOLDER=self.upload_folder + ) + self.app = app + + def test_removal(self): + filename = 'test_name.cleaned.jpg' + environ['MAT2_MAX_FILE_AGE_FOR_REMOVAL'] = '0' + open(path.join(self.upload_folder, filename), 'a').close() + self.assertTrue(path.exists(path.join(self.upload_folder, ))) + for i in range(0, 11): + file_removal_scheduler.run_file_removal_job(self.app.config['UPLOAD_FOLDER']) + self.assertFalse(path.exists(path.join(self.upload_folder, filename))) + + open(path.join(self.upload_folder, filename), 'a').close() + file_removal_scheduler.run_file_removal_job(self.app.config['UPLOAD_FOLDER']) + self.assertTrue(path.exists(path.join(self.upload_folder, ))) + + def test_non_removal(self): + filename = u'i_should_no_be_removed.txt' + environ['MAT2_MAX_FILE_AGE_FOR_REMOVAL'] = '9999999' + open(path.join(self.upload_folder, filename), 'a').close() + self.assertTrue(path.exists(path.join(self.upload_folder, filename))) + for i in range(0, 11): + file_removal_scheduler.run_file_removal_job(self.app.config['UPLOAD_FOLDER']) + self.assertTrue(path.exists(path.join(self.upload_folder, filename))) + + def tearDown(self): + shutil.rmtree(self.upload_folder) + + +if __name__ == '__main__': + unittest.main() +