diff --git a/README.md b/README.md
index e8dfe03..22ff44d 100644
--- a/README.md
+++ b/README.md
@@ -52,7 +52,11 @@ Note that you can add multiple hosts from which you want to accept API requests.
a space.
**IMPORTANT:** The default value if the variable is not set is: `Access-Control-Allow-Origin: *`
-Configure another environment variable: `MAT2_MAX_FILES_BULK_DOWNLOAD=10`
+Configure the following environment variables:
+
+ - `MAT2_MAX_FILES_BULK_DOWNLOAD=10` Max number of files that can be grouped for a bulk download.
+ - `MAT2_MAX_FILE_AGE_FOR_REMOVAL=900` Seconds a file in the upload folder is kept.
+ After that it will be deleted. Default `15 * 60`
This specifies the max number of files that can be bulk downloaded using the api.
Note: Each file has a max file size of 16mb
@@ -66,10 +70,6 @@ systemctl restart nginx/apache/…
It should now be working.
-You should add `find /var/www/mat2-web/uploads/ -type f -mtime +1 -exec rm {} \;`
-in a crontab to remove files that people might have uploaded but never
-downloaded.
-
# Deploy via Ansible
If you happen to be using [Ansible](https://www.ansible.com/), there's an
@@ -92,10 +92,6 @@ https://0xacab.org/jvoisin/mat2-web/container_registry
Example:
`docker run -p 80:80 -d -e MAT2_ALLOW_ORIGIN_WHITELIST='https://myhost1.org' registry.0xacab.org/jvoisin/mat2-web:latest`
-Make sure to add
-`find /var/www/mat2-web/uploads/ -type f -mtime +1 -exec rm {} \;` as cron job
-run inside the container.
-
# Development
Install docker and docker-compose and then run `docker-compose up` to setup
the docker dev environment. Mat2-web is now accessible on your host machine at `localhost:5000`.
diff --git a/docker-compose.yml b/docker-compose.yml
index e925447..36678c0 100644
--- a/docker-compose.yml
+++ b/docker-compose.yml
@@ -9,6 +9,7 @@ services:
- FLASK_ENV=development
- MAT2_ALLOW_ORIGIN_WHITELIST=*
- MAT2_MAX_FILES_BULK_DOWNLOAD=10
+ - MAT2_MAX_FILE_AGE_FOR_REMOVAL=60
ports:
- "5000:5000"
volumes:
diff --git a/file_removal_scheduler.py b/file_removal_scheduler.py
new file mode 100644
index 0000000..2ce7912
--- /dev/null
+++ b/file_removal_scheduler.py
@@ -0,0 +1,26 @@
+import glob
+import time
+import sys
+import os
+import random
+
+
+def run_file_removal_job(upload_folder_path):
+ if random.randint(0, 10) == 0:
+ for file in glob.glob(upload_folder_path + '/*'):
+ delete_file_when_too_old(file)
+
+
+def delete_file_when_too_old(filepath):
+ file_mod_time = os.stat(filepath).st_mtime
+
+ # time in second since last modification of file
+ last_time = time.time() - file_mod_time
+
+ # if file is older than our configured max timeframe, delete it
+ if last_time > int(os.environ.get('MAT2_MAX_FILE_AGE_FOR_REMOVAL', 15 * 60)):
+ try:
+ os.remove(filepath)
+ except OSError:
+ print('Automatic File Removal failed on file: ' + str(filepath))
+ sys.exit(1)
diff --git a/main.py b/main.py
index bd1a0c3..4cdb43e 100644
--- a/main.py
+++ b/main.py
@@ -10,6 +10,7 @@ import zipfile
from cerberus import Validator
import utils
+import file_removal_scheduler
from libmat2 import parser_factory
from flask import Flask, flash, request, redirect, url_for, render_template, send_from_directory, after_this_request
from flask_restful import Resource, Api, reqparse, abort
@@ -25,31 +26,36 @@ def create_app(test_config=None):
app.config['UPLOAD_FOLDER'] = './uploads/'
app.config['MAX_CONTENT_LENGTH'] = 16 * 1024 * 1024 # 16MB
app.config['CUSTOM_TEMPLATES_DIR'] = 'custom_templates'
- app.config.from_object('config') # optionally load settings from config.py
+ # optionally load settings from config.py
+ app.config.from_object('config')
+
+ if test_config is not None:
+ app.config.update(test_config)
app.jinja_loader = jinja2.ChoiceLoader([ # type: ignore
jinja2.FileSystemLoader(app.config['CUSTOM_TEMPLATES_DIR']),
app.jinja_loader,
- ])
+ ])
api = Api(app)
CORS(app, resources={r"/api/*": {"origins": utils.get_allow_origin_header_value()}})
@app.route('/download//')
- def download_file(key: str, filename:str):
+ def download_file(key: str, filename: str):
if filename != secure_filename(filename):
return redirect(url_for('upload_file'))
complete_path, filepath = get_file_paths(filename)
+ file_removal_scheduler.run_file_removal_job(app.config['UPLOAD_FOLDER'])
if not os.path.exists(complete_path):
return redirect(url_for('upload_file'))
if hmac.compare_digest(utils.hash_file(complete_path), key) is False:
return redirect(url_for('upload_file'))
-
@after_this_request
def remove_file(response):
- os.remove(complete_path)
+ if os.path.exists(complete_path):
+ os.remove(complete_path)
return response
return send_from_directory(app.config['UPLOAD_FOLDER'], filepath, as_attachment=True)
@@ -176,9 +182,11 @@ def create_app(test_config=None):
complete_path, filepath = is_valid_api_download_file(filename, key)
# Make sure the file is NOT deleted on HEAD requests
if request.method == 'GET':
+ file_removal_scheduler.run_file_removal_job(app.config['UPLOAD_FOLDER'])
@after_this_request
def remove_file(response):
- os.remove(complete_path)
+ if os.path.exists(complete_path):
+ os.remove(complete_path)
return response
return send_from_directory(app.config['UPLOAD_FOLDER'], filepath, as_attachment=True)
diff --git a/requirements.txt b/requirements.txt
index 42b75e2..61f9711 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -5,4 +5,4 @@ mat2==0.9.0
flask==1.0.3
Flask-RESTful==0.3.7
Flask-Cors==3.0.8
-Cerberus==1.3.1
\ No newline at end of file
+Cerberus==1.3.1
diff --git a/templates/download.html b/templates/download.html
index c6a4f07..e39b16b 100644
--- a/templates/download.html
+++ b/templates/download.html
@@ -14,7 +14,7 @@ mat2 could not remove all the metadata from {{ filename }}
, th
{%endif %}
-⇩ Download cleaned file
+⇩ Download cleaned file
diff --git a/test/test.py b/test/test.py
index 34245d9..a7e6627 100644
--- a/test/test.py
+++ b/test/test.py
@@ -1,9 +1,12 @@
+import base64
import unittest
import tempfile
import shutil
import io
import os
+from unittest.mock import patch
+
import main
@@ -62,6 +65,33 @@ class Mat2WebTestCase(unittest.TestCase):
rv.data)
self.assertEqual(rv.status_code, 200)
+ def test_get_upload_no_selected_file(self):
+ rv = self.app.post('/',
+ data=dict(
+ file=(io.BytesIO(b""), ''),
+ ), follow_redirects=True)
+ self.assertIn(b'No selected file',
+ rv.data)
+ self.assertEqual(rv.status_code, 200)
+
+ def test_failed_cleaning(self):
+ zip_file_bytes = base64.b64decode(
+ 'UEsDBBQACAAIAPicPE8AAAAAAAAAAAAAAAAXACAAZmFpbGluZy5ub3Qtd29ya2luZy1le'
+ 'HRVVA0AB+Saj13kmo9d5JqPXXV4CwABBOkDAAAE6QMAAAMAUEsHCAAAAAACAAAAAAAAAFBL'
+ 'AwQUAAgACAD6nDxPAAAAAAAAAAAAAAAACQAgAHRlc3QuanNvblVUDQAH6JqPXeiaj13omo9d'
+ 'dXgLAAEE6QMAAATpAwAAAwBQSwcIAAAAAAIAAAAAAAAAUEsBAhQDFAAIAAgA+Jw8TwAAAAACA'
+ 'AAAAAAAABcAIAAAAAAAAAAAAKSBAAAAAGZhaWxpbmcubm90LXdvcmtpbmctZXh0VVQNAAfkmo9'
+ 'd5JqPXeSaj111eAsAAQTpAwAABOkDAABQSwECFAMUAAgACAD6nDxPAAAAAAIAAAAAAAAACQAgA'
+ 'AAAAAAAAAAApIFnAAAAdGVzdC5qc29uVVQNAAfomo9d6JqPXeiaj111eAsAAQTpAwAABOkDAAB'
+ 'QSwUGAAAAAAIAAgC8AAAAwAAAAAAA'
+ )
+ rv = self.app.post('/',
+ data=dict(
+ file=(io.BytesIO(zip_file_bytes), 'test.zip'),
+ ), follow_redirects=True)
+ self.assertIn(b'Unable to clean',rv.data)
+ self.assertEqual(rv.status_code, 200)
+
def test_get_upload_no_file_name(self):
rv = self.app.post('/',
data=dict(
@@ -97,6 +127,29 @@ class Mat2WebTestCase(unittest.TestCase):
rv = self.app.get('/download/70623619c449a040968cdbea85945bf384fa30ed2d5d24fa3/test.cleaned.txt')
self.assertEqual(rv.status_code, 302)
+ @patch('file_removal_scheduler.random.randint')
+ def test_upload_leftover(self, randint_mock):
+ randint_mock.return_value = 0
+ os.environ['MAT2_MAX_FILE_AGE_FOR_REMOVAL'] = '0'
+ app = main.create_app()
+ self.upload_folder = tempfile.mkdtemp()
+ app.config.update(
+ TESTING=True,
+ UPLOAD_FOLDER=self.upload_folder
+ )
+ app = app.test_client()
+
+ request = self.app.post('/',
+ data=dict(
+ file=(io.BytesIO(b"Some text"), 'test.txt'),
+ ), follow_redirects=True)
+ self.assertEqual(request.status_code, 200)
+ request = app.get(
+ b'/download/4c2e9e6da31a64c70623619c449a040968cdbea85945bf384fa30ed2d5d24fa3/test.cleaned.txt'
+ )
+ self.assertEqual(302, request.status_code)
+ os.environ['MAT2_MAX_FILE_AGE_FOR_REMOVAL'] = '9999'
+
if __name__ == '__main__':
unittest.main()
diff --git a/test/test_api.py b/test/test_api.py
index de297c4..3074bd5 100644
--- a/test/test_api.py
+++ b/test/test_api.py
@@ -4,9 +4,10 @@ import json
import os
import shutil
import zipfile
-
from six import BytesIO
+from unittest.mock import patch
+
import main
@@ -122,6 +123,23 @@ class Mat2APITestCase(unittest.TestCase):
rv = self.app.get('/api/extension', headers={'Origin': 'origin1.gnu'})
self.assertEqual(rv.headers['Access-Control-Allow-Origin'], 'origin1.gnu')
+ def test_api_cleaning_failed(self):
+ request = self.app.post('/api/upload',
+ data='{"file_name": "test_name.zip", '
+ '"file": "UEsDBBQACAAIAPicPE8AAAAAAAAAAAAAAAAXACAAZmFpbGluZy5ub3Qt'
+ 'd29ya2luZy1leHRVVA0AB+Saj13kmo9d5JqPXXV4CwABBOkDAAAE6QMAAAMAUEsHCAAA'
+ 'AAACAAAAAAAAAFBLAwQUAAgACAD6nDxPAAAAAAAAAAAAAAAACQAgAHRlc3QuanNvblVUD'
+ 'QAH6JqPXeiaj13omo9ddXgLAAEE6QMAAATpAwAAAwBQSwcIAAAAAAIAAAAAAAAAUEsBAhQD'
+ 'FAAIAAgA+Jw8TwAAAAACAAAAAAAAABcAIAAAAAAAAAAAAKSBAAAAAGZhaWxpbmcubm90LXd'
+ 'vcmtpbmctZXh0VVQNAAfkmo9d5JqPXeSaj111eAsAAQTpAwAABOkDAABQSwECFAMUAAgACAD6'
+ 'nDxPAAAAAAIAAAAAAAAACQAgAAAAAAAAAAAApIFnAAAAdGVzdC5qc29uVVQNAAfomo9d6JqPXe'
+ 'iaj111eAsAAQTpAwAABOkDAABQSwUGAAAAAAIAAgC8AAAAwAAAAAAA"}',
+ headers={'content-type': 'application/json'}
+ )
+ error = json.loads(request.data.decode('utf-8'))['message']
+ self.assertEqual(error, 'Unable to clean application/zip')
+
+
def test_api_download(self):
request = self.app.post('/api/upload',
data='{"file_name": "test_name.jpg", '
@@ -263,7 +281,6 @@ class Mat2APITestCase(unittest.TestCase):
)
response = json.loads(request.data.decode('utf-8'))
- print(response)
self.assertEqual(response['message']['download_list'][0]['0'][0]['file_name'][0], 'required field')
self.assertEqual(response['message']['download_list'][0]['0'][0]['key'][0], 'required field')
self.assertEqual(request.status_code, 400)
@@ -344,6 +361,34 @@ class Mat2APITestCase(unittest.TestCase):
response = json.loads(request.data.decode('utf-8'))
self.assertEqual('File not found', response['message'])
+ @patch('file_removal_scheduler.random.randint')
+ def test_api_upload_leftover(self, randint_mock):
+ os.environ['MAT2_MAX_FILE_AGE_FOR_REMOVAL'] = '0'
+ app = main.create_app()
+ self.upload_folder = tempfile.mkdtemp()
+ app.config.update(
+ TESTING=True,
+ UPLOAD_FOLDER=self.upload_folder
+ )
+ app = app.test_client()
+ randint_mock.return_value = 1
+ self.upload_download_test_jpg_and_assert_response_code(app, 200)
+ randint_mock.return_value = 0
+ self.upload_download_test_jpg_and_assert_response_code(app, 404)
+
+ os.environ['MAT2_MAX_FILE_AGE_FOR_REMOVAL'] = '9999'
+
+ def upload_download_test_jpg_and_assert_response_code(self, app, code):
+ request = app.post('/api/upload',
+ data='{"file_name": "test_name.jpg", '
+ '"file": "iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAYAAAAf'
+ 'FcSJAAAADUlEQVR42mNk+M9QDwADhgGAWjR9awAAAABJRU5ErkJggg=="}',
+ headers={'content-type': 'application/json'}
+ )
+ download_link = json.loads(request.data.decode('utf-8'))['download_link']
+ request = app.get(download_link)
+ self.assertEqual(code, request.status_code)
+
if __name__ == '__main__':
unittest.main()
diff --git a/test/test_file_removal_scheduler.py b/test/test_file_removal_scheduler.py
new file mode 100644
index 0000000..7f6771a
--- /dev/null
+++ b/test/test_file_removal_scheduler.py
@@ -0,0 +1,48 @@
+import unittest
+import tempfile
+from os import path, environ
+import shutil
+
+import file_removal_scheduler
+import main
+
+
+class Mat2WebTestCase(unittest.TestCase):
+ def setUp(self):
+ self.upload_folder = tempfile.mkdtemp()
+ app = main.create_app()
+ app.config.update(
+ TESTING=True,
+ UPLOAD_FOLDER=self.upload_folder
+ )
+ self.app = app
+
+ def test_removal(self):
+ filename = 'test_name.cleaned.jpg'
+ environ['MAT2_MAX_FILE_AGE_FOR_REMOVAL'] = '0'
+ open(path.join(self.upload_folder, filename), 'a').close()
+ self.assertTrue(path.exists(path.join(self.upload_folder, )))
+ for i in range(0, 11):
+ file_removal_scheduler.run_file_removal_job(self.app.config['UPLOAD_FOLDER'])
+ self.assertFalse(path.exists(path.join(self.upload_folder, filename)))
+
+ open(path.join(self.upload_folder, filename), 'a').close()
+ file_removal_scheduler.run_file_removal_job(self.app.config['UPLOAD_FOLDER'])
+ self.assertTrue(path.exists(path.join(self.upload_folder, )))
+
+ def test_non_removal(self):
+ filename = u'i_should_no_be_removed.txt'
+ environ['MAT2_MAX_FILE_AGE_FOR_REMOVAL'] = '9999999'
+ open(path.join(self.upload_folder, filename), 'a').close()
+ self.assertTrue(path.exists(path.join(self.upload_folder, filename)))
+ for i in range(0, 11):
+ file_removal_scheduler.run_file_removal_job(self.app.config['UPLOAD_FOLDER'])
+ self.assertTrue(path.exists(path.join(self.upload_folder, filename)))
+
+ def tearDown(self):
+ shutil.rmtree(self.upload_folder)
+
+
+if __name__ == '__main__':
+ unittest.main()
+