mirror of
https://0xacab.org/jvoisin/mat2-web.git
synced 2025-02-23 00:29:56 -05:00
file removal background job
This commit is contained in:
parent
d50f68ae44
commit
7104b2107d
14
README.md
14
README.md
@ -52,7 +52,11 @@ Note that you can add multiple hosts from which you want to accept API requests.
|
||||
a space.
|
||||
**IMPORTANT:** The default value if the variable is not set is: `Access-Control-Allow-Origin: *`
|
||||
|
||||
Configure another environment variable: `MAT2_MAX_FILES_BULK_DOWNLOAD=10`
|
||||
Configure the following environment variables:
|
||||
|
||||
- `MAT2_MAX_FILES_BULK_DOWNLOAD=10` Max number of files that can be grouped for a bulk download.
|
||||
- `MAT2_MAX_FILE_AGE_FOR_REMOVAL=900` Seconds a file in the upload folder is kept.
|
||||
After that it becomes eligible for deletion; the cleanup is triggered occasionally by download requests. Default: `15 * 60` (15 minutes).
|
||||
|
||||
This specifies the max number of files that can be bulk downloaded using the api.
|
||||
Note: Each file has a maximum size of 16 MB.
|
||||
@ -66,10 +70,6 @@ systemctl restart nginx/apache/…
|
||||
|
||||
It should now be working.
|
||||
|
||||
You should add `find /var/www/mat2-web/uploads/ -type f -mtime +1 -exec rm {} \;`
|
||||
in a crontab to remove files that people might have uploaded but never
|
||||
downloaded.
|
||||
|
||||
# Deploy via Ansible
|
||||
|
||||
If you happen to be using [Ansible](https://www.ansible.com/), there's an
|
||||
@ -92,10 +92,6 @@ https://0xacab.org/jvoisin/mat2-web/container_registry
|
||||
Example:
|
||||
`docker run -p 80:80 -d -e MAT2_ALLOW_ORIGIN_WHITELIST='https://myhost1.org' registry.0xacab.org/jvoisin/mat2-web:latest`
|
||||
|
||||
Make sure to add
|
||||
`find /var/www/mat2-web/uploads/ -type f -mtime +1 -exec rm {} \;` as cron job
|
||||
run inside the container.
|
||||
|
||||
# Development
|
||||
Install docker and docker-compose and then run `docker-compose up` to set up
|
||||
the docker dev environment. Mat2-web is now accessible on your host machine at `localhost:5000`.
|
||||
|
@ -9,6 +9,7 @@ services:
|
||||
- FLASK_ENV=development
|
||||
- MAT2_ALLOW_ORIGIN_WHITELIST=*
|
||||
- MAT2_MAX_FILES_BULK_DOWNLOAD=10
|
||||
- MAT2_MAX_FILE_AGE_FOR_REMOVAL=60
|
||||
ports:
|
||||
- "5000:5000"
|
||||
volumes:
|
||||
|
26
file_removal_scheduler.py
Normal file
26
file_removal_scheduler.py
Normal file
@ -0,0 +1,26 @@
|
||||
import glob
|
||||
import time
|
||||
import sys
|
||||
import os
|
||||
import random
|
||||
|
||||
|
||||
def run_file_removal_job(upload_folder_path):
    """Occasionally sweep the upload folder and delete files that are too old.

    The sweep only happens when ``random.randint(0, 10)`` returns 0; every
    other call returns immediately without touching the file system.

    :param upload_folder_path: directory whose direct children are checked.
    """
    if random.randint(0, 10) != 0:
        return

    # Escape the folder so that glob metacharacters in its name (``[``,
    # ``*``, ``?``) are matched literally instead of being read as a pattern.
    pattern = os.path.join(glob.escape(upload_folder_path), '*')
    for entry in glob.glob(pattern):
        delete_file_when_too_old(entry)
|
||||
|
||||
|
||||
def delete_file_when_too_old(filepath):
    """Delete ``filepath`` if its last modification is too long ago.

    The maximum age in seconds is read from the environment variable
    ``MAT2_MAX_FILE_AGE_FOR_REMOVAL`` and defaults to ``15 * 60``.  A file
    that no longer exists (for example because a concurrent request already
    removed it) is treated as cleaned up.

    :param filepath: path of the file to check.
    :raises SystemExit: if the file exists but cannot be removed.
    """
    try:
        file_mod_time = os.stat(filepath).st_mtime
    except FileNotFoundError:
        # The file vanished between listing the folder and inspecting it.
        return

    # time in seconds since the last modification of the file
    age = time.time() - file_mod_time

    # only files older than the configured max timeframe are deleted
    max_age = int(os.environ.get('MAT2_MAX_FILE_AGE_FOR_REMOVAL', 15 * 60))
    if age <= max_age:
        return

    try:
        os.remove(filepath)
    except FileNotFoundError:
        # Somebody else removed it first; nothing left to do.
        return
    except OSError:
        print('Automatic File Removal failed on file: ' + str(filepath))
        sys.exit(1)
|
14
main.py
14
main.py
@ -10,6 +10,7 @@ import zipfile
|
||||
|
||||
from cerberus import Validator
|
||||
import utils
|
||||
import file_removal_scheduler
|
||||
from libmat2 import parser_factory
|
||||
from flask import Flask, flash, request, redirect, url_for, render_template, send_from_directory, after_this_request
|
||||
from flask_restful import Resource, Api, reqparse, abort
|
||||
@ -25,7 +26,11 @@ def create_app(test_config=None):
|
||||
app.config['UPLOAD_FOLDER'] = './uploads/'
|
||||
app.config['MAX_CONTENT_LENGTH'] = 16 * 1024 * 1024 # 16MB
|
||||
app.config['CUSTOM_TEMPLATES_DIR'] = 'custom_templates'
|
||||
app.config.from_object('config') # optionally load settings from config.py
|
||||
# optionally load settings from config.py
|
||||
app.config.from_object('config')
|
||||
|
||||
if test_config is not None:
|
||||
app.config.update(test_config)
|
||||
|
||||
app.jinja_loader = jinja2.ChoiceLoader([ # type: ignore
|
||||
jinja2.FileSystemLoader(app.config['CUSTOM_TEMPLATES_DIR']),
|
||||
@ -36,19 +41,20 @@ def create_app(test_config=None):
|
||||
CORS(app, resources={r"/api/*": {"origins": utils.get_allow_origin_header_value()}})
|
||||
|
||||
@app.route('/download/<string:key>/<string:filename>')
|
||||
def download_file(key: str, filename:str):
|
||||
def download_file(key: str, filename: str):
|
||||
if filename != secure_filename(filename):
|
||||
return redirect(url_for('upload_file'))
|
||||
|
||||
complete_path, filepath = get_file_paths(filename)
|
||||
file_removal_scheduler.run_file_removal_job(app.config['UPLOAD_FOLDER'])
|
||||
|
||||
if not os.path.exists(complete_path):
|
||||
return redirect(url_for('upload_file'))
|
||||
if hmac.compare_digest(utils.hash_file(complete_path), key) is False:
|
||||
return redirect(url_for('upload_file'))
|
||||
|
||||
@after_this_request
|
||||
def remove_file(response):
|
||||
if os.path.exists(complete_path):
|
||||
os.remove(complete_path)
|
||||
return response
|
||||
return send_from_directory(app.config['UPLOAD_FOLDER'], filepath, as_attachment=True)
|
||||
@ -176,8 +182,10 @@ def create_app(test_config=None):
|
||||
complete_path, filepath = is_valid_api_download_file(filename, key)
|
||||
# Make sure the file is NOT deleted on HEAD requests
|
||||
if request.method == 'GET':
|
||||
file_removal_scheduler.run_file_removal_job(app.config['UPLOAD_FOLDER'])
|
||||
@after_this_request
|
||||
def remove_file(response):
|
||||
if os.path.exists(complete_path):
|
||||
os.remove(complete_path)
|
||||
return response
|
||||
|
||||
|
@ -14,7 +14,7 @@ mat2 <b>could not</b> remove all the metadata from <pre>{{ filename }}</pre>, th
|
||||
</ul>
|
||||
{%endif %}
|
||||
</p>
|
||||
<a class="button button-primary" download href='{{ url_for('download_file', key=key, filename=filename) }}'>⇩ Download cleaned file</a>
|
||||
<a class="button button-primary" href='{{ url_for('download_file', key=key, filename=filename) }}'>⇩ Download cleaned file</a>
|
||||
|
||||
<hr/>
|
||||
|
||||
|
53
test/test.py
53
test/test.py
@ -1,9 +1,12 @@
|
||||
import base64
|
||||
import unittest
|
||||
import tempfile
|
||||
import shutil
|
||||
import io
|
||||
import os
|
||||
|
||||
from unittest.mock import patch
|
||||
|
||||
import main
|
||||
|
||||
|
||||
@ -62,6 +65,33 @@ class Mat2WebTestCase(unittest.TestCase):
|
||||
rv.data)
|
||||
self.assertEqual(rv.status_code, 200)
|
||||
|
||||
def test_get_upload_no_selected_file(self):
|
||||
rv = self.app.post('/',
|
||||
data=dict(
|
||||
file=(io.BytesIO(b""), ''),
|
||||
), follow_redirects=True)
|
||||
self.assertIn(b'No selected file',
|
||||
rv.data)
|
||||
self.assertEqual(rv.status_code, 200)
|
||||
|
||||
def test_failed_cleaning(self):
|
||||
zip_file_bytes = base64.b64decode(
|
||||
'UEsDBBQACAAIAPicPE8AAAAAAAAAAAAAAAAXACAAZmFpbGluZy5ub3Qtd29ya2luZy1le'
|
||||
'HRVVA0AB+Saj13kmo9d5JqPXXV4CwABBOkDAAAE6QMAAAMAUEsHCAAAAAACAAAAAAAAAFBL'
|
||||
'AwQUAAgACAD6nDxPAAAAAAAAAAAAAAAACQAgAHRlc3QuanNvblVUDQAH6JqPXeiaj13omo9d'
|
||||
'dXgLAAEE6QMAAATpAwAAAwBQSwcIAAAAAAIAAAAAAAAAUEsBAhQDFAAIAAgA+Jw8TwAAAAACA'
|
||||
'AAAAAAAABcAIAAAAAAAAAAAAKSBAAAAAGZhaWxpbmcubm90LXdvcmtpbmctZXh0VVQNAAfkmo9'
|
||||
'd5JqPXeSaj111eAsAAQTpAwAABOkDAABQSwECFAMUAAgACAD6nDxPAAAAAAIAAAAAAAAACQAgA'
|
||||
'AAAAAAAAAAApIFnAAAAdGVzdC5qc29uVVQNAAfomo9d6JqPXeiaj111eAsAAQTpAwAABOkDAAB'
|
||||
'QSwUGAAAAAAIAAgC8AAAAwAAAAAAA'
|
||||
)
|
||||
rv = self.app.post('/',
|
||||
data=dict(
|
||||
file=(io.BytesIO(zip_file_bytes), 'test.zip'),
|
||||
), follow_redirects=True)
|
||||
self.assertIn(b'Unable to clean',rv.data)
|
||||
self.assertEqual(rv.status_code, 200)
|
||||
|
||||
def test_get_upload_no_file_name(self):
|
||||
rv = self.app.post('/',
|
||||
data=dict(
|
||||
@ -97,6 +127,29 @@ class Mat2WebTestCase(unittest.TestCase):
|
||||
rv = self.app.get('/download/70623619c449a040968cdbea85945bf384fa30ed2d5d24fa3/test.cleaned.txt')
|
||||
self.assertEqual(rv.status_code, 302)
|
||||
|
||||
@patch('file_removal_scheduler.random.randint')
def test_upload_leftover(self, randint_mock):
    """An upload that exceeded the max age is removed before it is served.

    The file is uploaded and requested through the same client, so the
    redirect (302) is caused by the removal job that the download request
    triggers.
    """
    # Force the removal job to run on the download request.
    randint_mock.return_value = 0

    # Dedicated upload folder for this test; removed again afterwards.
    upload_folder = tempfile.mkdtemp()
    self.addCleanup(shutil.rmtree, upload_folder, ignore_errors=True)

    flask_app = main.create_app()
    flask_app.config.update(
        TESTING=True,
        UPLOAD_FOLDER=upload_folder
    )
    client = flask_app.test_client()

    # patch.dict restores the previous environment even if an assertion fails.
    with patch.dict(os.environ, {'MAT2_MAX_FILE_AGE_FOR_REMOVAL': '0'}):
        response = client.post(
            '/',
            data=dict(
                file=(io.BytesIO(b"Some text"), 'test.txt'),
            ),
            follow_redirects=True
        )
        self.assertEqual(response.status_code, 200)

        response = client.get(
            b'/download/4c2e9e6da31a64c70623619c449a040968cdbea85945bf384fa30ed2d5d24fa3/test.cleaned.txt'
        )
        self.assertEqual(302, response.status_code)
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
unittest.main()
|
||||
|
@ -4,9 +4,10 @@ import json
|
||||
import os
|
||||
import shutil
|
||||
import zipfile
|
||||
|
||||
from six import BytesIO
|
||||
|
||||
from unittest.mock import patch
|
||||
|
||||
import main
|
||||
|
||||
|
||||
@ -122,6 +123,23 @@ class Mat2APITestCase(unittest.TestCase):
|
||||
rv = self.app.get('/api/extension', headers={'Origin': 'origin1.gnu'})
|
||||
self.assertEqual(rv.headers['Access-Control-Allow-Origin'], 'origin1.gnu')
|
||||
|
||||
def test_api_cleaning_failed(self):
|
||||
request = self.app.post('/api/upload',
|
||||
data='{"file_name": "test_name.zip", '
|
||||
'"file": "UEsDBBQACAAIAPicPE8AAAAAAAAAAAAAAAAXACAAZmFpbGluZy5ub3Qt'
|
||||
'd29ya2luZy1leHRVVA0AB+Saj13kmo9d5JqPXXV4CwABBOkDAAAE6QMAAAMAUEsHCAAA'
|
||||
'AAACAAAAAAAAAFBLAwQUAAgACAD6nDxPAAAAAAAAAAAAAAAACQAgAHRlc3QuanNvblVUD'
|
||||
'QAH6JqPXeiaj13omo9ddXgLAAEE6QMAAATpAwAAAwBQSwcIAAAAAAIAAAAAAAAAUEsBAhQD'
|
||||
'FAAIAAgA+Jw8TwAAAAACAAAAAAAAABcAIAAAAAAAAAAAAKSBAAAAAGZhaWxpbmcubm90LXd'
|
||||
'vcmtpbmctZXh0VVQNAAfkmo9d5JqPXeSaj111eAsAAQTpAwAABOkDAABQSwECFAMUAAgACAD6'
|
||||
'nDxPAAAAAAIAAAAAAAAACQAgAAAAAAAAAAAApIFnAAAAdGVzdC5qc29uVVQNAAfomo9d6JqPXe'
|
||||
'iaj111eAsAAQTpAwAABOkDAABQSwUGAAAAAAIAAgC8AAAAwAAAAAAA"}',
|
||||
headers={'content-type': 'application/json'}
|
||||
)
|
||||
error = json.loads(request.data.decode('utf-8'))['message']
|
||||
self.assertEqual(error, 'Unable to clean application/zip')
|
||||
|
||||
|
||||
def test_api_download(self):
|
||||
request = self.app.post('/api/upload',
|
||||
data='{"file_name": "test_name.jpg", '
|
||||
@ -263,7 +281,6 @@ class Mat2APITestCase(unittest.TestCase):
|
||||
)
|
||||
|
||||
response = json.loads(request.data.decode('utf-8'))
|
||||
print(response)
|
||||
self.assertEqual(response['message']['download_list'][0]['0'][0]['file_name'][0], 'required field')
|
||||
self.assertEqual(response['message']['download_list'][0]['0'][0]['key'][0], 'required field')
|
||||
self.assertEqual(request.status_code, 400)
|
||||
@ -344,6 +361,34 @@ class Mat2APITestCase(unittest.TestCase):
|
||||
response = json.loads(request.data.decode('utf-8'))
|
||||
self.assertEqual('File not found', response['message'])
|
||||
|
||||
@patch('file_removal_scheduler.random.randint')
def test_api_upload_leftover(self, randint_mock):
    """The API download returns 404 once the removal job has deleted the file.

    With a max age of 0 every uploaded file counts as too old; whether the
    removal job runs is controlled through the patched ``random.randint``.
    """
    # Dedicated upload folder for this test; removed again afterwards.
    upload_folder = tempfile.mkdtemp()
    self.addCleanup(shutil.rmtree, upload_folder, ignore_errors=True)

    flask_app = main.create_app()
    flask_app.config.update(
        TESTING=True,
        UPLOAD_FOLDER=upload_folder
    )
    client = flask_app.test_client()

    # patch.dict restores the previous environment even if an assertion fails.
    with patch.dict(os.environ, {'MAT2_MAX_FILE_AGE_FOR_REMOVAL': '0'}):
        # Non-zero draw: the removal job is skipped and the file is served.
        randint_mock.return_value = 1
        self.upload_download_test_jpg_and_assert_response_code(client, 200)

        # Zero draw: the removal job runs and the file is gone.
        randint_mock.return_value = 0
        self.upload_download_test_jpg_and_assert_response_code(client, 404)
|
||||
|
||||
def upload_download_test_jpg_and_assert_response_code(self, app, code):
    """Upload ``test_name.jpg`` via the API, request its download link and
    assert the status code of that download request.

    :param app: client used for both the upload and the download request.
    :param code: status code the download request is expected to return.
    """
    payload = (
        '{"file_name": "test_name.jpg", '
        '"file": "iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAYAAAAf'
        'FcSJAAAADUlEQVR42mNk+M9QDwADhgGAWjR9awAAAABJRU5ErkJggg=="}'
    )
    upload_response = app.post(
        '/api/upload',
        data=payload,
        headers={'content-type': 'application/json'}
    )

    # The upload answer carries the link under which the file can be fetched.
    upload_body = json.loads(upload_response.data.decode('utf-8'))
    download_response = app.get(upload_body['download_link'])

    self.assertEqual(code, download_response.status_code)
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
unittest.main()
|
||||
|
48
test/test_file_removal_scheduler.py
Normal file
48
test/test_file_removal_scheduler.py
Normal file
@ -0,0 +1,48 @@
|
||||
import unittest
|
||||
import tempfile
|
||||
from os import path, environ
|
||||
import shutil
|
||||
|
||||
import file_removal_scheduler
|
||||
import main
|
||||
|
||||
|
||||
class Mat2WebTestCase(unittest.TestCase):
    """Tests for the randomly triggered upload folder removal job."""

    def setUp(self):
        # Imported locally because this module does not import unittest.mock.
        from unittest import mock

        self.upload_folder = tempfile.mkdtemp()
        app = main.create_app()
        app.config.update(
            TESTING=True,
            UPLOAD_FOLDER=self.upload_folder
        )
        self.app = app

        # Snapshot the environment so that MAT2_MAX_FILE_AGE_FOR_REMOVAL set
        # by a test does not leak into other tests.
        env_patcher = mock.patch.dict(environ)
        env_patcher.start()
        self.addCleanup(env_patcher.stop)

        # The job only runs when random.randint() returns 0; pin the draw so
        # that the tests do not depend on chance.
        randint_patcher = mock.patch('file_removal_scheduler.random.randint')
        self.randint_mock = randint_patcher.start()
        self.randint_mock.return_value = 0
        self.addCleanup(randint_patcher.stop)

    def test_removal(self):
        """A file older than the max age is removed when the job runs."""
        filename = 'test_name.cleaned.jpg'
        file_path = path.join(self.upload_folder, filename)
        environ['MAT2_MAX_FILE_AGE_FOR_REMOVAL'] = '0'

        open(file_path, 'a').close()
        self.assertTrue(path.exists(file_path))
        for _ in range(0, 11):
            file_removal_scheduler.run_file_removal_job(self.app.config['UPLOAD_FOLDER'])
        self.assertFalse(path.exists(file_path))
        # Only the file is removed, never the upload folder itself.
        self.assertTrue(path.exists(self.upload_folder))

        # With a non-zero draw the job is skipped and the file is kept.
        self.randint_mock.return_value = 1
        open(file_path, 'a').close()
        file_removal_scheduler.run_file_removal_job(self.app.config['UPLOAD_FOLDER'])
        self.assertTrue(path.exists(file_path))

    def test_non_removal(self):
        """A file younger than the max age survives every run of the job."""
        filename = u'i_should_no_be_removed.txt'
        file_path = path.join(self.upload_folder, filename)
        environ['MAT2_MAX_FILE_AGE_FOR_REMOVAL'] = '9999999'

        open(file_path, 'a').close()
        self.assertTrue(path.exists(file_path))
        for _ in range(0, 11):
            file_removal_scheduler.run_file_removal_job(self.app.config['UPLOAD_FOLDER'])
        self.assertTrue(path.exists(file_path))

    def tearDown(self):
        shutil.rmtree(self.upload_folder)
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
unittest.main()
|
||||
|
Loading…
x
Reference in New Issue
Block a user