From 1035a24707276a97c75a6fd1ecf9f425fb01fc10 Mon Sep 17 00:00:00 2001 From: jfriedli Date: Sun, 12 Jan 2025 12:11:06 +0000 Subject: [PATCH] Added Non-Ascii filename support --- matweb/frontend.py | 2 +- matweb/utils.py | 13 +++++++++++-- test/test.py | 17 ++++++++++++++--- test/test_api.py | 8 ++++---- 4 files changed, 30 insertions(+), 10 deletions(-) diff --git a/matweb/frontend.py b/matweb/frontend.py index 018fc97..ec9ef23 100644 --- a/matweb/frontend.py +++ b/matweb/frontend.py @@ -68,7 +68,7 @@ def upload_file(): parser, mime = utils.get_file_parser(filepath) except ValueError: flash('The filetype is not supported') - current_app.logger.error('Unsupported filetype',) + current_app.logger.error('Unsupported filetype') return redirect(url_for('routes.upload_file')) try: diff --git a/matweb/utils.py b/matweb/utils.py index 270c5f3..39159e1 100644 --- a/matweb/utils.py +++ b/matweb/utils.py @@ -2,6 +2,8 @@ import hmac import os import hashlib import mimetypes as mtype +import pathlib +import uuid from typing import Tuple from flask_restful import abort, current_app @@ -68,9 +70,16 @@ def get_supported_extensions(): def save_file(file, upload_folder): - filename = secure_filename(file.filename) + path = pathlib.Path(file.filename) + extension = path.suffix + stem = path.stem + + filename = secure_filename(stem) if not filename: - raise ValueError('Invalid Filename') + filename = str(uuid.uuid4()) + + if extension: + filename = str(pathlib.Path(filename).with_suffix(extension)) filepath = os.path.join(upload_folder, filename) file.save(os.path.join(filepath)) return filename, filepath diff --git a/test/test.py b/test/test.py index 4d2907f..d5ef52d 100644 --- a/test/test.py +++ b/test/test.py @@ -187,17 +187,28 @@ class Mat2WebTestCase(TestCase): self.assertIn(b'.mp2', rv.data) self.assertEqual(rv.status_code, 200) - def test_get_upload_naughty_input(self): + def test_get_upload_no_ascii_no_ext_input(self): rv = self.client.post( '/', data=dict( - file=(io.BytesIO(b"a"), '﷽'), + file=(io.BytesIO(b"a"), '﷽.txt'), ), follow_redirects=True ) self.assertEqual(rv.status_code, 200) - self.assertIn(b'Invalid Filename', rv.data) + self.assertIn(b'.cleaned.txt', rv.data) + def test_get_upload_no_ascii_stem_input(self): + pdfBytes = b"%PDF-1.\n 1 0 obj<>endobj\n2 0 obj<>endobj\n3 0 obj<>endobj\ntrailer <>" + rv = self.client.post( + '/', + data=dict( + file=(io.BytesIO(pdfBytes), '한국어.pdf'), + ), + follow_redirects=True + ) + self.assertEqual(rv.status_code, 200) + self.assertIn(b'.cleaned.pdf', rv.data) if __name__ == '__main__': unittest.main() diff --git a/test/test_api.py b/test/test_api.py index 2f52279..0ceb846 100644 --- a/test/test_api.py +++ b/test/test_api.py @@ -451,8 +451,8 @@ class Mat2APITestCase(unittest.TestCase): headers={'content-type': 'application/json'} ) error_message = request.get_json()['message'] - self.assertEqual(400, request.status_code) - self.assertEqual("Invalid Filename", error_message) + self.assertEqual(415, request.status_code) + self.assertEqual("The filetype is not supported", error_message) request = self.app.post('/api/upload', data='{"file_name": "﷽", ' @@ -534,8 +534,8 @@ class Mat2APITestCase(unittest.TestCase): ), follow_redirects=False ) - self.assertEqual(r.get_json()['message'], 'Invalid Filename') - self.assertEqual(r.status_code, 400) + self.assertEqual(r.get_json()['message'], 'The filetype is not supported') + self.assertEqual(r.status_code, 415) r = self.app.post( '/api/remove_metadata',