mat2-web/matweb/utils.py

119 lines
3.6 KiB
Python
Raw Normal View History

2020-04-23 10:39:35 -07:00
import hmac
import os
import hashlib
import mimetypes as mtype
from flask_restful import abort, current_app
2020-04-23 10:39:35 -07:00
from libmat2 import parser_factory
from werkzeug.utils import secure_filename
def get_allow_origin_header_value():
    """
    Read the allowed origins from the environment.

    The `MAT2_ALLOW_ORIGIN_WHITELIST` variable is split on single spaces;
    when it is not set, the wildcard `*` is used.

    :return: list of origin strings
    """
    configured_origins = os.environ.get('MAT2_ALLOW_ORIGIN_WHITELIST', '*')
    return configured_origins.split(" ")
2020-04-26 09:50:14 -07:00
def hash_file(filepath: str, secret: str) -> str:
    """
    Compute an HMAC-SHA256 digest over the contents of a file.

    The goal of the hmac is to ONLY make the hashes unpredictable.

    :param filepath: Path of the file
    :param secret: a server side generated secret, used as the HMAC key
    :return: hexadecimal digest of the file contents
    """
    chunk_size = 65536  # read the file by chunks of 64k to bound memory use
    digest = hmac.new(secret.encode(), digestmod=hashlib.sha256)

    with open(filepath, 'rb') as stream:
        # iter() with a sentinel stops at the first empty read, i.e. at EOF
        for chunk in iter(lambda: stream.read(chunk_size), b''):
            digest.update(chunk)

    return digest.hexdigest()
2020-04-23 10:39:35 -07:00
def check_upload_folder(upload_folder):
    """
    Make sure the upload folder exists, creating it when it is missing.

    :param upload_folder: Path of the folder to check
    """
    if os.path.exists(upload_folder):
        return

    current_app.logger.info('Upload folder does not exist - creating it')
    os.mkdir(upload_folder)
2020-04-26 09:50:14 -07:00
def return_file_created_response(
        inactive_after_sec: int,
        output_filename: str,
        mime: str,
        key: str,
        secret: str,
        meta: list,
        meta_after: list,
        download_link: str
) -> dict:
    """
    Bundle the given values into the "file created" response payload.

    Every argument is stored unchanged under a key of the same name.

    :return: dict with the keys `inactive_after_sec`, `output_filename`,
             `mime`, `key`, `secret`, `meta`, `meta_after`, `download_link`
    """
    return dict(
        inactive_after_sec=inactive_after_sec,
        output_filename=output_filename,
        mime=mime,
        key=key,
        secret=secret,
        meta=meta,
        meta_after=meta_after,
        download_link=download_link,
    )
def get_supported_extensions():
    """
    List the file extensions associated with the parsers' mimetypes.

    For every parser returned by `parser_factory._get_parsers()`, each of
    its mimetypes is mapped to all extensions known to `mimetypes`
    (non-strict lookup).

    :return: sorted list of unique, non-empty extensions
    """
    found = set()
    for parser in parser_factory._get_parsers():
        for mime in parser.mimetypes:
            found.update(mtype.guess_all_extensions(mime, strict=False))

    # drop any falsy entry (e.g. `None` or an empty string) before sorting
    return sorted(ext for ext in found if ext)
def save_file(file, upload_folder):
    """
    Store an uploaded file in the upload folder under a sanitised name.

    :param file: object exposing a `filename` attribute and a `save(path)`
                 method
    :param upload_folder: folder the file is written to
    :return: tuple (sanitised filename, path the file was saved to)
    :raises ValueError: if sanitising the filename leaves nothing usable
    """
    safe_name = secure_filename(file.filename)
    if not safe_name:
        raise ValueError('Invalid Filename')

    target_path = os.path.join(upload_folder, safe_name)
    file.save(target_path)
    return safe_name, target_path
def get_file_parser(filepath: str):
    """
    Look up the libmat2 parser for a file.

    :param filepath: Path of the file
    :return: tuple (parser, mime) as produced by `parser_factory.get_parser`
    """
    file_parser, mime_type = parser_factory.get_parser(filepath)
    return file_parser, mime_type
def cleanup(parser, filepath, upload_folder):
    """
    Finish processing a file: read the metadata of the parser's output
    file, delete the input file and generate a key/secret pair for the
    output file.

    :param parser: parser whose `output_filename` points at the output file
    :param filepath: path of the input file; it is removed
    :param upload_folder: folder in which the output file is looked up
                          for hashing
    :return: tuple (key, secret, meta_after, output_filename)
    """
    output_filename = os.path.basename(parser.output_filename)

    # Open the output file with its own parser to read its metadata
    output_parser, _ = parser_factory.get_parser(parser.output_filename)
    meta_after = output_parser.get_meta()

    os.remove(filepath)

    # Fresh random secret per file; the key is the HMAC of the output file
    secret = os.urandom(32).hex()
    output_path = os.path.join(upload_folder, output_filename)
    key = hash_file(output_path, secret)
    return key, secret, meta_after, output_filename
2020-04-23 10:39:35 -07:00
def get_file_paths(filename, upload_folder):
    """
    Resolve a filename to its location inside the upload folder.

    :param filename: filename; it is passed through `secure_filename`
    :param upload_folder: folder the file is expected in
    :return: tuple (path inside the upload folder, sanitised filename)
    """
    safe_name = secure_filename(filename)
    return os.path.join(upload_folder, safe_name), safe_name
2020-04-26 09:50:14 -07:00
def is_valid_api_download_file(filename: str, key: str, secret: str, upload_folder: str) -> tuple:
    """
    Validate a download request and resolve the file it refers to.

    The request is aborted (through `abort`) with
    - 400 if `filename` differs from its `secure_filename` form,
    - 404 if the file does not exist in the upload folder,
    - 400 if the HMAC of the file computed with `secret` does not match `key`.

    :param filename: requested filename
    :param key: expected digest of the file
    :param secret: secret used to compute the digest of the file
    :param upload_folder: folder the file is looked up in
    :return: tuple (complete path of the file, sanitised filename)
    """
    if filename != secure_filename(filename):
        # '%s' placeholder: a bare '%' cannot be formatted by the logger,
        # which would swallow the message and the filename
        current_app.logger.error('Insecure filename %s', filename)
        abort(400, message='Insecure filename')

    complete_path, filepath = get_file_paths(filename, upload_folder)

    if not os.path.exists(complete_path):
        current_app.logger.error('File not found')
        abort(404, message='File not found')

    # compare_digest avoids leaking the digest through timing differences
    if not hmac.compare_digest(hash_file(complete_path, secret), key):
        current_app.logger.error('The file hash does not match')
        abort(400, message='The file hash does not match')

    return complete_path, filepath
def get_file_removal_max_age_sec() -> int:
    """
    Read the maximum file age for removal from the environment.

    The value is taken from `MAT2_MAX_FILE_AGE_FOR_REMOVAL` and defaults
    to 15 minutes when the variable is not set.

    :return: maximum age in seconds
    """
    default_max_age_sec = 15 * 60
    configured = os.environ.get('MAT2_MAX_FILE_AGE_FOR_REMOVAL', default_max_age_sec)
    return int(configured)