From 11c5f5b699d302edfda15a1d7bfd3ed2fec87a59 Mon Sep 17 00:00:00 2001 From: Emmanuel Morales Date: Sun, 18 Dec 2016 20:57:17 -0800 Subject: [PATCH 1/2] Added a function to remove HTML from file and directory names --- onionshare/web.py | 19 +++++++++++++++++-- 1 file changed, 17 insertions(+), 2 deletions(-) diff --git a/onionshare/web.py b/onionshare/web.py index b8c9f045..32deaa97 100644 --- a/onionshare/web.py +++ b/onionshare/web.py @@ -17,7 +17,7 @@ GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program. If not, see <http://www.gnu.org/licenses/>. """ -import queue, mimetypes, platform, os, sys, socket, logging +import queue, mimetypes, platform, os, sys, socket, logging, re from urllib.request import urlopen from flask import Flask, Response, request, render_template_string, abort @@ -30,6 +30,17 @@ file_info = [] zip_filename = None zip_filesize = None +def sanitize_html(basename): + """ + Takes a string, called basename, and removes any HTML that could be in the + string. If the resulting string is empty, return the string 'file', which + is not ideal, but better than embedded HTML that could run JS. 
+ """ + html_regex = re.compile('<.*?>') + sanitized_name = re.sub(html_regex , '', basename) + if sanitized_name == '': + sanitized_name = 'file' + return sanitized_name def set_file_info(filenames): """ @@ -42,9 +53,11 @@ def set_file_info(filenames): # build file info list file_info = {'files': [], 'dirs': []} for filename in filenames: + # strips trailing '/' and sanitizes filename + basename = sanitize_html(os.path.basename(filename.rstrip('/'))) info = { 'filename': filename, - 'basename': os.path.basename(filename.rstrip('/')) + 'basename': basename } if os.path.isfile(filename): info['size'] = os.path.getsize(filename) @@ -54,6 +67,8 @@ def set_file_info(filenames): info['size'] = helpers.dir_size(filename) info['size_human'] = helpers.human_readable_filesize(info['size']) file_info['dirs'].append(info) + + # sort list of files and directories by basename file_info['files'] = sorted(file_info['files'], key=lambda k: k['basename']) file_info['dirs'] = sorted(file_info['dirs'], key=lambda k: k['basename']) From cff11cd7e48fe4e94ef484ba1f1095362330452d Mon Sep 17 00:00:00 2001 From: Micah Lee Date: Wed, 21 Dec 2016 22:56:15 -0800 Subject: [PATCH 2/2] Replaced sanitize_html() function that was based on regex with python3's html.escape() --- onionshare/web.py | 16 ++-------------- 1 file changed, 2 insertions(+), 14 deletions(-) diff --git a/onionshare/web.py b/onionshare/web.py index 32deaa97..170775e9 100644 --- a/onionshare/web.py +++ b/onionshare/web.py @@ -17,7 +17,7 @@ GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program. If not, see <http://www.gnu.org/licenses/>. 
""" -import queue, mimetypes, platform, os, sys, socket, logging, re +import queue, mimetypes, platform, os, sys, socket, logging, html from urllib.request import urlopen from flask import Flask, Response, request, render_template_string, abort @@ -30,18 +30,6 @@ file_info = [] zip_filename = None zip_filesize = None -def sanitize_html(basename): - """ - Takes a string, called basename, and removes any HTML that could be in the - string. If the resulting string is empty, return the string 'file', which - is not ideal, but better than embedded HTML that could run JS. - """ - html_regex = re.compile('<.*?>') - sanitized_name = re.sub(html_regex , '', basename) - if sanitized_name == '': - sanitized_name = 'file' - return sanitized_name - def set_file_info(filenames): """ Using the list of filenames being shared, fill in details that the web @@ -54,7 +42,7 @@ def set_file_info(filenames): file_info = {'files': [], 'dirs': []} for filename in filenames: # strips trailing '/' and sanitizes filename - basename = sanitize_html(os.path.basename(filename.rstrip('/'))) + basename = html.escape(os.path.basename(filename.rstrip('/'))) info = { 'filename': filename, 'basename': basename