// Copyright (c) 2006-2013, Andrey N. Sabelnikov, www.sabelnikov.net // All rights reserved. // // Redistribution and use in source and binary forms, with or without // modification, are permitted provided that the following conditions are met: // * Redistributions of source code must retain the above copyright // notice, this list of conditions and the following disclaimer. // * Redistributions in binary form must reproduce the above copyright // notice, this list of conditions and the following disclaimer in the // documentation and/or other materials provided with the distribution. // * Neither the name of the Andrey N. Sabelnikov nor the // names of its contributors may be used to endorse or promote products // derived from this software without specific prior written permission. // // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND // ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED // WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER BE LIABLE FOR ANY // DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES // (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; // LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND // ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. // #include <boost/regex.hpp> #include <boost/lexical_cast.hpp> #include "http_protocol_handler.h" #include "reg_exp_definer.h" #include "string_tools.h" #include "file_io_utils.h" #include "net_parse_helpers.h" #undef MONERO_DEFAULT_LOG_CATEGORY #define MONERO_DEFAULT_LOG_CATEGORY "net.http" #define HTTP_MAX_URI_LEN 9000 #define HTTP_MAX_HEADER_LEN 100000 namespace epee { namespace net_utils { namespace http { struct multipart_entry { std::list<std::pair<std::string, std::string> > m_etc_header_fields; std::string m_content_disposition; std::string m_content_type; std::string m_body; }; inline bool match_boundary(const std::string& content_type, std::string& boundary) { STATIC_REGEXP_EXPR_1(rexp_match_boundary, "boundary=(.*?)(($)|([;\\s,]))", boost::regex::icase | boost::regex::normal); // 1 boost::smatch result; if(boost::regex_search(content_type, result, rexp_match_boundary, boost::match_default) && result[0].matched) { boundary = result[1]; return true; } return false; } inline bool parse_header(std::string::const_iterator it_begin, std::string::const_iterator it_end, multipart_entry& entry) { STATIC_REGEXP_EXPR_1(rexp_mach_field, "\n?((Content-Disposition)|(Content-Type)" // 12 3 "|([\\w-]+?)) ?: ?((.*?)(\r?\n))[^\t ]", //4 56 7 boost::regex::icase | boost::regex::normal); boost::smatch result; std::string::const_iterator it_current_bound = it_begin; std::string::const_iterator it_end_bound = it_end; //lookup all fields and fill well-known fields while( boost::regex_search( it_current_bound, it_end_bound, result, rexp_mach_field, boost::match_default) && result[0].matched) { const size_t field_val = 6; const size_t field_etc_name = 4; int i = 2; //start position = 2 if(result[i++].matched)//"Content-Disposition" entry.m_content_disposition = result[field_val]; else if(result[i++].matched)//"Content-Type" entry.m_content_type = result[field_val]; else if(result[i++].matched)//e.t.c (HAVE TO BE MATCHED!) entry.m_etc_header_fields.push_back(std::pair<std::string, std::string>(result[field_etc_name], result[field_val])); else { LOG_ERROR("simple_http_connection_handler::parse_header() not matched last entry in:"<<std::string(it_current_bound, it_end)); } it_current_bound = result[(int)result.size()-1].first; } return true; } inline bool handle_part_of_multipart(std::string::const_iterator it_begin, std::string::const_iterator it_end, multipart_entry& entry) { std::string end_str = "\r\n\r\n"; std::string::const_iterator end_header_it = std::search(it_begin, it_end, end_str.begin(), end_str.end()); if(end_header_it == it_end) { //header not matched return false; } if(!parse_header(it_begin, end_header_it+4, entry)) { LOG_ERROR("Failed to parse header:" << std::string(it_begin, end_header_it+2)); return false; } entry.m_body.assign(end_header_it+4, it_end); return true; } inline bool parse_multipart_body(const std::string& content_type, const std::string& body, std::list<multipart_entry>& out_values) { //bool res = file_io_utils::load_file_to_string("C:\\public\\multupart_data", body); std::string boundary; if(!match_boundary(content_type, boundary)) { MERROR("Failed to match boundary in content type: " << content_type); return false; } boundary+="\r\n"; bool is_stop = false; bool first_step = true; std::string::const_iterator it_begin = body.begin(); std::string::const_iterator it_end; while(!is_stop) { std::string::size_type pos = body.find(boundary, std::distance(body.begin(), it_begin)); if(std::string::npos == pos) { is_stop = true; boundary.erase(boundary.size()-2, 2); boundary+= "--"; pos = body.find(boundary, std::distance(body.begin(), it_begin)); if(std::string::npos == pos) { MERROR("Error: Filed to match closing multipart tag"); it_end = body.end(); }else { it_end = body.begin() + pos; } }else it_end = body.begin() + pos; if(first_step && !is_stop) { first_step = false; it_begin = it_end + boundary.size(); std::string temp = "\r\n--"; boundary = temp + boundary; continue; } out_values.push_back(multipart_entry()); if(!handle_part_of_multipart(it_begin, it_end, out_values.back())) { MERROR("Failed to handle_part_of_multipart"); return false; } it_begin = it_end + boundary.size(); } return true; } //-------------------------------------------------------------------------------------------- template<class t_connection_context> simple_http_connection_handler<t_connection_context>::simple_http_connection_handler(i_service_endpoint* psnd_hndlr, config_type& config): m_state(http_state_retriving_comand_line), m_body_transfer_type(http_body_transfer_undefined), m_is_stop_handling(false), m_len_summary(0), m_len_remain(0), m_config(config), m_want_close(false), m_psnd_hndlr(psnd_hndlr) { } //-------------------------------------------------------------------------------------------- template<class t_connection_context> bool simple_http_connection_handler<t_connection_context>::set_ready_state() { m_is_stop_handling = false; m_state = http_state_retriving_comand_line; m_body_transfer_type = http_body_transfer_undefined; m_query_info.clear(); m_len_summary = 0; return true; } //-------------------------------------------------------------------------------------------- template<class t_connection_context> bool simple_http_connection_handler<t_connection_context>::handle_recv(const void* ptr, size_t cb) { std::string buf((const char*)ptr, cb); //LOG_PRINT_L0("HTTP_RECV: " << ptr << "\r\n" << buf); //file_io_utils::save_string_to_file(string_tools::get_current_module_folder() + "/" + boost::lexical_cast<std::string>(ptr), std::string((const char*)ptr, cb)); bool res = handle_buff_in(buf); if(m_want_close/*m_state == http_state_connection_close || m_state == http_state_error*/) return false; return res; } //-------------------------------------------------------------------------------------------- template<class t_connection_context> bool simple_http_connection_handler<t_connection_context>::handle_buff_in(std::string& buf) { if(m_cache.size()) m_cache += buf; else m_cache.swap(buf); m_is_stop_handling = false; while(!m_is_stop_handling) { switch(m_state) { case http_state_retriving_comand_line: //The HTTP protocol does not place any a priori limit on the length of a URI. (c)RFC2616 //but we forebly restirct it len to HTTP_MAX_URI_LEN to make it more safely if(!m_cache.size()) break; //check_and_handle_fake_response(); if((m_cache[0] == '\r' || m_cache[0] == '\n')) { //some times it could be that before query line cold be few line breaks //so we have to be calm without panic with assers m_cache.erase(0, 1); break; } if(std::string::npos != m_cache.find('\n', 0)) handle_invoke_query_line(); else { m_is_stop_handling = true; if(m_cache.size() > HTTP_MAX_URI_LEN) { LOG_ERROR("simple_http_connection_handler::handle_buff_out: Too long URI line"); m_state = http_state_error; return false; } } break; case http_state_retriving_header: { std::string::size_type pos = match_end_of_header(m_cache); if(std::string::npos == pos) { m_is_stop_handling = true; if(m_cache.size() > HTTP_MAX_HEADER_LEN) { LOG_ERROR("simple_http_connection_handler::handle_buff_in: Too long header area"); m_state = http_state_error; return false; } break; } if (!analize_cached_request_header_and_invoke_state(pos)) return false; break; } case http_state_retriving_body: return handle_retriving_query_body(); case http_state_connection_close: return false; default: LOG_ERROR("simple_http_connection_handler::handle_char_out: Wrong state: " << m_state); return false; case http_state_error: LOG_ERROR("simple_http_connection_handler::handle_char_out: Error state!!!"); return false; } if(!m_cache.size()) m_is_stop_handling = true; } return true; } //-------------------------------------------------------------------------------------------- inline bool analize_http_method(const boost::smatch& result, http::http_method& method, int& http_ver_major, int& http_ver_minor) { CHECK_AND_ASSERT_MES(result[0].matched, false, "simple_http_connection_handler::analize_http_method() assert failed..."); http_ver_major = boost::lexical_cast<int>(result[11]); http_ver_minor = boost::lexical_cast<int>(result[12]); if(result[4].matched) method = http::http_method_get; else if(result[5].matched) method = http::http_method_head; else if(result[6].matched) method = http::http_method_post; else if(result[7].matched) method = http::http_method_put; else method = http::http_method_etc; return true; } //-------------------------------------------------------------------------------------------- template<class t_connection_context> bool simple_http_connection_handler<t_connection_context>::handle_invoke_query_line() { STATIC_REGEXP_EXPR_1(rexp_match_command_line, "^(((OPTIONS)|(GET)|(HEAD)|(POST)|(PUT)|(DELETE)|(TRACE)) (\\S+) HTTP/(\\d+).(\\d+))\r?\n", boost::regex::icase | boost::regex::normal); // 123 4 5 6 7 8 9 10 11 12 //size_t match_len = 0; boost::smatch result; if(boost::regex_search(m_cache, result, rexp_match_command_line, boost::match_default) && result[0].matched) { analize_http_method(result, m_query_info.m_http_method, m_query_info.m_http_ver_hi, m_query_info.m_http_ver_hi); m_query_info.m_URI = result[10]; parse_uri(m_query_info.m_URI, m_query_info.m_uri_content); m_query_info.m_http_method_str = result[2]; m_query_info.m_full_request_str = result[0]; m_cache.erase(m_cache.begin(), to_nonsonst_iterator(m_cache, result[0].second)); m_state = http_state_retriving_header; return true; }else { m_state = http_state_error; LOG_ERROR("simple_http_connection_handler<t_connection_context>::handle_invoke_query_line(): Failed to match first line: " << m_cache); return false; } return false; } //-------------------------------------------------------------------------------------------- template<class t_connection_context> std::string::size_type simple_http_connection_handler<t_connection_context>::match_end_of_header(const std::string& buf) { //Here we returning head size, including terminating sequence (\r\n\r\n or \n\n) std::string::size_type res = buf.find("\r\n\r\n"); if(std::string::npos != res) return res+4; res = buf.find("\n\n"); if(std::string::npos != res) return res+2; return res; } //-------------------------------------------------------------------------------------------- template<class t_connection_context> bool simple_http_connection_handler<t_connection_context>::analize_cached_request_header_and_invoke_state(size_t pos) { //LOG_PRINT_L4("HTTP HEAD:\r\n" << m_cache.substr(0, pos)); m_query_info.m_full_request_buf_size = pos; m_query_info.m_request_head.assign(m_cache.begin(), m_cache.begin()+pos); if(!parse_cached_header(m_query_info.m_header_info, m_cache, pos)) { LOG_ERROR("simple_http_connection_handler<t_connection_context>::analize_cached_request_header_and_invoke_state(): failed to anilize request header: " << m_cache); m_state = http_state_error; return false; } if (!m_config.m_required_user_agent.empty() && m_query_info.m_header_info.m_user_agent != m_config.m_required_user_agent) { LOG_ERROR("simple_http_connection_handler<t_connection_context>::analize_cached_request_header_and_invoke_state(): unexpected user agent: " << m_query_info.m_header_info.m_user_agent); m_state = http_state_error; return false; } m_cache.erase(0, pos); std::string req_command_str = m_query_info.m_full_request_str; //if we have POST or PUT command, it is very possible tha we will get body //but now, we suppose than we have body only in case of we have "ContentLength" if(m_query_info.m_header_info.m_content_length.size()) { m_state = http_state_retriving_body; m_body_transfer_type = http_body_transfer_measure; if(!get_len_from_content_lenght(m_query_info.m_header_info.m_content_length, m_len_summary)) { LOG_ERROR("simple_http_connection_handler<t_connection_context>::analize_cached_request_header_and_invoke_state(): Failed to get_len_from_content_lenght();, m_query_info.m_content_length="<<m_query_info.m_header_info.m_content_length); m_state = http_state_error; return false; } if(0 == m_len_summary) { //current query finished, next will be next query if(handle_request_and_send_response(m_query_info)) set_ready_state(); else m_state = http_state_error; } m_len_remain = m_len_summary; }else {//current query finished, next will be next query handle_request_and_send_response(m_query_info); set_ready_state(); } return true; } //----------------------------------------------------------------------------------- template<class t_connection_context> bool simple_http_connection_handler<t_connection_context>::handle_retriving_query_body() { switch(m_body_transfer_type) { case http_body_transfer_measure: return handle_query_measure(); case http_body_transfer_chunked: case http_body_transfer_connection_close: case http_body_transfer_multipart: case http_body_transfer_undefined: default: LOG_ERROR("simple_http_connection_handler<t_connection_context>::handle_retriving_query_body(): Unexpected m_body_query_type state:" << m_body_transfer_type); m_state = http_state_error; return false; } return true; } //----------------------------------------------------------------------------------- template<class t_connection_context> bool simple_http_connection_handler<t_connection_context>::handle_query_measure() { if(m_len_remain >= m_cache.size()) { m_len_remain -= m_cache.size(); m_query_info.m_body += m_cache; m_cache.clear(); }else { m_query_info.m_body.append(m_cache.begin(), m_cache.begin() + m_len_remain); m_cache.erase(0, m_len_remain); m_len_remain = 0; } if(!m_len_remain) { if(handle_request_and_send_response(m_query_info)) set_ready_state(); else m_state = http_state_error; } return true; } //-------------------------------------------------------------------------------------------- template<class t_connection_context> bool simple_http_connection_handler<t_connection_context>::parse_cached_header(http_header_info& body_info, const std::string& m_cache_to_process, size_t pos) { STATIC_REGEXP_EXPR_1(rexp_mach_field, "\n?((Connection)|(Referer)|(Content-Length)|(Content-Type)|(Transfer-Encoding)|(Content-Encoding)|(Host)|(Cookie)|(User-Agent)" // 12 3 4 5 6 7 8 9 10 "|([\\w-]+?)) ?: ?((.*?)(\r?\n))[^\t ]", //11 1213 14 boost::regex::icase | boost::regex::normal); boost::smatch result; std::string::const_iterator it_current_bound = m_cache_to_process.begin(); std::string::const_iterator it_end_bound = m_cache_to_process.begin()+pos; body_info.clear(); //lookup all fields and fill well-known fields while( boost::regex_search( it_current_bound, it_end_bound, result, rexp_mach_field, boost::match_default) && result[0].matched) { const size_t field_val = 13; const size_t field_etc_name = 11; int i = 2; //start position = 2 if(result[i++].matched)//"Connection" body_info.m_connection = result[field_val]; else if(result[i++].matched)//"Referer" body_info.m_referer = result[field_val]; else if(result[i++].matched)//"Content-Length" body_info.m_content_length = result[field_val]; else if(result[i++].matched)//"Content-Type" body_info.m_content_type = result[field_val]; else if(result[i++].matched)//"Transfer-Encoding" body_info.m_transfer_encoding = result[field_val]; else if(result[i++].matched)//"Content-Encoding" body_info.m_content_encoding = result[field_val]; else if(result[i++].matched)//"Host" body_info.m_host = result[field_val]; else if(result[i++].matched)//"Cookie" body_info.m_cookie = result[field_val]; else if(result[i++].matched)//"User-Agent" body_info.m_user_agent = result[field_val]; else if(result[i++].matched)//e.t.c (HAVE TO BE MATCHED!) body_info.m_etc_fields.push_back(std::pair<std::string, std::string>(result[field_etc_name], result[field_val])); else { LOG_ERROR("simple_http_connection_handler<t_connection_context>::parse_cached_header() not matched last entry in:"<<m_cache_to_process); } it_current_bound = result[(int)result.size()-1]. first; } return true; } //----------------------------------------------------------------------------------- template<class t_connection_context> bool simple_http_connection_handler<t_connection_context>::get_len_from_content_lenght(const std::string& str, size_t& OUT len) { STATIC_REGEXP_EXPR_1(rexp_mach_field, "\\d+", boost::regex::normal); std::string res; boost::smatch result; if(!(boost::regex_search( str, result, rexp_mach_field, boost::match_default) && result[0].matched)) return false; len = boost::lexical_cast<size_t>(result[0]); return true; } //----------------------------------------------------------------------------------- template<class t_connection_context> bool simple_http_connection_handler<t_connection_context>::handle_request_and_send_response(const http::http_request_info& query_info) { http_response_info response; bool res = handle_request(query_info, response); //CHECK_AND_ASSERT_MES(res, res, "handle_request(query_info, response) returned false" ); std::string response_data = get_response_header(response); //LOG_PRINT_L0("HTTP_SEND: << \r\n" << response_data + response.m_body); LOG_PRINT_L3("HTTP_RESPONSE_HEAD: << \r\n" << response_data); m_psnd_hndlr->do_send((void*)response_data.data(), response_data.size()); if(response.m_body.size()) m_psnd_hndlr->do_send((void*)response.m_body.data(), response.m_body.size()); return res; } //----------------------------------------------------------------------------------- template<class t_connection_context> bool simple_http_connection_handler<t_connection_context>::handle_request(const http::http_request_info& query_info, http_response_info& response) { std::string uri_to_path = query_info.m_uri_content.m_path; if("/" == uri_to_path) uri_to_path = "/index.html"; //slash_to_back_slash(uri_to_path); m_config.m_lock.lock(); std::string destination_file_path = m_config.m_folder + uri_to_path; m_config.m_lock.unlock(); if(!file_io_utils::load_file_to_string(destination_file_path.c_str(), response.m_body)) { MWARNING("URI \""<< query_info.m_full_request_str.substr(0, query_info.m_full_request_str.size()-2) << "\" [" << destination_file_path << "] Not Found (404 )"); response.m_body = get_not_found_response_body(query_info.m_URI); response.m_response_code = 404; response.m_response_comment = "Not found"; response.m_mime_tipe = "text/html"; return true; } MDEBUG(" -->> " << query_info.m_full_request_str << "\r\n<<--OK"); response.m_response_code = 200; response.m_response_comment = "OK"; response.m_mime_tipe = get_file_mime_tipe(uri_to_path); return true; } //----------------------------------------------------------------------------------- template<class t_connection_context> std::string simple_http_connection_handler<t_connection_context>::get_response_header(const http_response_info& response) { std::string buf = "HTTP/1.1 "; buf += boost::lexical_cast<std::string>(response.m_response_code) + " " + response.m_response_comment + "\r\n" + "Server: Epee-based\r\n" "Content-Length: "; buf += boost::lexical_cast<std::string>(response.m_body.size()) + "\r\n"; buf += "Content-Type: "; buf += response.m_mime_tipe + "\r\n"; buf += "Last-Modified: "; time_t tm; time(&tm); buf += misc_utils::get_internet_time_str(tm) + "\r\n"; buf += "Accept-Ranges: bytes\r\n"; //Wed, 01 Dec 2010 03:27:41 GMT" string_tools::trim(m_query_info.m_header_info.m_connection); if(m_query_info.m_header_info.m_connection.size()) { if(!string_tools::compare_no_case("close", m_query_info.m_header_info.m_connection)) { //closing connection after sending buf += "Connection: close\r\n"; m_state = http_state_connection_close; m_want_close = true; } } //add additional fields, if it is for(fields_list::const_iterator it = response.m_additional_fields.begin(); it!=response.m_additional_fields.end(); it++) buf += it->first + ":" + it->second + "\r\n"; buf+="\r\n"; return buf; } //----------------------------------------------------------------------------------- template<class t_connection_context> std::string simple_http_connection_handler<t_connection_context>::get_file_mime_tipe(const std::string& path) { std::string result; std::string ext = string_tools::get_extension(path); if(!string_tools::compare_no_case(ext, "gif")) result = "image/gif"; else if(!string_tools::compare_no_case(ext, "jpg")) result = "image/jpeg"; else if(!string_tools::compare_no_case(ext, "html")) result = "text/html"; else if(!string_tools::compare_no_case(ext, "htm")) result = "text/html"; else if(!string_tools::compare_no_case(ext, "js")) result = "application/x-javascript"; else if(!string_tools::compare_no_case(ext, "css")) result = "text/css"; else if(!string_tools::compare_no_case(ext, "xml")) result = "application/xml"; else if(!string_tools::compare_no_case(ext, "svg")) result = "image/svg+xml"; return result; } //----------------------------------------------------------------------------------- template<class t_connection_context> std::string simple_http_connection_handler<t_connection_context>::get_not_found_response_body(const std::string& URI) { std::string body = "<!DOCTYPE HTML PUBLIC \"-//IETF//DTD HTML 2.0//EN\">\r\n" "<html><head>\r\n" "<title>404 Not Found</title>\r\n" "</head><body>\r\n" "<h1>Not Found</h1>\r\n" "<p>The requested URL \r\n"; body += URI; body += "was not found on this server.</p>\r\n" "</body></html>\r\n"; return body; } //-------------------------------------------------------------------------------------------- template<class t_connection_context> bool simple_http_connection_handler<t_connection_context>::slash_to_back_slash(std::string& str) { for(std::string::iterator it = str.begin(); it!=str.end(); it++) if('/' == *it) *it = '\\'; return true; } } } } //-------------------------------------------------------------------------------------------- //-------------------------------------------------------------------------------------------- //--------------------------------------------------------------------------------------------