mirror of
https://github.com/RetroShare/RetroShare.git
synced 2024-12-28 00:49:28 -05:00
Merge pull request #103 from AsamK/html_parsing
Improve HTML parsing in libresapi
This commit is contained in:
commit
4a0539c035
@ -294,62 +294,7 @@ void ChatHandler::tick()
|
||||
|
||||
// remove html tags from chat message
|
||||
// extract links from href
|
||||
const std::string& in = msg.msg;
|
||||
std::string out;
|
||||
bool ignore = false;
|
||||
bool keep_link = false;
|
||||
std::string last_six_chars;
|
||||
Triple current_link;
|
||||
std::vector<Triple> links;
|
||||
for(unsigned int i = 0; i < in.size(); ++i)
|
||||
{
|
||||
if(keep_link && in[i] == '"')
|
||||
{
|
||||
keep_link = false;
|
||||
current_link.second = out.size();
|
||||
}
|
||||
if(last_six_chars == "href=\"")
|
||||
{
|
||||
keep_link = true;
|
||||
current_link.first = out.size();
|
||||
}
|
||||
|
||||
// "rising edge" sets mode to ignore
|
||||
if(in[i] == '<')
|
||||
{
|
||||
ignore = true;
|
||||
}
|
||||
if(!ignore || keep_link)
|
||||
out += in[i];
|
||||
// "falling edge" resets mode to keep
|
||||
if(in[i] == '>')
|
||||
ignore = false;
|
||||
|
||||
last_six_chars += in[i];
|
||||
if(last_six_chars.size() > 6)
|
||||
last_six_chars = last_six_chars.substr(1);
|
||||
std::string a = "</a>";
|
||||
if( current_link.first != -1
|
||||
&& last_six_chars.size() >= a.size()
|
||||
&& last_six_chars.substr(last_six_chars.size()-a.size()) == a)
|
||||
{
|
||||
// only allow these protocols
|
||||
// we don't want for example javascript:alert(0)
|
||||
std::string http = "http://";
|
||||
std::string https = "https://";
|
||||
std::string retroshare = "retroshare://";
|
||||
if( out.substr(current_link.first, http.size()) == http
|
||||
|| out.substr(current_link.first, https.size()) == https
|
||||
|| out.substr(current_link.first, retroshare.size()) == retroshare)
|
||||
{
|
||||
current_link.third = out.size();
|
||||
links.push_back(current_link);
|
||||
}
|
||||
current_link = Triple();
|
||||
}
|
||||
}
|
||||
m.msg = out;
|
||||
m.links = links;
|
||||
getPlainText(msg.msg, m.msg, m.links);
|
||||
m.recv_time = msg.recvTime;
|
||||
m.send_time = msg.sendTime;
|
||||
|
||||
@ -390,6 +335,76 @@ void ChatHandler::tick()
|
||||
}
|
||||
}
|
||||
|
||||
void ChatHandler::getPlainText(const std::string& in, std::string &out, std::vector<Triple> &links)
|
||||
{
|
||||
if (in.size() == 0)
|
||||
return;
|
||||
|
||||
if (in[0] != '<' || in[in.size() - 1] != '>')
|
||||
{
|
||||
// It's a plain text message without HTML
|
||||
out = in;
|
||||
return;
|
||||
}
|
||||
bool ignore = false;
|
||||
|
||||
bool keep_link = false;
|
||||
std::string last_six_chars;
|
||||
unsigned int tag_start_index = 0;
|
||||
Triple current_link;
|
||||
for(unsigned int i = 0; i < in.size(); ++i)
|
||||
{
|
||||
if(keep_link && in[i] == '"')
|
||||
{
|
||||
keep_link = false;
|
||||
current_link.second = out.size();
|
||||
}
|
||||
if(last_six_chars == "href=\"")
|
||||
{
|
||||
keep_link = true;
|
||||
current_link.first = out.size();
|
||||
}
|
||||
|
||||
// "rising edge" sets mode to ignore
|
||||
if(in[i] == '<')
|
||||
{
|
||||
tag_start_index = i;
|
||||
ignore = true;
|
||||
}
|
||||
if(!ignore || keep_link)
|
||||
out += in[i];
|
||||
// "falling edge" resets mode to keep
|
||||
if(in[i] == '>') {
|
||||
// leave ignore mode on, if it's a style tag
|
||||
if (tag_start_index == 0 || tag_start_index + 6 > i || in.substr(tag_start_index, 6) != "<style")
|
||||
ignore = false;
|
||||
}
|
||||
|
||||
last_six_chars += in[i];
|
||||
if(last_six_chars.size() > 6)
|
||||
last_six_chars = last_six_chars.substr(1);
|
||||
std::string a = "</a>";
|
||||
if( current_link.first != -1
|
||||
&& last_six_chars.size() >= a.size()
|
||||
&& last_six_chars.substr(last_six_chars.size()-a.size()) == a)
|
||||
{
|
||||
// only allow these protocols
|
||||
// we don't want for example javascript:alert(0)
|
||||
std::string http = "http://";
|
||||
std::string https = "https://";
|
||||
std::string retroshare = "retroshare://";
|
||||
if( out.substr(current_link.first, http.size()) == http
|
||||
|| out.substr(current_link.first, https.size()) == https
|
||||
|| out.substr(current_link.first, retroshare.size()) == retroshare)
|
||||
{
|
||||
current_link.third = out.size();
|
||||
links.push_back(current_link);
|
||||
}
|
||||
current_link = Triple();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void ChatHandler::handleWildcard(Request &req, Response &resp)
|
||||
{
|
||||
RS_STACK_MUTEX(mMtx); /********** LOCKED **********/
|
||||
|
@ -102,6 +102,8 @@ private:
|
||||
void handleSendStatus(Request& req, Response& resp);
|
||||
void handleUnreadMsgs(Request& req, Response& resp);
|
||||
|
||||
// Strips HTML markup from `in` and stores the resulting plain text in `out`.
// Links with an http://, https:// or retroshare:// target are appended to
// `links` as offsets into `out`. Input that does not start with '<' and end
// with '>' is treated as plain text and copied unchanged.
void getPlainText(const std::string& in, std::string &out, std::vector<Triple> &links);
|
||||
|
||||
StateTokenServer* mStateTokenServer;
|
||||
RsNotify* mNotify;
|
||||
RsMsgs* mRsMsgs;
|
||||
|
Loading…
Reference in New Issue
Block a user