mirror of
https://github.com/RetroShare/RetroShare.git
synced 2024-10-01 02:35:48 -04:00
1025 lines
26 KiB
C++
1025 lines
26 KiB
C++
|
/****************************************************************
|
||
|
* RetroShare GUI is distributed under the following license:
|
||
|
*
|
||
|
* Copyright (C) 2012 by Thunder
|
||
|
*
|
||
|
* This program is free software; you can redistribute it and/or
|
||
|
* modify it under the terms of the GNU General Public License
|
||
|
* as published by the Free Software Foundation; either version 2
|
||
|
* of the License, or (at your option) any later version.
|
||
|
*
|
||
|
* This program is distributed in the hope that it will be useful,
|
||
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||
|
* GNU General Public License for more details.
|
||
|
*
|
||
|
* You should have received a copy of the GNU General Public License
|
||
|
* along with this program; if not, write to the Free Software
|
||
|
* Foundation, Inc., 51 Franklin Street, Fifth Floor,
|
||
|
* Boston, MA 02110-1301, USA.
|
||
|
****************************************************************/
|
||
|
|
||
|
#include "p3FeedReaderThread.h"
|
||
|
#include "rsFeedReaderItems.h"
|
||
|
#include "util/rsstring.h"
|
||
|
|
||
|
#include <curl/curl.h>
|
||
|
#include <libxml/parser.h>
|
||
|
#include <openssl/evp.h>
|
||
|
|
||
|
enum FeedFormat { FORMAT_RSS, FORMAT_RDF };
|
||
|
|
||
|
/*********
|
||
|
* #define FEEDREADER_DEBUG
|
||
|
*********/
|
||
|
|
||
|
p3FeedReaderThread::p3FeedReaderThread(p3FeedReader *feedReader, Type type) : RsThread(), mFeedReader(feedReader), mType(type)
|
||
|
{
|
||
|
if (type == PROCESS) {
|
||
|
mCharEncodingHandler = xmlFindCharEncodingHandler ("UTF8");
|
||
|
|
||
|
if (!mCharEncodingHandler) {
|
||
|
/* no encoding handler found */
|
||
|
std::cerr << "p3FeedReaderThread::p3FeedReaderThread - no encoding handler found" << std::endl;
|
||
|
}
|
||
|
} else {
|
||
|
mCharEncodingHandler = NULL;
|
||
|
}
|
||
|
}
|
||
|
|
||
|
/***************************************************************************/
|
||
|
/****************************** Thread *************************************/
|
||
|
/***************************************************************************/
|
||
|
|
||
|
void p3FeedReaderThread::run()
|
||
|
{
|
||
|
while (isRunning()) {
|
||
|
#ifdef WIN32
|
||
|
Sleep(1000);
|
||
|
#else
|
||
|
usleep(1000000);
|
||
|
#endif
|
||
|
/* every second */
|
||
|
|
||
|
switch (mType) {
|
||
|
case DOWNLOAD:
|
||
|
{
|
||
|
RsFeedReaderFeed feed;
|
||
|
if (mFeedReader->getFeedToDownload(feed)) {
|
||
|
std::string content;
|
||
|
std::string icon;
|
||
|
std::string error;
|
||
|
|
||
|
DownloadResult result = download(feed, content, icon, error);
|
||
|
if (result == DOWNLOAD_SUCCESS) {
|
||
|
mFeedReader->onDownloadSuccess(feed.feedId, content, icon);
|
||
|
} else {
|
||
|
mFeedReader->onDownloadError(feed.feedId, result, error);
|
||
|
}
|
||
|
}
|
||
|
}
|
||
|
break;
|
||
|
case PROCESS:
|
||
|
{
|
||
|
RsFeedReaderFeed feed;
|
||
|
if (mFeedReader->getFeedToProcess(feed)) {
|
||
|
std::list<RsFeedReaderMsg*> entries;
|
||
|
std::string error;
|
||
|
|
||
|
ProcessResult result = process(feed, entries, error);
|
||
|
if (result == PROCESS_SUCCESS) {
|
||
|
mFeedReader->onProcessSuccess(feed.feedId, entries);
|
||
|
} else {
|
||
|
mFeedReader->onProcessError(feed.feedId, result);
|
||
|
}
|
||
|
|
||
|
std::list<RsFeedReaderMsg*>::iterator it;
|
||
|
for (it = entries.begin(); it != entries.end(); ++it) {
|
||
|
delete (*it);
|
||
|
}
|
||
|
entries.clear();
|
||
|
}
|
||
|
}
|
||
|
break;
|
||
|
}
|
||
|
}
|
||
|
}
|
||
|
|
||
|
/***************************************************************************/
|
||
|
/****************************** Download ***********************************/
|
||
|
/***************************************************************************/
|
||
|
|
||
|
static size_t writeFunctionString (void *ptr, size_t size, size_t nmemb, void *stream)
|
||
|
{
|
||
|
std::string *s = (std::string*) stream;
|
||
|
s->append ((char*) ptr, size * nmemb);
|
||
|
|
||
|
return nmemb * size;
|
||
|
}
|
||
|
|
||
|
static size_t writeFunctionBinary (void *ptr, size_t size, size_t nmemb, void *stream)
|
||
|
{
|
||
|
std::vector<byte> *bytes = (std::vector<byte>*) stream;
|
||
|
|
||
|
std::vector<byte> newBytes;
|
||
|
newBytes.resize(size * nmemb);
|
||
|
memcpy(newBytes.data(), ptr, newBytes.size());
|
||
|
|
||
|
bytes->insert(bytes->end(), newBytes.begin(), newBytes.end());
|
||
|
|
||
|
return nmemb * size;
|
||
|
}
|
||
|
|
||
|
static int progressCallback (void *clientp, double /*dltotal*/, double /*dlnow*/, double /*ultotal*/, double /*ulnow*/)
|
||
|
{
|
||
|
p3FeedReaderThread *thread = (p3FeedReaderThread*) clientp;
|
||
|
|
||
|
if (!thread->isRunning()) {
|
||
|
/* thread was stopped */
|
||
|
return 1;
|
||
|
}
|
||
|
|
||
|
long todo; // show progress in gui
|
||
|
|
||
|
return 0;
|
||
|
}
|
||
|
|
||
|
static bool getFavicon (std::string url, const std::string &proxy, std::string &icon)
|
||
|
{
|
||
|
icon.clear();
|
||
|
|
||
|
if (url.substr(0, 7) == "http://") {
|
||
|
int found = url.find("/", 7);
|
||
|
if (found >= 0) {
|
||
|
url.erase(found, url.length() - found);
|
||
|
}
|
||
|
} else {
|
||
|
return false;
|
||
|
}
|
||
|
|
||
|
bool result = false;
|
||
|
|
||
|
CURL *curl = curl_easy_init();
|
||
|
if (curl) {
|
||
|
url += "/favicon.ico";
|
||
|
|
||
|
std::vector<byte> vicon;
|
||
|
curl_easy_setopt(curl, CURLOPT_NOPROGRESS, 1);
|
||
|
curl_easy_setopt(curl, CURLOPT_URL, url.c_str());
|
||
|
curl_easy_setopt(curl, CURLOPT_WRITEFUNCTION, writeFunctionBinary);
|
||
|
curl_easy_setopt(curl, CURLOPT_WRITEDATA, &vicon);
|
||
|
curl_easy_setopt(curl, CURLOPT_FOLLOWLOCATION, 1);
|
||
|
curl_easy_setopt(curl, CURLOPT_COOKIESESSION, 1);
|
||
|
|
||
|
if (!proxy.empty()) {
|
||
|
curl_easy_setopt(curl, CURLOPT_PROXY, proxy.c_str());
|
||
|
}
|
||
|
|
||
|
CURLcode code = curl_easy_perform(curl);
|
||
|
if (code == CURLE_OK) {
|
||
|
long response;
|
||
|
curl_easy_getinfo(curl, CURLINFO_RESPONSE_CODE, &response);
|
||
|
if (response == 200) {
|
||
|
char *contentType = NULL;
|
||
|
curl_easy_getinfo(curl, CURLINFO_CONTENT_TYPE, &contentType);
|
||
|
if (contentType &&
|
||
|
(strnicmp(contentType, "image/x-icon", 12) == 0 ||
|
||
|
strnicmp(contentType, "application/octet-stream", 24) == 0 ||
|
||
|
strnicmp(contentType, "text/plain", 10) == 0)) {
|
||
|
if (!vicon.empty()) {
|
||
|
long todo; // check it
|
||
|
// Set up a base64 encoding BIO that writes to a memory BIO
|
||
|
BIO *b64 = BIO_new(BIO_f_base64());
|
||
|
if (b64) {
|
||
|
BIO *out = BIO_new(BIO_s_mem());
|
||
|
if (out) {
|
||
|
BIO_set_flags(out, BIO_CLOSE); // probably redundant
|
||
|
b64 = BIO_push(b64, out);
|
||
|
// Send the data
|
||
|
BIO_write(b64, vicon.data(), vicon.size());
|
||
|
// Collect the encoded data
|
||
|
BIO_flush(b64);
|
||
|
char* temp;
|
||
|
int count = BIO_get_mem_data(out, &temp);
|
||
|
if (count && temp) {
|
||
|
icon = temp;
|
||
|
result = true;
|
||
|
}
|
||
|
}
|
||
|
BIO_free_all(b64);
|
||
|
}
|
||
|
}
|
||
|
// char *encode = NULL;
|
||
|
// size_t encodeSize = 0;
|
||
|
// code = Curl_base64_encode(curl, (const char*) vicon.data(), vicon.size(), &encode, &encodeSize);
|
||
|
// if (code == CURLE_OK && encodeSize) {
|
||
|
// icon = encode;
|
||
|
// free(encode);
|
||
|
// encode = NULL;
|
||
|
// result = true;
|
||
|
// }
|
||
|
}
|
||
|
}
|
||
|
}
|
||
|
|
||
|
curl_easy_cleanup(curl);
|
||
|
curl = NULL;
|
||
|
}
|
||
|
|
||
|
return result;
|
||
|
}
|
||
|
|
||
|
p3FeedReaderThread::DownloadResult p3FeedReaderThread::download(const RsFeedReaderFeed &feed, std::string &content, std::string &icon, std::string &error)
|
||
|
{
|
||
|
#ifdef FEEDREADER_DEBUG
|
||
|
std::cerr << "p3FeedReaderThread::download - feed " << feed.feedId << " (" << feed.name << ")" << std::endl;
|
||
|
#endif
|
||
|
|
||
|
content.clear();
|
||
|
error.clear();
|
||
|
|
||
|
DownloadResult result;
|
||
|
|
||
|
CURL *curl = curl_easy_init();
|
||
|
if (curl) {
|
||
|
std::string proxy;
|
||
|
if (feed.flag & RS_FEED_FLAG_STANDARD_PROXY) {
|
||
|
std::string standardProxyAddress;
|
||
|
uint16_t standardProxyPort;
|
||
|
if (mFeedReader->getStandardProxy(standardProxyAddress, standardProxyPort)) {
|
||
|
rs_sprintf(proxy, "%s:%u", standardProxyAddress.c_str(), standardProxyPort);
|
||
|
}
|
||
|
} else {
|
||
|
if (!feed.proxyAddress.empty() && feed.proxyPort) {
|
||
|
rs_sprintf(proxy, "%s:%u", feed.proxyAddress.c_str(), feed.proxyPort);
|
||
|
}
|
||
|
}
|
||
|
|
||
|
curl_easy_setopt(curl, CURLOPT_NOPROGRESS, 0);
|
||
|
curl_easy_setopt(curl, CURLOPT_PROGRESSFUNCTION, progressCallback);
|
||
|
curl_easy_setopt(curl, CURLOPT_PROGRESSDATA, this);
|
||
|
curl_easy_setopt(curl, CURLOPT_URL, feed.url.c_str());
|
||
|
curl_easy_setopt(curl, CURLOPT_WRITEFUNCTION, writeFunctionString);
|
||
|
curl_easy_setopt(curl, CURLOPT_WRITEDATA, &content);
|
||
|
curl_easy_setopt(curl, CURLOPT_FOLLOWLOCATION, 1);
|
||
|
|
||
|
if (!proxy.empty()) {
|
||
|
curl_easy_setopt(curl, CURLOPT_PROXY, proxy.c_str());
|
||
|
}
|
||
|
|
||
|
CURLcode code = curl_easy_perform(curl);
|
||
|
if (code == CURLE_OK) {
|
||
|
long response;
|
||
|
curl_easy_getinfo(curl, CURLINFO_RESPONSE_CODE, &response);
|
||
|
if (response == 200) {
|
||
|
char *contentType = NULL;
|
||
|
curl_easy_getinfo(curl, CURLINFO_CONTENT_TYPE, &contentType);
|
||
|
if (contentType &&
|
||
|
(strnicmp(contentType, "text/xml", 8) == 0 ||
|
||
|
strnicmp(contentType, "application/rss+xml", 19) == 0 ||
|
||
|
strnicmp(contentType, "application/xml", 15) == 0 ||
|
||
|
strnicmp(contentType, "application/xhtml+xml", 21) == 0)) {
|
||
|
/* ok */
|
||
|
result = DOWNLOAD_SUCCESS;
|
||
|
} else {
|
||
|
result = DOWNLOAD_UNKNOWN_CONTENT_TYPE;
|
||
|
error = contentType ? contentType : "";
|
||
|
}
|
||
|
} else if (response == 404) {
|
||
|
result = DOWNLOAD_NOT_FOUND;
|
||
|
} else {
|
||
|
result = DOWNLOAD_UNKOWN_RESPONSE_CODE;
|
||
|
rs_sprintf(error, "%ld", response);
|
||
|
}
|
||
|
} else {
|
||
|
result = DOWNLOAD_ERROR;
|
||
|
error = curl_easy_strerror(code);
|
||
|
}
|
||
|
|
||
|
curl_easy_cleanup(curl);
|
||
|
curl = NULL;
|
||
|
|
||
|
getFavicon(feed.url, proxy, icon);
|
||
|
} else {
|
||
|
result = DOWNLOAD_ERROR_INIT;
|
||
|
}
|
||
|
|
||
|
#ifdef FEEDREADER_DEBUG
|
||
|
std::cerr << "p3FeedReaderThread::download - feed " << feed.feedId << " (" << feed.name << "), result " << result << ", error = " << error << std::endl;
|
||
|
#endif
|
||
|
|
||
|
return result;
|
||
|
}
|
||
|
|
||
|
/***************************************************************************/
|
||
|
/****************************** Process ************************************/
|
||
|
/***************************************************************************/
|
||
|
|
||
|
static bool convertOutput(xmlCharEncodingHandlerPtr charEncodingHandler, const xmlChar *output, std::string &text)
|
||
|
{
|
||
|
if (!output) {
|
||
|
return false;
|
||
|
}
|
||
|
|
||
|
if (charEncodingHandler == NULL || charEncodingHandler->output == NULL) {
|
||
|
return false;
|
||
|
}
|
||
|
|
||
|
bool result = false;
|
||
|
int sizeOut = xmlStrlen(output) + 1;
|
||
|
int sizeIn = sizeOut * 2 - 1;
|
||
|
char *input = (char*) malloc(sizeIn * sizeof(char));
|
||
|
|
||
|
if (input) {
|
||
|
int temp = sizeOut - 1;
|
||
|
int ret = charEncodingHandler->output((xmlChar*) input, &sizeIn, (const xmlChar *) output, &temp);
|
||
|
if ((ret < 0) || (temp - sizeOut + 1)) {
|
||
|
if (ret < 0) {
|
||
|
std::cerr << "convertOutput: conversion wasn't successful." << std::endl;
|
||
|
} else {
|
||
|
std::cerr << "convertOutput: conversion wasn't successful. converted: " << temp << " octets." << std::endl;
|
||
|
}
|
||
|
} else {
|
||
|
text.assign(input, sizeIn);
|
||
|
result = true;
|
||
|
}
|
||
|
|
||
|
free(input);
|
||
|
input = NULL;
|
||
|
} else {
|
||
|
std::cerr << "convertOutput: no mem" << std::endl;
|
||
|
}
|
||
|
|
||
|
return result;
|
||
|
}
|
||
|
|
||
|
static xmlNodePtr findNode(xmlNodePtr node, const char *name, bool children = false)
|
||
|
{
|
||
|
if (node->name) {
|
||
|
if (xmlStrcasecmp (node->name, (xmlChar*) name) == 0) {
|
||
|
return node;
|
||
|
}
|
||
|
}
|
||
|
|
||
|
xmlNodePtr nodeFound = NULL;
|
||
|
if (children) {
|
||
|
if (node->children) {
|
||
|
nodeFound = findNode(node->children, name, children);
|
||
|
if (nodeFound) {
|
||
|
return nodeFound;
|
||
|
}
|
||
|
}
|
||
|
}
|
||
|
|
||
|
if (node->next) {
|
||
|
nodeFound = findNode(node->next, name, children);
|
||
|
if (nodeFound) {
|
||
|
return nodeFound;
|
||
|
}
|
||
|
}
|
||
|
|
||
|
return NULL;
|
||
|
}
|
||
|
|
||
|
static xmlNodePtr getNextItem(FeedFormat feedFormat, xmlNodePtr channel, xmlNodePtr item)
|
||
|
{
|
||
|
if (!channel) {
|
||
|
return NULL;
|
||
|
}
|
||
|
|
||
|
if (!item) {
|
||
|
switch (feedFormat) {
|
||
|
case FORMAT_RSS:
|
||
|
item = channel->children;
|
||
|
break;
|
||
|
case FORMAT_RDF:
|
||
|
item = channel->next;
|
||
|
break;
|
||
|
default:
|
||
|
return NULL;
|
||
|
}
|
||
|
} else {
|
||
|
item = item->next;
|
||
|
}
|
||
|
for (; item; item = item->next) {
|
||
|
if (item->type == XML_ELEMENT_NODE && xmlStrcasecmp (item->name, (xmlChar*) "item") == 0) {
|
||
|
break;
|
||
|
}
|
||
|
}
|
||
|
|
||
|
return item;
|
||
|
}
|
||
|
|
||
|
static bool getChildText(/*xmlCharEncodingHandlerPtr*/ void *charEncodingHandler, xmlNodePtr node, const char *childName, std::string &text)
|
||
|
{
|
||
|
if (node == NULL || node->children == NULL) {
|
||
|
return FALSE;
|
||
|
}
|
||
|
|
||
|
xmlNodePtr child = findNode(node->children, childName, true);
|
||
|
if (!child) {
|
||
|
return false;
|
||
|
}
|
||
|
|
||
|
if (child->type != XML_ELEMENT_NODE) {
|
||
|
return false;
|
||
|
}
|
||
|
|
||
|
if (!child->children) {
|
||
|
return false;
|
||
|
}
|
||
|
|
||
|
if (child->children->type != XML_TEXT_NODE) {
|
||
|
return false;
|
||
|
}
|
||
|
|
||
|
if (child->children->content) {
|
||
|
return convertOutput((xmlCharEncodingHandlerPtr) charEncodingHandler, child->children->content, text);
|
||
|
}
|
||
|
|
||
|
return true;
|
||
|
}
|
||
|
|
||
|
static void splitString(std::string s, std::vector<std::string> &v, const char d)
|
||
|
{
|
||
|
v.clear();
|
||
|
|
||
|
std::string::size_type p;
|
||
|
while ((p = s.find_first_of(d)) != std::string::npos) {
|
||
|
v.push_back(s.substr(0, p));
|
||
|
s.erase(0, p + 1);
|
||
|
}
|
||
|
if (!s.empty()) {
|
||
|
v.push_back(s);
|
||
|
}
|
||
|
}
|
||
|
|
||
|
static unsigned int ymdhms_to_seconds(int year, int mon, int day, int hour, int minute, int second)
|
||
|
{
|
||
|
if (sizeof(time_t) == 4)
|
||
|
{
|
||
|
if ((time_t)-1 < 0)
|
||
|
{
|
||
|
if (year >= 2038)
|
||
|
{
|
||
|
year = 2038;
|
||
|
mon = 0;
|
||
|
day = 1;
|
||
|
hour = 0;
|
||
|
minute = 0;
|
||
|
second = 0;
|
||
|
}
|
||
|
}
|
||
|
else
|
||
|
{
|
||
|
if (year >= 2115)
|
||
|
{
|
||
|
year = 2115;
|
||
|
mon = 0;
|
||
|
day = 1;
|
||
|
hour = 0;
|
||
|
minute = 0;
|
||
|
second = 0;
|
||
|
}
|
||
|
}
|
||
|
}
|
||
|
|
||
|
unsigned int ret = (day - 32075) /* days */
|
||
|
+ 1461L * (year + 4800L + (mon - 14) / 12) / 4
|
||
|
+ 367 * (mon - 2 - (mon - 14) / 12 * 12) / 12
|
||
|
- 3 * ((year + 4900L + (mon - 14) / 12) / 100) / 4
|
||
|
- 2440588;
|
||
|
ret = 24*ret + hour; /* hours */
|
||
|
ret = 60*ret + minute; /* minutes */
|
||
|
ret = 60*ret + second; /* seconds */
|
||
|
|
||
|
return ret;
|
||
|
}
|
||
|
|
||
|
static const char haystack[37]="janfebmaraprmayjunjulaugsepoctnovdec";
|
||
|
|
||
|
// we follow the recommendation of rfc2822 to consider all
|
||
|
// obsolete time zones not listed here equivalent to "-0000"
|
||
|
static const struct {
|
||
|
const char tzName[4];
|
||
|
int tzOffset;
|
||
|
} known_zones[] = {
|
||
|
{ "UT", 0 },
|
||
|
{ "GMT", 0 },
|
||
|
{ "EST", -300 },
|
||
|
{ "EDT", -240 },
|
||
|
{ "CST", -360 },
|
||
|
{ "CDT", -300 },
|
||
|
{ "MST", -420 },
|
||
|
{ "MDT", -360 },
|
||
|
{ "PST", -480 },
|
||
|
{ "PDT", -420 },
|
||
|
{ { 0,0,0,0 }, 0 }
|
||
|
};
|
||
|
|
||
|
// copied from KRFCDate::parseDate
|
||
|
static time_t parseRFC822Date(const std::string &pubDate)
|
||
|
{
|
||
|
if (pubDate.empty())
|
||
|
return 0;
|
||
|
|
||
|
// This parse a date in the form:
|
||
|
// Wednesday, 09-Nov-99 23:12:40 GMT
|
||
|
// or
|
||
|
// Sat, 01-Jan-2000 08:00:00 GMT
|
||
|
// or
|
||
|
// Sat, 01 Jan 2000 08:00:00 GMT
|
||
|
// or
|
||
|
// 01 Jan 99 22:00 +0100 (exceptions in rfc822/rfc2822)
|
||
|
//
|
||
|
// We ignore the weekday
|
||
|
//
|
||
|
time_t result = 0;
|
||
|
int offset = 0;
|
||
|
char *newPosStr;
|
||
|
const char *dateString = pubDate.c_str();
|
||
|
int day = 0;
|
||
|
char monthStr[4];
|
||
|
int month = -1;
|
||
|
int year = 0;
|
||
|
int hour = 0;
|
||
|
int minute = 0;
|
||
|
int second = 0;
|
||
|
|
||
|
// Strip leading space
|
||
|
while(*dateString && isspace(*dateString))
|
||
|
dateString++;
|
||
|
|
||
|
// Strip weekday
|
||
|
while(*dateString && !isdigit(*dateString) && !isspace(*dateString))
|
||
|
dateString++;
|
||
|
|
||
|
// Strip trailing space
|
||
|
while(*dateString && isspace(*dateString))
|
||
|
dateString++;
|
||
|
|
||
|
if (!*dateString)
|
||
|
return result; // Invalid date
|
||
|
|
||
|
if (isalpha(*dateString))
|
||
|
{
|
||
|
// ' Nov 5 1994 18:15:30 GMT'
|
||
|
// Strip leading space
|
||
|
while(*dateString && isspace(*dateString))
|
||
|
dateString++;
|
||
|
|
||
|
for(int i=0; i < 3;i++)
|
||
|
{
|
||
|
if (!*dateString || (*dateString == '-') || isspace(*dateString))
|
||
|
return result; // Invalid date
|
||
|
monthStr[i] = tolower(*dateString++);
|
||
|
}
|
||
|
monthStr[3] = '\0';
|
||
|
|
||
|
newPosStr = (char*)strstr(haystack, monthStr);
|
||
|
|
||
|
if (!newPosStr)
|
||
|
return result; // Invalid date
|
||
|
|
||
|
month = (newPosStr-haystack)/3; // Jan=00, Feb=01, Mar=02, ..
|
||
|
|
||
|
if ((month < 0) || (month > 11))
|
||
|
return result; // Invalid date
|
||
|
|
||
|
while (*dateString && isalpha(*dateString))
|
||
|
dateString++; // Skip rest of month-name
|
||
|
}
|
||
|
|
||
|
// ' 09-Nov-99 23:12:40 GMT'
|
||
|
// ' 5 1994 18:15:30 GMT'
|
||
|
day = strtol(dateString, &newPosStr, 10);
|
||
|
dateString = newPosStr;
|
||
|
|
||
|
if ((day < 1) || (day > 31))
|
||
|
return result; // Invalid date;
|
||
|
|
||
|
if (!*dateString)
|
||
|
return result; // Invalid date
|
||
|
|
||
|
while(*dateString && (isspace(*dateString) || (*dateString == '-')))
|
||
|
dateString++;
|
||
|
|
||
|
if (month == -1)
|
||
|
{
|
||
|
for(int i=0; i < 3;i++)
|
||
|
{
|
||
|
if (!*dateString || (*dateString == '-') || isspace(*dateString))
|
||
|
return result; // Invalid date
|
||
|
monthStr[i] = tolower(*dateString++);
|
||
|
}
|
||
|
monthStr[3] = '\0';
|
||
|
|
||
|
newPosStr = (char*)strstr(haystack, monthStr);
|
||
|
|
||
|
if (!newPosStr)
|
||
|
return result; // Invalid date
|
||
|
|
||
|
month = (newPosStr-haystack)/3; // Jan=00, Feb=01, Mar=02, ..
|
||
|
|
||
|
if ((month < 0) || (month > 11))
|
||
|
return result; // Invalid date
|
||
|
|
||
|
while (*dateString && isalpha(*dateString))
|
||
|
dateString++; // Skip rest of month-name
|
||
|
|
||
|
}
|
||
|
|
||
|
// '-99 23:12:40 GMT'
|
||
|
while(*dateString && (isspace(*dateString) || (*dateString == '-')))
|
||
|
dateString++;
|
||
|
|
||
|
if (!*dateString || !isdigit(*dateString))
|
||
|
return result; // Invalid date
|
||
|
|
||
|
// '99 23:12:40 GMT'
|
||
|
year = strtol(dateString, &newPosStr, 10);
|
||
|
dateString = newPosStr;
|
||
|
|
||
|
// Y2K: Solve 2 digit years
|
||
|
if ((year >= 0) && (year < 50))
|
||
|
year += 2000;
|
||
|
|
||
|
if ((year >= 50) && (year < 100))
|
||
|
year += 1900; // Y2K
|
||
|
|
||
|
if ((year < 1900) || (year > 2500))
|
||
|
return result; // Invalid date
|
||
|
|
||
|
// Don't fail if the time is missing.
|
||
|
if (*dateString)
|
||
|
{
|
||
|
// ' 23:12:40 GMT'
|
||
|
if (!isspace(*dateString++))
|
||
|
return result; // Invalid date
|
||
|
|
||
|
hour = strtol(dateString, &newPosStr, 10);
|
||
|
dateString = newPosStr;
|
||
|
|
||
|
if ((hour < 0) || (hour > 23))
|
||
|
return result; // Invalid date
|
||
|
|
||
|
if (!*dateString)
|
||
|
return result; // Invalid date
|
||
|
|
||
|
// ':12:40 GMT'
|
||
|
if (*dateString++ != ':')
|
||
|
return result; // Invalid date
|
||
|
|
||
|
minute = strtol(dateString, &newPosStr, 10);
|
||
|
dateString = newPosStr;
|
||
|
|
||
|
if ((minute < 0) || (minute > 59))
|
||
|
return result; // Invalid date
|
||
|
|
||
|
if (!*dateString)
|
||
|
return result; // Invalid date
|
||
|
|
||
|
// ':40 GMT'
|
||
|
if (*dateString != ':' && !isspace(*dateString))
|
||
|
return result; // Invalid date
|
||
|
|
||
|
// seconds are optional in rfc822 + rfc2822
|
||
|
if (*dateString ==':') {
|
||
|
dateString++;
|
||
|
|
||
|
second = strtol(dateString, &newPosStr, 10);
|
||
|
dateString = newPosStr;
|
||
|
|
||
|
if ((second < 0) || (second > 59))
|
||
|
return result; // Invalid date
|
||
|
} else {
|
||
|
dateString++;
|
||
|
}
|
||
|
|
||
|
while(*dateString && isspace(*dateString))
|
||
|
dateString++;
|
||
|
}
|
||
|
|
||
|
// don't fail if the time zone is missing, some
|
||
|
// broken mail-/news-clients omit the time zone
|
||
|
if (*dateString) {
|
||
|
if ((strncasecmp(dateString, "gmt", 3) == 0) ||
|
||
|
(strncasecmp(dateString, "utc", 3) == 0))
|
||
|
{
|
||
|
dateString += 3;
|
||
|
while(*dateString && isspace(*dateString))
|
||
|
dateString++;
|
||
|
}
|
||
|
|
||
|
if ((*dateString == '+') || (*dateString == '-')) {
|
||
|
offset = strtol(dateString, &newPosStr, 10);
|
||
|
if (abs(offset) < 30)
|
||
|
{
|
||
|
dateString = newPosStr;
|
||
|
|
||
|
offset = offset * 100;
|
||
|
|
||
|
if (*dateString && *(dateString+1))
|
||
|
{
|
||
|
dateString++;
|
||
|
int minutes = strtol(dateString, &newPosStr, 10);
|
||
|
if (offset > 0)
|
||
|
offset += minutes;
|
||
|
else
|
||
|
offset -= minutes;
|
||
|
}
|
||
|
}
|
||
|
|
||
|
if ((offset < -9959) || (offset > 9959))
|
||
|
return result; // Invalid date
|
||
|
|
||
|
int sgn = (offset < 0)? -1:1;
|
||
|
offset = abs(offset);
|
||
|
offset = ((offset / 100)*60 + (offset % 100))*sgn;
|
||
|
} else {
|
||
|
for (int i=0; known_zones[i].tzName != 0; i++) {
|
||
|
if (0 == strncasecmp(dateString, known_zones[i].tzName, strlen(known_zones[i].tzName))) {
|
||
|
offset = known_zones[i].tzOffset;
|
||
|
break;
|
||
|
}
|
||
|
}
|
||
|
}
|
||
|
}
|
||
|
|
||
|
result = ymdhms_to_seconds(year, month+1, day, hour, minute, second);
|
||
|
|
||
|
// avoid negative time values
|
||
|
if ((offset > 0) && (offset > result))
|
||
|
offset = 0;
|
||
|
|
||
|
result -= offset*60;
|
||
|
|
||
|
// If epoch 0 return epoch +1 which is Thu, 01-Jan-70 00:00:01 GMT
|
||
|
// This is so that parse error and valid epoch 0 return values won't
|
||
|
// be the same for sensitive applications...
|
||
|
if (result < 1) result = 1;
|
||
|
|
||
|
return result;
|
||
|
}
|
||
|
|
||
|
// copied and converted to std::string from KRFCDate::parseDateISO8601
|
||
|
static time_t parseISO8601Date(const std::string &pubDate)
|
||
|
{
|
||
|
if (pubDate.empty()) {
|
||
|
return 0;
|
||
|
}
|
||
|
|
||
|
// These dates look like this:
|
||
|
// YYYY-MM-DDTHH:MM:SS
|
||
|
// But they may also have 0, 1 or 2 suffixes.
|
||
|
// Suffix 1: .secfrac (fraction of second)
|
||
|
// Suffix 2: Either 'Z' or +zone or -zone, where zone is HHMM
|
||
|
|
||
|
unsigned int year = 0;
|
||
|
unsigned int month = 0;
|
||
|
unsigned int mday = 0;
|
||
|
unsigned int hour = 0;
|
||
|
unsigned int min = 0;
|
||
|
unsigned int sec = 0;
|
||
|
|
||
|
int offset = 0;
|
||
|
|
||
|
std::string input = pubDate;
|
||
|
|
||
|
// First find the 'T' separator, if any.
|
||
|
int tPos = input.find('T');
|
||
|
|
||
|
// If there is no time, no month or no day specified, fill those missing
|
||
|
// fields so that 'input' matches YYYY-MM-DDTHH:MM:SS
|
||
|
if (-1 == tPos) {
|
||
|
int dashes = 0;
|
||
|
std::string::iterator it;
|
||
|
for (it = input.begin(); it != input.end(); ++it) {
|
||
|
if (*it == '-') {
|
||
|
++dashes;
|
||
|
}
|
||
|
}
|
||
|
if (0 == dashes) {
|
||
|
input += "-01-01";
|
||
|
} else if (1 == dashes) {
|
||
|
input += "-01";
|
||
|
}
|
||
|
tPos = input.length();
|
||
|
input += "T12:00:00";
|
||
|
}
|
||
|
|
||
|
// Now parse the date part.
|
||
|
|
||
|
std::string dateString = input.substr(0, tPos);//.stripWhiteSpace();
|
||
|
|
||
|
std::string timeString = input.substr(tPos + 1);//.stripWhiteSpace();
|
||
|
|
||
|
std::vector<std::string> l;
|
||
|
splitString(dateString, l, '-');
|
||
|
|
||
|
if (l.size() < 3)
|
||
|
return 0;
|
||
|
|
||
|
sscanf(l[0].c_str(), "%u", &year);
|
||
|
sscanf(l[1].c_str(), "%u", &month);
|
||
|
sscanf(l[2].c_str(), "%u", &mday);
|
||
|
|
||
|
// Z suffix means UTC.
|
||
|
if ('Z' == timeString[timeString.length() - 1]) {
|
||
|
timeString.erase(timeString.length() - 1, 1);
|
||
|
}
|
||
|
|
||
|
// +zone or -zone suffix (offset from UTC).
|
||
|
|
||
|
int plusPos = timeString.find_last_of('+');
|
||
|
|
||
|
if (-1 != plusPos) {
|
||
|
std::string offsetString = timeString.substr(plusPos + 1);
|
||
|
|
||
|
unsigned int offsetHour;
|
||
|
unsigned int offsetMinute;
|
||
|
|
||
|
sscanf(offsetString.substr(0, 1).c_str(), "%u", &offsetHour);
|
||
|
sscanf(offsetString.substr(offsetString.length() - 2).c_str(), "%u", &offsetMinute);
|
||
|
|
||
|
offset = offsetHour * 60 + offsetMinute;
|
||
|
|
||
|
timeString = timeString.substr(0, plusPos);
|
||
|
} else {
|
||
|
int minusPos = timeString.find_last_of('-');
|
||
|
|
||
|
if (-1 != minusPos) {
|
||
|
std::string offsetString = timeString.substr(minusPos + 1);
|
||
|
|
||
|
unsigned int offsetHour;
|
||
|
unsigned int offsetMinute;
|
||
|
|
||
|
sscanf(offsetString.substr(0, 1).c_str(), "%u", &offsetHour);
|
||
|
sscanf(offsetString.substr(offsetString.length() - 2).c_str(), "%u", &offsetMinute);
|
||
|
|
||
|
timeString = timeString.substr(0, minusPos);
|
||
|
}
|
||
|
}
|
||
|
|
||
|
// secfrac suffix.
|
||
|
int dotPos = timeString.find_last_of('.');
|
||
|
|
||
|
if (-1 != dotPos) {
|
||
|
timeString = timeString.substr(0, dotPos);
|
||
|
}
|
||
|
|
||
|
// Now parse the time part.
|
||
|
|
||
|
splitString(timeString, l, ':');
|
||
|
|
||
|
if (l.size() < 3)
|
||
|
return 0;
|
||
|
|
||
|
sscanf(l[0].c_str(), "%u", &hour);
|
||
|
sscanf(l[1].c_str(), "%u", &min);
|
||
|
sscanf(l[2].c_str(), "%u", &sec);
|
||
|
|
||
|
time_t result = ymdhms_to_seconds(year, month, mday, hour, min, sec);
|
||
|
|
||
|
// avoid negative time values
|
||
|
if ((offset > 0) && (offset > result))
|
||
|
offset = 0;
|
||
|
|
||
|
result -= offset*60;
|
||
|
|
||
|
// If epoch 0 return epoch +1 which is Thu, 01-Jan-70 00:00:01 GMT
|
||
|
// This is so that parse error and valid epoch 0 return values won't
|
||
|
// be the same for sensitive applications...
|
||
|
if (result < 1) result = 1;
|
||
|
|
||
|
return result;
|
||
|
}
|
||
|
|
||
|
p3FeedReaderThread::ProcessResult p3FeedReaderThread::process(const RsFeedReaderFeed &feed, std::list<RsFeedReaderMsg*> &entries, std::string &error)
|
||
|
{
|
||
|
#ifdef FEEDREADER_DEBUG
|
||
|
std::cerr << "p3FeedReaderThread::process - feed " << feed.feedId << " (" << feed.name << ")" << std::endl;
|
||
|
#endif
|
||
|
|
||
|
ProcessResult result = PROCESS_SUCCESS;
|
||
|
|
||
|
xmlDocPtr document = xmlReadDoc((const xmlChar*) feed.content.c_str(), "", NULL, XML_PARSE_NOERROR | XML_PARSE_NOWARNING | XML_PARSE_COMPACT | XML_PARSE_NOCDATA);
|
||
|
if (document) {
|
||
|
xmlNodePtr root = xmlDocGetRootElement(document);
|
||
|
if (root) {
|
||
|
FeedFormat feedFormat;
|
||
|
if (xmlStrcmp (root->name, (xmlChar*) "rss") == 0) {
|
||
|
feedFormat = FORMAT_RSS;
|
||
|
} else if (xmlStrcmp (root->name, (xmlChar*) "rdf") != 0) {
|
||
|
feedFormat = FORMAT_RDF;
|
||
|
} else {
|
||
|
result = PROCESS_UNKNOWN_FORMAT;
|
||
|
error = "Only RSS or RDF supported";
|
||
|
}
|
||
|
|
||
|
if (result == PROCESS_SUCCESS) {
|
||
|
xmlNodePtr channel = findNode(root->children, "channel");
|
||
|
if (channel) {
|
||
|
/* import header info */
|
||
|
if (feed.flag & RS_FEED_FLAG_INFO_FROM_FEED) {
|
||
|
std::string title;
|
||
|
if (getChildText(mCharEncodingHandler, channel, "title", title) && !title.empty()) {
|
||
|
std::string::size_type p;
|
||
|
while ((p = title.find_first_of("\r\n")) != std::string::npos) {
|
||
|
title.erase(p, 1);
|
||
|
}
|
||
|
std::string description;
|
||
|
getChildText(mCharEncodingHandler, channel, "description", description);
|
||
|
mFeedReader->setFeedInfo(feed.feedId, title, description);
|
||
|
}
|
||
|
}
|
||
|
|
||
|
/* get item count */
|
||
|
xmlNodePtr node;
|
||
|
for (node = NULL; (node = getNextItem(feedFormat, channel, node)) != NULL; ) {
|
||
|
if (!isRunning()) {
|
||
|
break;
|
||
|
}
|
||
|
|
||
|
std::string title;
|
||
|
if (!getChildText(mCharEncodingHandler, node, "title", title) || title.empty()) {
|
||
|
continue;
|
||
|
}
|
||
|
|
||
|
/* remove newlines */
|
||
|
std::string::size_type p;
|
||
|
while ((p = title.find_first_of("\r\n")) != std::string::npos) {
|
||
|
title.erase(p, 1);
|
||
|
}
|
||
|
|
||
|
RsFeedReaderMsg *item = new RsFeedReaderMsg();
|
||
|
item->msgId.clear(); // is calculated later
|
||
|
item->feedId = feed.feedId;
|
||
|
item->title = title;
|
||
|
|
||
|
getChildText(mCharEncodingHandler, node, "link", item->link);
|
||
|
|
||
|
long todo; // remove sid
|
||
|
// // remove sid=
|
||
|
// CString sLinkUpper = sLink;
|
||
|
// sLinkUpper.MakeUpper ();
|
||
|
// int nSIDStart = sLinkUpper.Find (TEXT("SID="));
|
||
|
// if (nSIDStart != -1) {
|
||
|
// int nSIDEnd1 = sLinkUpper.Find (TEXT(";"), nSIDStart);
|
||
|
// int nSIDEnd2 = sLinkUpper.Find (TEXT("#"), nSIDStart);
|
||
|
|
||
|
// if (nSIDEnd1 == -1) {
|
||
|
// nSIDEnd1 = sLinkUpper.GetLength ();
|
||
|
// }
|
||
|
// if (nSIDEnd2 == -1) {
|
||
|
// nSIDEnd2 = sLinkUpper.GetLength ();
|
||
|
// }
|
||
|
|
||
|
// if (nSIDStart > 0 && sLinkUpper [nSIDStart - 1] == '&') {
|
||
|
// nSIDStart--;
|
||
|
// }
|
||
|
|
||
|
// int nSIDEnd = min (nSIDEnd1, nSIDEnd2);
|
||
|
// sLink.Delete (nSIDStart, nSIDEnd - nSIDStart);
|
||
|
// }
|
||
|
|
||
|
getChildText(mCharEncodingHandler, node, "author", item->author);
|
||
|
getChildText(mCharEncodingHandler, node, "description", item->description);
|
||
|
|
||
|
std::string pubDate;
|
||
|
if (getChildText(mCharEncodingHandler, node, "pubdate", pubDate)) {
|
||
|
item->pubDate = parseRFC822Date(pubDate);
|
||
|
}
|
||
|
if (getChildText(mCharEncodingHandler, node, "date", pubDate)) {
|
||
|
item->pubDate = parseISO8601Date (pubDate);
|
||
|
}
|
||
|
|
||
|
if (item->pubDate == 0) {
|
||
|
/* use current time */
|
||
|
item->pubDate = time(NULL);
|
||
|
}
|
||
|
|
||
|
entries.push_back(item);
|
||
|
}
|
||
|
} else {
|
||
|
result = PROCESS_UNKNOWN_FORMAT;
|
||
|
error = "Channel not found";
|
||
|
}
|
||
|
}
|
||
|
} else {
|
||
|
result = PROCESS_UNKNOWN_FORMAT;
|
||
|
error = "Can't read document";
|
||
|
}
|
||
|
|
||
|
xmlFreeDoc(document);
|
||
|
} else {
|
||
|
result = PROCESS_ERROR_INIT;
|
||
|
}
|
||
|
|
||
|
#ifdef FEEDREADER_DEBUG
|
||
|
std::cerr << "p3FeedReaderThread::process - feed " << feed.feedId << " (" << feed.name << "), result " << result << ", error = " << error << std::endl;
|
||
|
#endif
|
||
|
|
||
|
return result;
|
||
|
}
|