mirror of
https://github.com/RetroShare/RetroShare.git
synced 2024-10-01 02:35:48 -04:00
FeedReader:
- Changed the comparison of node names to be case-insensitive, so more feeds should be supported now. - Added ATOM format. git-svn-id: http://svn.code.sf.net/p/retroshare/code/trunk@6056 b45a01b8-16f6-495d-af2f-9b41ad6348cc
This commit is contained in:
parent
35cc460e71
commit
c44d10a6a1
@ -578,7 +578,7 @@ static void buildNodeText(HTMLWrapper &html, xmlNodePtr node, QString &text)
|
||||
if (node->children && !node->children->next && node->children->type == XML_TEXT_NODE) {
|
||||
/* only one text node as child */
|
||||
std::string content;
|
||||
if (html.getContent(node->children, content)) {
|
||||
if (html.getContent(node->children, content, false)) {
|
||||
text += QString::fromUtf8(content.c_str());
|
||||
} else {
|
||||
text += QApplication::translate("PreviewFeedDialog", "Error getting content");
|
||||
@ -597,7 +597,7 @@ static void buildNodeText(HTMLWrapper &html, xmlNodePtr node, QString &text)
|
||||
}
|
||||
|
||||
std::string content;
|
||||
if (html.getContent(node, content)) {
|
||||
if (html.getContent(node, content, false)) {
|
||||
text += QString::fromUtf8(content.c_str());
|
||||
} else {
|
||||
text += QApplication::translate("PreviewFeedDialog", "Error getting content");
|
||||
|
@ -30,7 +30,7 @@
|
||||
#include <openssl/evp.h>
|
||||
#include <unistd.h> // for usleep
|
||||
|
||||
enum FeedFormat { FORMAT_RSS, FORMAT_RDF };
|
||||
enum FeedFormat { FORMAT_RSS, FORMAT_RDF, FORMAT_ATOM };
|
||||
|
||||
/*********
|
||||
* #define FEEDREADER_DEBUG
|
||||
@ -278,7 +278,8 @@ RsFeedReaderErrorState p3FeedReaderThread::download(const RsFeedReaderFeed &feed
|
||||
if (isContentType(contentType, "text/xml") ||
|
||||
isContentType(contentType, "application/rss+xml") ||
|
||||
isContentType(contentType, "application/xml") ||
|
||||
isContentType(contentType, "application/xhtml+xml")) {
|
||||
isContentType(contentType, "application/xhtml+xml") ||
|
||||
isContentType(contentType, "application/atom+xml")) {
|
||||
/* ok */
|
||||
result = RS_FEED_ERRORSTATE_OK;
|
||||
} else {
|
||||
@ -321,6 +322,7 @@ static xmlNodePtr getNextItem(FeedFormat feedFormat, xmlNodePtr channel, xmlNode
|
||||
if (!item) {
|
||||
switch (feedFormat) {
|
||||
case FORMAT_RSS:
|
||||
case FORMAT_ATOM:
|
||||
item = channel->children;
|
||||
break;
|
||||
case FORMAT_RDF:
|
||||
@ -333,7 +335,7 @@ static xmlNodePtr getNextItem(FeedFormat feedFormat, xmlNodePtr channel, xmlNode
|
||||
item = item->next;
|
||||
}
|
||||
for (; item; item = item->next) {
|
||||
if (item->type == XML_ELEMENT_NODE && xmlStrEqual(item->name, BAD_CAST"item")) {
|
||||
if (item->type == XML_ELEMENT_NODE && xmlStrcasecmp(item->name, (feedFormat == FORMAT_ATOM) ? BAD_CAST"entry" : BAD_CAST"item") == 0) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
@ -809,17 +811,29 @@ RsFeedReaderErrorState p3FeedReaderThread::process(const RsFeedReaderFeed &feed,
|
||||
xmlNodePtr root = xml.getRootElement();
|
||||
if (root) {
|
||||
FeedFormat feedFormat;
|
||||
if (xmlStrEqual(root->name, BAD_CAST"rss")) {
|
||||
if (xmlStrcasecmp(root->name, BAD_CAST"rss") == 0) {
|
||||
feedFormat = FORMAT_RSS;
|
||||
} else if (xmlStrEqual (root->name, BAD_CAST"rdf")) {
|
||||
} else if (xmlStrcasecmp (root->name, BAD_CAST"rdf") == 0) {
|
||||
feedFormat = FORMAT_RDF;
|
||||
} else if (xmlStrcasecmp (root->name, BAD_CAST"feed") == 0) {
|
||||
feedFormat = FORMAT_ATOM;
|
||||
} else {
|
||||
result = RS_FEED_ERRORSTATE_PROCESS_UNKNOWN_FORMAT;
|
||||
error = "Only RSS or RDF supported";
|
||||
error = "Only RSS, RDF or ATOM supported";
|
||||
}
|
||||
|
||||
if (result == RS_FEED_ERRORSTATE_OK) {
|
||||
xmlNodePtr channel = xml.findNode(root->children, "channel");
|
||||
xmlNodePtr channel = NULL;
|
||||
switch (feedFormat) {
|
||||
case FORMAT_RSS:
|
||||
case FORMAT_RDF:
|
||||
channel = xml.findNode(root->children, "channel");
|
||||
break;
|
||||
case FORMAT_ATOM:
|
||||
channel = root;
|
||||
break;
|
||||
}
|
||||
|
||||
if (channel) {
|
||||
/* import header info */
|
||||
if (feed.flag & RS_FEED_FLAG_INFO_FROM_FEED) {
|
||||
@ -830,7 +844,7 @@ RsFeedReaderErrorState p3FeedReaderThread::process(const RsFeedReaderFeed &feed,
|
||||
title.erase(p, 1);
|
||||
}
|
||||
std::string description;
|
||||
xml.getChildText(channel, "description", description);
|
||||
xml.getChildText(channel, (feedFormat == FORMAT_ATOM) ? "subtitle" : "description", description);
|
||||
mFeedReader->setFeedInfo(feed.feedId, title, description);
|
||||
}
|
||||
}
|
||||
@ -888,7 +902,19 @@ RsFeedReaderErrorState p3FeedReaderThread::process(const RsFeedReaderFeed &feed,
|
||||
|
||||
xml.getChildText(node, "author", item->author);
|
||||
|
||||
xml.getChildText(node, "description", item->description);
|
||||
switch (feedFormat) {
|
||||
case FORMAT_RSS:
|
||||
case FORMAT_RDF:
|
||||
xml.getChildText(node, "description", item->description);
|
||||
break;
|
||||
case FORMAT_ATOM:
|
||||
/* try content */
|
||||
if (!xml.getChildText(node, "content", item->description)) {
|
||||
/* use summary */
|
||||
xml.getChildText(node, "summary", item->description);
|
||||
}
|
||||
break;
|
||||
}
|
||||
|
||||
std::string pubDate;
|
||||
if (xml.getChildText(node, "pubdate", pubDate)) {
|
||||
@ -897,6 +923,10 @@ RsFeedReaderErrorState p3FeedReaderThread::process(const RsFeedReaderFeed &feed,
|
||||
if (xml.getChildText(node, "date", pubDate)) {
|
||||
item->pubDate = parseISO8601Date (pubDate);
|
||||
}
|
||||
if (xml.getChildText(node, "updated", pubDate)) {
|
||||
// atom
|
||||
item->pubDate = parseISO8601Date (pubDate);
|
||||
}
|
||||
|
||||
if (item->pubDate == 0) {
|
||||
/* use current time */
|
||||
@ -1029,7 +1059,7 @@ RsFeedReaderErrorState p3FeedReaderThread::processMsg(const RsFeedReaderFeed &fe
|
||||
|
||||
switch (node->type) {
|
||||
case XML_ELEMENT_NODE:
|
||||
if (xmlStrEqual(node->name, BAD_CAST"img")) {
|
||||
if (xmlStrcasecmp(node->name, BAD_CAST"img") == 0) {
|
||||
/* process images */
|
||||
|
||||
if ((feed.flag & RS_FEED_FLAG_EMBED_IMAGES) == 0) {
|
||||
@ -1038,7 +1068,7 @@ RsFeedReaderErrorState p3FeedReaderThread::processMsg(const RsFeedReaderFeed &fe
|
||||
nodesToDelete.push_back(node);
|
||||
continue;
|
||||
}
|
||||
} else if (xmlStrEqual(node->name, BAD_CAST"script")) {
|
||||
} else if (xmlStrcasecmp(node->name, BAD_CAST"script") == 0) {
|
||||
/* remove script */
|
||||
xmlUnlinkNode(node);
|
||||
nodesToDelete.push_back(node);
|
||||
@ -1055,22 +1085,11 @@ RsFeedReaderErrorState p3FeedReaderThread::processMsg(const RsFeedReaderFeed &fe
|
||||
{
|
||||
/* check for only space */
|
||||
std::string content;
|
||||
if (html.getContent(node, content)) {
|
||||
if (html.getContent(node, content, false)) {
|
||||
std::string newContent = content;
|
||||
|
||||
/* trim left */
|
||||
std::string::size_type find = newContent.find_first_not_of(" \t\r\n");
|
||||
if (find != std::string::npos) {
|
||||
newContent.erase(0, find);
|
||||
|
||||
/* trim right */
|
||||
find = newContent.find_last_not_of(" \t\r\n");
|
||||
if (find != std::string::npos) {
|
||||
newContent.erase(find + 1);
|
||||
}
|
||||
} else {
|
||||
newContent.clear();
|
||||
}
|
||||
/* trim */
|
||||
XMLWrapper::trimString(newContent);
|
||||
|
||||
if (newContent.empty()) {
|
||||
xmlUnlinkNode(node);
|
||||
|
@ -42,6 +42,24 @@ XMLWrapper::~XMLWrapper()
|
||||
xmlCharEncCloseFunc(mCharEncodingHandler);
|
||||
}
|
||||
|
||||
/* Strips leading and trailing blanks, tabs, carriage returns and line feeds
 * from the given string in place. A string made up only of these characters
 * (or an empty string) ends up empty. */
void XMLWrapper::trimString(std::string &string)
{
	static const char *const whitespace = " \t\r\n";

	const std::string::size_type first = string.find_first_not_of(whitespace);
	if (first == std::string::npos) {
		/* nothing but whitespace */
		string.clear();
		return;
	}

	/* at least one non-whitespace character exists, so this search cannot fail */
	const std::string::size_type last = string.find_last_not_of(whitespace);

	/* cut the tail first so that "first" stays a valid index for the head cut */
	string.erase(last + 1);
	string.erase(0, first);
}
|
||||
|
||||
|
||||
XMLWrapper &XMLWrapper::operator=(const XMLWrapper &xml)
|
||||
{
|
||||
cleanup();
|
||||
@ -124,7 +142,7 @@ bool XMLWrapper::readXML(const char *xml)
|
||||
return false;
|
||||
}
|
||||
|
||||
bool XMLWrapper::getContent(xmlNodePtr node, std::string &content)
|
||||
bool XMLWrapper::getContent(xmlNodePtr node, std::string &content, bool trim)
|
||||
{
|
||||
content.clear();
|
||||
|
||||
@ -140,6 +158,10 @@ bool XMLWrapper::getContent(xmlNodePtr node, std::string &content)
|
||||
bool result = convertToString(xmlContent, content);
|
||||
xmlFree(xmlContent);
|
||||
|
||||
if (result && trim) {
|
||||
trimString(content);
|
||||
}
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
@ -160,6 +182,41 @@ bool XMLWrapper::setContent(xmlNodePtr node, const char *content)
|
||||
return true;
|
||||
}
|
||||
|
||||
/* Dumps the given node through libxml2's xmlNodeDumpOutput into a temporary
 * buffer and converts the result with convertToString.
 *
 * node    - node to dump; a NULL node yields false
 * content - receives the converted text; always cleared first
 * trim    - when true, the result is passed through trimString
 *
 * Returns true when the node was dumped and converted successfully.
 * Returns false when there is no document, no node, a buffer could not be
 * created, closing the output buffer reported an error or the conversion
 * failed. */
bool XMLWrapper::nodeDump(xmlNodePtr node, std::string &content, bool trim)
{
	content.clear();

	if (!mDocument || !node) {
		return false;
	}

	xmlBufferPtr buffer = xmlBufferCreate();
	if (!buffer) {
		return false;
	}

	bool result = false;

	xmlOutputBufferPtr outputBuffer = xmlOutputBufferCreateBuffer(buffer, NULL);
	if (outputBuffer) {
		xmlNodeDumpOutput(outputBuffer, mDocument, node, 0, 0, "UTF8");

		/* xmlOutputBufferClose flushes the pending output into "buffer" and
		 * releases the output buffer. It returns -1 on error; in that case
		 * the dump may be incomplete and must not be reported as success. */
		if (xmlOutputBufferClose(outputBuffer) >= 0) {
			result = convertToString(buffer->content, content);

			if (result && trim) {
				trimString(content);
			}
		}
	}

	xmlBufferFree(buffer);

	return result;
}
|
||||
|
||||
std::string XMLWrapper::nodeName(xmlNodePtr node)
|
||||
{
|
||||
std::string name;
|
||||
@ -229,6 +286,16 @@ bool XMLWrapper::getChildText(xmlNodePtr node, const char *childName, std::strin
|
||||
return false;
|
||||
}
|
||||
|
||||
if (getAttr(child, "type") == "xhtml") {
|
||||
/* search div */
|
||||
xmlNodePtr div = findNode(child->children, "div", false);
|
||||
if (!div) {
|
||||
return false;
|
||||
}
|
||||
|
||||
return nodeDump(div, text, true);
|
||||
}
|
||||
|
||||
if (child->children->type != XML_TEXT_NODE) {
|
||||
return false;
|
||||
}
|
||||
|
@ -33,6 +33,9 @@ public:
|
||||
XMLWrapper();
|
||||
~XMLWrapper();
|
||||
|
||||
// find better place
|
||||
static void trimString(std::string &string);
|
||||
|
||||
XMLWrapper &operator=(const XMLWrapper &xml);
|
||||
|
||||
void cleanup();
|
||||
@ -48,9 +51,11 @@ public:
|
||||
xmlNodePtr findNode(xmlNodePtr node, const char *name, bool children = false);
|
||||
bool getChildText(xmlNodePtr node, const char *childName, std::string &text);
|
||||
|
||||
bool getContent(xmlNodePtr node, std::string &content);
|
||||
bool getContent(xmlNodePtr node, std::string &content, bool trim);
|
||||
bool setContent(xmlNodePtr node, const char *content);
|
||||
|
||||
bool nodeDump(xmlNodePtr node, std::string &content, bool trim);
|
||||
|
||||
std::string getAttr(xmlNodePtr node, xmlAttrPtr attr);
|
||||
std::string getAttr(xmlNodePtr node, const char *name);
|
||||
bool setAttr(xmlNodePtr node, const char *name, const char *value);
|
||||
|
Loading…
Reference in New Issue
Block a user