From c44d10a6a1870ae85194f68d6c1bfd6cf4dc17fe Mon Sep 17 00:00:00 2001 From: thunder2 Date: Wed, 2 Jan 2013 20:18:42 +0000 Subject: [PATCH] FeedReader: - Changed compare of node names to case insensitive. More feeds should be supported now. - added ATOM format git-svn-id: http://svn.code.sf.net/p/retroshare/code/trunk@6056 b45a01b8-16f6-495d-af2f-9b41ad6348cc --- plugins/FeedReader/gui/PreviewFeedDialog.cpp | 4 +- .../FeedReader/services/p3FeedReaderThread.cc | 69 ++++++++++++------- plugins/FeedReader/util/XMLWrapper.cpp | 69 ++++++++++++++++++- plugins/FeedReader/util/XMLWrapper.h | 7 +- 4 files changed, 120 insertions(+), 29 deletions(-) diff --git a/plugins/FeedReader/gui/PreviewFeedDialog.cpp b/plugins/FeedReader/gui/PreviewFeedDialog.cpp index 947057798..79f2eed44 100644 --- a/plugins/FeedReader/gui/PreviewFeedDialog.cpp +++ b/plugins/FeedReader/gui/PreviewFeedDialog.cpp @@ -578,7 +578,7 @@ static void buildNodeText(HTMLWrapper &html, xmlNodePtr node, QString &text) if (node->children && !node->children->next && node->children->type == XML_TEXT_NODE) { /* only one text node as child */ std::string content; - if (html.getContent(node->children, content)) { + if (html.getContent(node->children, content, false)) { text += QString::fromUtf8(content.c_str()); } else { text += QApplication::translate("PreviewFeedDialog", "Error getting content"); @@ -597,7 +597,7 @@ static void buildNodeText(HTMLWrapper &html, xmlNodePtr node, QString &text) } std::string content; - if (html.getContent(node, content)) { + if (html.getContent(node, content, false)) { text += QString::fromUtf8(content.c_str()); } else { text += QApplication::translate("PreviewFeedDialog", "Error getting content"); diff --git a/plugins/FeedReader/services/p3FeedReaderThread.cc b/plugins/FeedReader/services/p3FeedReaderThread.cc index 755c715f6..69e7c5741 100644 --- a/plugins/FeedReader/services/p3FeedReaderThread.cc +++ b/plugins/FeedReader/services/p3FeedReaderThread.cc @@ -30,7 +30,7 @@ #include 
#include <unistd.h> // for usleep -enum FeedFormat { FORMAT_RSS, FORMAT_RDF }; +enum FeedFormat { FORMAT_RSS, FORMAT_RDF, FORMAT_ATOM }; /********* * #define FEEDREADER_DEBUG @@ -278,7 +278,8 @@ RsFeedReaderErrorState p3FeedReaderThread::download(const RsFeedReaderFeed &feed if (isContentType(contentType, "text/xml") || isContentType(contentType, "application/rss+xml") || isContentType(contentType, "application/xml") || - isContentType(contentType, "application/xhtml+xml")) { + isContentType(contentType, "application/xhtml+xml") || + isContentType(contentType, "application/atom+xml")) { /* ok */ result = RS_FEED_ERRORSTATE_OK; } else { @@ -321,6 +322,7 @@ static xmlNodePtr getNextItem(FeedFormat feedFormat, xmlNodePtr channel, xmlNode if (!item) { switch (feedFormat) { case FORMAT_RSS: + case FORMAT_ATOM: item = channel->children; break; case FORMAT_RDF: @@ -333,7 +335,7 @@ static xmlNodePtr getNextItem(FeedFormat feedFormat, xmlNodePtr channel, xmlNode item = item->next; } for (; item; item = item->next) { - if (item->type == XML_ELEMENT_NODE && xmlStrEqual(item->name, BAD_CAST"item")) { + if (item->type == XML_ELEMENT_NODE && xmlStrcasecmp(item->name, (feedFormat == FORMAT_ATOM) ? 
BAD_CAST"entry" : BAD_CAST"item") == 0) { break; } } @@ -809,17 +811,29 @@ RsFeedReaderErrorState p3FeedReaderThread::process(const RsFeedReaderFeed &feed, xmlNodePtr root = xml.getRootElement(); if (root) { FeedFormat feedFormat; - if (xmlStrEqual(root->name, BAD_CAST"rss")) { + if (xmlStrcasecmp(root->name, BAD_CAST"rss") == 0) { feedFormat = FORMAT_RSS; - } else if (xmlStrEqual (root->name, BAD_CAST"rdf")) { + } else if (xmlStrcasecmp (root->name, BAD_CAST"rdf") == 0) { feedFormat = FORMAT_RDF; + } else if (xmlStrcasecmp (root->name, BAD_CAST"feed") == 0) { + feedFormat = FORMAT_ATOM; } else { result = RS_FEED_ERRORSTATE_PROCESS_UNKNOWN_FORMAT; - error = "Only RSS or RDF supported"; + error = "Only RSS, RDF or ATOM supported"; } if (result == RS_FEED_ERRORSTATE_OK) { - xmlNodePtr channel = xml.findNode(root->children, "channel"); + xmlNodePtr channel = NULL; + switch (feedFormat) { + case FORMAT_RSS: + case FORMAT_RDF: + channel = xml.findNode(root->children, "channel"); + break; + case FORMAT_ATOM: + channel = root; + break; + } + if (channel) { /* import header info */ if (feed.flag & RS_FEED_FLAG_INFO_FROM_FEED) { @@ -830,7 +844,7 @@ RsFeedReaderErrorState p3FeedReaderThread::process(const RsFeedReaderFeed &feed, title.erase(p, 1); } std::string description; - xml.getChildText(channel, "description", description); + xml.getChildText(channel, (feedFormat == FORMAT_ATOM) ? 
"subtitle" : "description", description); mFeedReader->setFeedInfo(feed.feedId, title, description); } } @@ -888,7 +902,19 @@ RsFeedReaderErrorState p3FeedReaderThread::process(const RsFeedReaderFeed &feed, xml.getChildText(node, "author", item->author); - xml.getChildText(node, "description", item->description); + switch (feedFormat) { + case FORMAT_RSS: + case FORMAT_RDF: + xml.getChildText(node, "description", item->description); + break; + case FORMAT_ATOM: + /* try content */ + if (!xml.getChildText(node, "content", item->description)) { + /* use summary */ + xml.getChildText(node, "summary", item->description); + } + break; + } std::string pubDate; if (xml.getChildText(node, "pubdate", pubDate)) { @@ -897,6 +923,10 @@ RsFeedReaderErrorState p3FeedReaderThread::process(const RsFeedReaderFeed &feed, if (xml.getChildText(node, "date", pubDate)) { item->pubDate = parseISO8601Date (pubDate); } + if (xml.getChildText(node, "updated", pubDate)) { + // atom + item->pubDate = parseISO8601Date (pubDate); + } if (item->pubDate == 0) { /* use current time */ @@ -1029,7 +1059,7 @@ RsFeedReaderErrorState p3FeedReaderThread::processMsg(const RsFeedReaderFeed &fe switch (node->type) { case XML_ELEMENT_NODE: - if (xmlStrEqual(node->name, BAD_CAST"img")) { + if (xmlStrcasecmp(node->name, BAD_CAST"img") == 0) { /* process images */ if ((feed.flag & RS_FEED_FLAG_EMBED_IMAGES) == 0) { @@ -1038,7 +1068,7 @@ RsFeedReaderErrorState p3FeedReaderThread::processMsg(const RsFeedReaderFeed &fe nodesToDelete.push_back(node); continue; } - } else if (xmlStrEqual(node->name, BAD_CAST"script")) { + } else if (xmlStrcasecmp(node->name, BAD_CAST"script") == 0) { /* remove script */ xmlUnlinkNode(node); nodesToDelete.push_back(node); @@ -1055,22 +1085,11 @@ RsFeedReaderErrorState p3FeedReaderThread::processMsg(const RsFeedReaderFeed &fe { /* check for only space */ std::string content; - if (html.getContent(node, content)) { + if (html.getContent(node, content, false)) { std::string newContent 
= content; - /* trim left */ - std::string::size_type find = newContent.find_first_not_of(" \t\r\n"); - if (find != std::string::npos) { - newContent.erase(0, find); - - /* trim right */ - find = newContent.find_last_not_of(" \t\r\n"); - if (find != std::string::npos) { - newContent.erase(find + 1); - } - } else { - newContent.clear(); - } + /* trim */ + XMLWrapper::trimString(newContent); if (newContent.empty()) { xmlUnlinkNode(node); diff --git a/plugins/FeedReader/util/XMLWrapper.cpp b/plugins/FeedReader/util/XMLWrapper.cpp index c1a4b2143..c3d2cab27 100644 --- a/plugins/FeedReader/util/XMLWrapper.cpp +++ b/plugins/FeedReader/util/XMLWrapper.cpp @@ -42,6 +42,24 @@ XMLWrapper::~XMLWrapper() xmlCharEncCloseFunc(mCharEncodingHandler); } +void XMLWrapper::trimString(std::string &string) +{ + /* trim left */ + std::string::size_type find = string.find_first_not_of(" \t\r\n"); + if (find != std::string::npos) { + string.erase(0, find); + + /* trim right */ + find = string.find_last_not_of(" \t\r\n"); + if (find != std::string::npos) { + string.erase(find + 1); + } + } else { + string.clear(); + } +} + + XMLWrapper &XMLWrapper::operator=(const XMLWrapper &xml) { cleanup(); @@ -124,7 +142,7 @@ bool XMLWrapper::readXML(const char *xml) return false; } -bool XMLWrapper::getContent(xmlNodePtr node, std::string &content) +bool XMLWrapper::getContent(xmlNodePtr node, std::string &content, bool trim) { content.clear(); @@ -140,6 +158,10 @@ bool XMLWrapper::getContent(xmlNodePtr node, std::string &content) bool result = convertToString(xmlContent, content); xmlFree(xmlContent); + if (result && trim) { + trimString(content); + } + return result; } @@ -160,6 +182,41 @@ bool XMLWrapper::setContent(xmlNodePtr node, const char *content) return true; } +bool XMLWrapper::nodeDump(xmlNodePtr node, std::string &content, bool trim) +{ + content.clear(); + + if (!mDocument) { + return false; + } + + if (!node) { + return false; + } + + bool result = false; + + xmlBufferPtr buffer = 
xmlBufferCreate(); + if (buffer) { + xmlOutputBufferPtr outputBuffer = xmlOutputBufferCreateBuffer(buffer, NULL); + if (outputBuffer) { + xmlNodeDumpOutput(outputBuffer, mDocument, node, 0, 0, "UTF8"); + xmlOutputBufferClose(outputBuffer); + outputBuffer = NULL; + + result = convertToString(buffer->content, content); + + if (result && trim) { + trimString(content); + } + } + xmlBufferFree(buffer); + buffer = NULL; + } + + return result; +} + std::string XMLWrapper::nodeName(xmlNodePtr node) { std::string name; @@ -229,6 +286,16 @@ bool XMLWrapper::getChildText(xmlNodePtr node, const char *childName, std::strin return false; } + if (getAttr(child, "type") == "xhtml") { + /* search div */ + xmlNodePtr div = findNode(child->children, "div", false); + if (!div) { + return false; + } + + return nodeDump(div, text, true); + } + if (child->children->type != XML_TEXT_NODE) { return false; } diff --git a/plugins/FeedReader/util/XMLWrapper.h b/plugins/FeedReader/util/XMLWrapper.h index 6b94ea796..a815f7a95 100644 --- a/plugins/FeedReader/util/XMLWrapper.h +++ b/plugins/FeedReader/util/XMLWrapper.h @@ -33,6 +33,9 @@ public: XMLWrapper(); ~XMLWrapper(); + // find better place + static void trimString(std::string &string); + XMLWrapper &operator=(const XMLWrapper &xml); void cleanup(); @@ -48,9 +51,11 @@ public: xmlNodePtr findNode(xmlNodePtr node, const char *name, bool children = false); bool getChildText(xmlNodePtr node, const char *childName, std::string &text); - bool getContent(xmlNodePtr node, std::string &content); + bool getContent(xmlNodePtr node, std::string &content, bool trim); bool setContent(xmlNodePtr node, const char *content); + bool nodeDump(xmlNodePtr node, std::string &content, bool trim); + std::string getAttr(xmlNodePtr node, xmlAttrPtr attr); std::string getAttr(xmlNodePtr node, const char *name); bool setAttr(xmlNodePtr node, const char *name, const char *value);