FeedReader: Added processing of enclosure in RSS feed

This commit is contained in:
thunder2 2023-05-09 07:49:34 +02:00
parent f9ca6cd3e1
commit ad9d566767
15 changed files with 406 additions and 108 deletions

View file

@ -233,6 +233,13 @@ static void feedMsgToInfo(const RsFeedReaderMsg *msg, FeedMsgInfo &info)
info.description = msg->description;
info.descriptionTransformed = msg->descriptionTransformed;
info.pubDate = msg->pubDate;
info.attachmentLink = msg->attachmentLink;
if (!msg->attachment.empty()) {
p3FeedReaderThread::fromBase64(msg->attachment, info.attachment);
} else {
info.attachment.clear();
}
info.attachmentMimeType = msg->attachmentMimeType;
info.flag.isnew = (msg->flag & RS_FEEDMSG_FLAG_NEW);
info.flag.read = (msg->flag & RS_FEEDMSG_FLAG_READ);
@ -1264,7 +1271,7 @@ bool p3FeedReader::setMessageRead(uint32_t feedId, const std::string &msgId, boo
}
if (changed) {
IndicateConfigChanged(RsConfigMgr::CheckPriority::SAVE_NOW);
IndicateConfigChanged(RsConfigMgr::CheckPriority::SAVE_OFTEN);
if (mNotify) {
mNotify->notifyFeedChanged(feedId, NOTIFY_TYPE_MOD);
mNotify->notifyMsgChanged(feedId, msgId, NOTIFY_TYPE_MOD);
@ -1450,19 +1457,19 @@ int p3FeedReader::tick()
}
// check images
bool imageToShrink = false;
bool imageToOptimze = false;
{
RsStackMutex stack(mImageMutex); /******* LOCK STACK MUTEX *********/
imageToShrink = !mImages.empty();
imageToOptimze = !mImages.empty();
}
if (mNotify) {
for (it = notifyIds.begin(); it != notifyIds.end(); ++it) {
mNotify->notifyFeedChanged(*it, NOTIFY_TYPE_MOD);
}
if (imageToShrink) {
mNotify->notifyShrinkImage();
if (imageToOptimze) {
mNotify->notifyOptimizeImage();
}
}
@ -2110,6 +2117,9 @@ void p3FeedReader::onProcessSuccess_addMsgs(uint32_t feedId, std::list<RsFeedRea
}
miNew->description.clear();
miNew->descriptionTransformed.clear();
miNew->attachmentLink.clear();
miNew->attachment.clear();
miNew->attachmentMimeType.clear();
} else {
miNew->flag = RS_FEEDMSG_FLAG_NEW;
addedMsgs.push_back(miNew->msgId);
@ -2151,6 +2161,25 @@ void p3FeedReader::onProcessSuccess_addMsgs(uint32_t feedId, std::list<RsFeedRea
if (!mi.link.empty()) {
description += "<br><a href=\"" + mi.link + "\">" + mi.link + "</a>";
}
if (!mi.attachmentBinary.empty()) {
if (p3FeedReaderThread::isContentType(mi.attachmentMimeType, "image/")) {
/* add attachment to description */
// optimize image
std::vector<unsigned char> optimizedImage;
std::string optimizedMimeType;
if (optimizeImage(FeedReaderOptimizeImageTask::SIZE, mi.attachmentBinary, mi.attachmentBinaryMimeType, optimizedImage, optimizedMimeType)) {
std::string base64;
if (p3FeedReaderThread::toBase64(optimizedImage, base64)) {
std::string imageBase64;
rs_sprintf(imageBase64, "data:%s;base64,%s", optimizedMimeType.c_str(), base64.c_str());
description += "<br><img src=\"" + imageBase64 + "\"/>";
}
}
}
}
forumMsg.mMsg = description;
uint32_t token;
@ -2197,10 +2226,11 @@ void p3FeedReader::onProcessSuccess_addMsgs(uint32_t feedId, std::list<RsFeedRea
if (!mi.postedFirstImage.empty()) {
/* use first image as image for posted and description without image as notes */
if (feedFlag & RS_FEED_FLAG_POSTED_SHRINK_IMAGE) {
// shrink image
std::vector<unsigned char> shrinkedImage;
if (shrinkImage(FeedReaderShrinkImageTask::POSTED, mi.postedFirstImage, shrinkedImage)) {
postedPost.mImage.copy(shrinkedImage.data(), shrinkedImage.size());
// optimize image
std::vector<unsigned char> optimizedImage;
std::string optimizedMimeType;
if (optimizeImage(FeedReaderOptimizeImageTask::POSTED, mi.postedFirstImage, mi.postedFirstImageMimeType, optimizedImage, optimizedMimeType)) {
postedPost.mImage.copy(optimizedImage.data(), optimizedImage.size());
}
} else {
postedPost.mImage.copy(mi.postedFirstImage.data(), mi.postedFirstImage.size());
@ -2219,6 +2249,23 @@ void p3FeedReader::onProcessSuccess_addMsgs(uint32_t feedId, std::list<RsFeedRea
}
} else {
description = mi.descriptionTransformed.empty() ? mi.description : mi.descriptionTransformed;
if (!mi.attachmentBinary.empty()) {
if (p3FeedReaderThread::isContentType(mi.attachmentMimeType, "image/")) {
/* use attachment as image */
if (feedFlag & RS_FEED_FLAG_POSTED_SHRINK_IMAGE) {
// optimize image
std::vector<unsigned char> optimizedImage;
std::string optimizedMimeType;
if (optimizeImage(FeedReaderOptimizeImageTask::POSTED, mi.attachmentBinary, mi.attachmentBinaryMimeType, optimizedImage, optimizedMimeType)) {
postedPost.mImage.copy(optimizedImage.data(), optimizedImage.size());
}
} else {
postedPost.mImage.copy(mi.attachmentBinary.data(), mi.attachmentBinary.size());
}
}
}
}
postedPost.mNotes = description;
@ -2662,17 +2709,17 @@ bool p3FeedReader::getPostedGroups(std::vector<RsPostedGroup> &groups, bool only
return true;
}
bool p3FeedReader::shrinkImage(FeedReaderShrinkImageTask::Type type, const std::vector<unsigned char> &image, std::vector<unsigned char> &resultImage)
bool p3FeedReader::optimizeImage(FeedReaderOptimizeImageTask::Type type, const std::vector<unsigned char> &image, const std::string &mimeType, std::vector<unsigned char> &resultImage, std::string &resultMimeType)
{
if (!mNotify) {
return false;
}
FeedReaderShrinkImageTask *shrinkImageTask = new FeedReaderShrinkImageTask(type, image);
FeedReaderOptimizeImageTask *optimizeImageTask = new FeedReaderOptimizeImageTask(type, image, mimeType);
{
RsStackMutex stack(mImageMutex); /******* LOCK STACK MUTEX *********/
mImages.push_back(shrinkImageTask);
mImages.push_back(optimizeImageTask);
}
/* Wait until task is complete */
@ -2686,11 +2733,11 @@ bool p3FeedReader::shrinkImage(FeedReaderShrinkImageTask::Type type, const std::
if (++nSeconds >= 30) {
// timeout
std::list<FeedReaderShrinkImageTask*>::iterator it = std::find(mImages.begin(), mImages.end(), shrinkImageTask);
std::list<FeedReaderOptimizeImageTask*>::iterator it = std::find(mImages.begin(), mImages.end(), optimizeImageTask);
if (it != mImages.end()) {
mImages.erase(it);
delete(shrinkImageTask);
delete(optimizeImageTask);
return false;
}
@ -2701,16 +2748,17 @@ bool p3FeedReader::shrinkImage(FeedReaderShrinkImageTask::Type type, const std::
{
RsStackMutex stack(mImageMutex); /******* LOCK STACK MUTEX *********/
std::list<FeedReaderShrinkImageTask*>::iterator it = std::find(mResultImages.begin(), mResultImages.end(), shrinkImageTask);
std::list<FeedReaderOptimizeImageTask*>::iterator it = std::find(mResultImages.begin(), mResultImages.end(), optimizeImageTask);
if (it != mResultImages.end()) {
mResultImages.erase(it);
bool result = shrinkImageTask->mResult;
bool result = optimizeImageTask->mResult;
if (result) {
resultImage = shrinkImageTask->mImageResult;
resultImage = optimizeImageTask->mImageResult;
resultMimeType = optimizeImageTask->mMimeTypeResult;
}
delete(shrinkImageTask);
delete(optimizeImageTask);
return result;
}
@ -2720,7 +2768,7 @@ bool p3FeedReader::shrinkImage(FeedReaderShrinkImageTask::Type type, const std::
return false;
}
FeedReaderShrinkImageTask *p3FeedReader::getShrinkImageTask()
FeedReaderOptimizeImageTask *p3FeedReader::getOptimizeImageTask()
{
RsStackMutex stack(mImageMutex); /******* LOCK STACK MUTEX *********/
@ -2728,19 +2776,19 @@ FeedReaderShrinkImageTask *p3FeedReader::getShrinkImageTask()
return NULL;
}
FeedReaderShrinkImageTask *imageResize = mImages.front();
FeedReaderOptimizeImageTask *imageResize = mImages.front();
mImages.pop_front();
return imageResize;
}
/* Hand a processed image task back to the feed reader.
 * A null task pointer is ignored. Otherwise the pointer is appended to
 * mResultImages while mImageMutex is held. */
void p3FeedReader::setShrinkImageTaskResult(FeedReaderShrinkImageTask *shrinkImageTask)
void p3FeedReader::setOptimizeImageTaskResult(FeedReaderOptimizeImageTask *optimizeImageTask)
{
if (!shrinkImageTask) {
if (!optimizeImageTask) {
return;
}
RsStackMutex stack(mImageMutex); /******* LOCK STACK MUTEX *********/
mResultImages.push_back(shrinkImageTask);
mResultImages.push_back(optimizeImageTask);
}

View file

@ -80,8 +80,8 @@ public:
virtual bool getForumGroups(std::vector<RsGxsForumGroup> &groups, bool onlyOwn);
virtual bool getPostedGroups(std::vector<RsPostedGroup> &groups, bool onlyOwn);
virtual FeedReaderShrinkImageTask *getShrinkImageTask();
virtual void setShrinkImageTaskResult(FeedReaderShrinkImageTask *shrinkedImageTask);
virtual FeedReaderOptimizeImageTask *getOptimizeImageTask();
virtual void setOptimizeImageTaskResult(FeedReaderOptimizeImageTask *optimizeImageTask);
virtual RsFeedReaderErrorState processXPath(const std::list<std::string> &xpathsToUse, const std::list<std::string> &xpathsToRemove, std::string &description, std::string &errorString);
virtual RsFeedReaderErrorState processXslt(const std::string &xslt, std::string &description, std::string &errorString);
@ -107,7 +107,7 @@ public:
bool getPostedGroup(const RsGxsGroupId &groupId, RsPostedGroup &postedGroup);
bool updatePostedGroup(const RsPostedGroup &postedGroup, const std::string &groupName, const std::string &groupDescription);
bool waitForToken(RsGxsIfaceHelper *interface, uint32_t token);
bool shrinkImage(FeedReaderShrinkImageTask::Type type, const std::vector<unsigned char> &image, std::vector<unsigned char> &resultImage);
bool optimizeImage(FeedReaderOptimizeImageTask::Type type, const std::vector<unsigned char> &image, const std::string &mimeType, std::vector<unsigned char> &resultImage, std::string &resultMimeType);
protected:
/****************** p3Config STUFF *******************/
@ -150,8 +150,8 @@ private:
std::list<uint32_t> mProcessFeeds;
RsMutex mImageMutex;
std::list<FeedReaderShrinkImageTask*> mImages;
std::list<FeedReaderShrinkImageTask*> mResultImages;
std::list<FeedReaderOptimizeImageTask*> mImages;
std::list<FeedReaderOptimizeImageTask*> mResultImages;
RsMutex mPreviewMutex;
p3FeedReaderThread *mPreviewDownloadThread;

View file

@ -89,7 +89,7 @@ void p3FeedReaderThread::threadTick()
/* first, filter the messages */
mFeedReader->onProcessSuccess_filterMsg(feed.feedId, msgs);
if (isRunning()) {
/* second, process the descriptions */
/* second, process the descriptions and attachment */
for (it = msgs.begin(); it != msgs.end(); ) {
if (!isRunning()) {
break;
@ -153,12 +153,12 @@ void p3FeedReaderThread::threadTick()
/****************************** Download ***********************************/
/***************************************************************************/
static bool isContentType(const std::string &contentType, const char *type)
bool p3FeedReaderThread::isContentType(const std::string &contentType, const char *type)
{
	/* Case-insensitive prefix match: only the first strlen(type) characters
	 * of contentType are compared, so trailing parameters such as
	 * "; charset=..." do not matter. */
	const size_t prefixLength = strlen(type);
	const int difference = strncasecmp(contentType.c_str(), type, prefixLength);

	return difference == 0;
}
static bool toBase64(const std::vector<unsigned char> &data, std::string &base64)
bool p3FeedReaderThread::toBase64(const std::vector<unsigned char> &data, std::string &base64)
{
bool result = false;
@ -187,6 +187,28 @@ static bool toBase64(const std::vector<unsigned char> &data, std::string &base64
return result;
}
/**
 * Decode a base64 string (without line breaks) into raw bytes using the
 * OpenSSL base64 BIO filter.
 *
 * @param base64 base64 encoded input
 * @param data   receives the decoded bytes; it is emptied first and stays
 *               empty when decoding fails
 * @return true when the input was decoded (an empty input decodes to an
 *         empty result), false when a BIO could not be created, the input
 *         is too large for the int based BIO API, or BIO_read delivered no
 *         data
 */
bool p3FeedReaderThread::fromBase64(const std::string &base64, std::vector<unsigned char> &data)
{
	data.clear();

	if (base64.empty()) {
		/* nothing to decode */
		return true;
	}

	/* Four base64 characters decode to at most three bytes. BIO_read takes
	 * an int length, so reject inputs whose buffer size does not fit. */
	const size_t maxSize = base64.length() / 4 * 3 + 1;
	const int maxlen = static_cast<int>(maxSize);
	if (maxlen <= 0 || static_cast<size_t>(maxlen) != maxSize) {
		return false;
	}

	BIO *b64 = BIO_new(BIO_f_base64());
	if (!b64) {
		return false;
	}
	BIO_set_flags(b64, BIO_FLAGS_BASE64_NO_NL);

	BIO *source = BIO_new_mem_buf(base64.c_str(), -1); // read-only source
	if (!source) {
		BIO_free_all(b64);
		return false;
	}
	BIO_push(b64, source);

	data.resize(maxSize);
	const int len = BIO_read(b64, data.data(), maxlen);

	/* frees the whole chain, including the pushed memory BIO */
	BIO_free_all(b64);

	if (len <= 0) {
		/* BIO_read reports errors with a value <= 0; passing that value to
		 * resize() would wrap around to a huge size */
		data.clear();
		return false;
	}

	data.resize(static_cast<size_t>(len));
	return true;
}
static std::string getBaseLink(std::string link)
{
size_t found = link.rfind('/');
@ -254,12 +276,12 @@ static bool getFavicon(CURLWrapper &CURL, const std::string &url, std::string &i
if (code == CURLE_OK) {
if (CURL.responseCode() == 200) {
std::string contentType = CURL.contentType();
if (isContentType(contentType, "image/") ||
isContentType(contentType, "application/octet-stream") ||
isContentType(contentType, "text/plain")) {
if (p3FeedReaderThread::isContentType(contentType, "image/") ||
p3FeedReaderThread::isContentType(contentType, "application/octet-stream") ||
p3FeedReaderThread::isContentType(contentType, "text/plain")) {
if (!vicon.empty()) {
#warning p3FeedReaderThread.cc TODO thunder2: check it
result = toBase64(vicon, icon);
result = p3FeedReaderThread::toBase64(vicon, icon);
}
}
}
@ -971,6 +993,19 @@ RsFeedReaderErrorState p3FeedReaderThread::process(const RsFeedReaderFeed &feed,
item->pubDate = time(NULL);
}
if (feedFormat == FORMAT_RSS) {
/* <enclosure url="" type=""></enclosure> */
xmlNodePtr enclosure = xml.findNode(node->children, "enclosure", false);
if (enclosure) {
std::string enclosureMimeType = xml.getAttr(enclosure, "type");
std::string enclosureUrl = xml.getAttr(enclosure, "url");
if (!enclosureUrl.empty()) {
item->attachmentLink = enclosureUrl;
item->attachmentMimeType = enclosureMimeType;
}
}
}
entries.push_back(item);
}
} else {
@ -1025,6 +1060,40 @@ RsFeedReaderErrorState p3FeedReaderThread::processMsg(const RsFeedReaderFeed &fe
RsFeedReaderErrorState result = RS_FEED_ERRORSTATE_OK;
std::string proxy = getProxyForFeed(feed);
/* attachment */
if (!msg->attachmentLink.empty()) {
if (isContentType(msg->attachmentMimeType, "image/")) {
CURLWrapper CURL(proxy);
CURLcode code = CURL.downloadBinary(msg->attachmentLink, msg->attachmentBinary);
if (code == CURLE_OK && CURL.responseCode() == 200) {
std::string contentType = CURL.contentType();
if (isContentType(contentType, "image/")) {
msg->attachmentBinaryMimeType = contentType;
bool forum = (feed.flag & RS_FEED_FLAG_FORUM) && !feed.preview;
bool posted = (feed.flag & RS_FEED_FLAG_POSTED) && !feed.preview;
if (!forum && ! posted) {
/* local feed only (neither forum nor posted): optimize the image now and keep it as base64 */
std::vector<unsigned char> optimizedBinary;
std::string optimizedMimeType;
if (mFeedReader->optimizeImage(FeedReaderOptimizeImageTask::SIZE, msg->attachmentBinary, msg->attachmentBinaryMimeType, optimizedBinary, optimizedMimeType)) {
if (toBase64(optimizedBinary, msg->attachment)) {
msg->attachmentMimeType = optimizedMimeType;
} else {
msg->attachment.clear();
}
}
}
} else {
msg->attachmentBinary.clear();
}
} else {
msg->attachmentBinary.clear();
}
}
}
std::string url;
if (feed.flag & RS_FEED_FLAG_SAVE_COMPLETE_PAGE) {
#ifdef FEEDREADER_DEBUG
@ -1083,6 +1152,10 @@ RsFeedReaderErrorState p3FeedReaderThread::processMsg(const RsFeedReaderFeed &fe
if (isRunning()) {
/* process description */
bool processPostedFirstImage = (feed.flag & RS_FEED_FLAG_POSTED_FIRST_IMAGE) ? TRUE : FALSE;
if (!msg->attachmentBinary.empty()) {
/* use attachment as image */
processPostedFirstImage = FALSE;
}
//long todo; // encoding
HTMLWrapper html;
@ -1215,20 +1288,24 @@ RsFeedReaderErrorState p3FeedReaderThread::processMsg(const RsFeedReaderFeed &fe
if (code == CURLE_OK && CURL.responseCode() == 200) {
std::string contentType = CURL.contentType();
if (isContentType(contentType, "image/")) {
std::string base64;
if (toBase64(data, base64)) {
std::string imageBase64;
rs_sprintf(imageBase64, "data:%s;base64,%s", contentType.c_str(), base64.c_str());
if (html.setAttr(node, "src", imageBase64.c_str())) {
removeImage = false;
if (processPostedFirstImage && postedFirstImageNode == NULL) {
/* set first image */
msg->postedFirstImage = data;
postedFirstImageNode = node;
std::vector<unsigned char> optimizedData;
std::string optimizedMimeType;
if (mFeedReader->optimizeImage(FeedReaderOptimizeImageTask::SIZE, data, contentType, optimizedData, optimizedMimeType)) {
std::string base64;
if (toBase64(optimizedData, base64)) {
std::string imageBase64;
rs_sprintf(imageBase64, "data:%s;base64,%s", optimizedMimeType.c_str(), base64.c_str());
if (html.setAttr(node, "src", imageBase64.c_str())) {
removeImage = false;
}
}
}
if (processPostedFirstImage && postedFirstImageNode == NULL) {
/* set first image */
msg->postedFirstImage = data;
msg->postedFirstImageMimeType = contentType;
postedFirstImageNode = node;
}
}
}
}

View file

@ -54,6 +54,11 @@ public:
static RsFeedReaderErrorState processXslt(const std::string &xslt, HTMLWrapper &html, std::string &errorString);
static RsFeedReaderErrorState processTransformation(const RsFeedReaderFeed &feed, RsFeedReaderMsg *msg, std::string &errorString);
/// Case-insensitive check whether contentType starts with the prefix given in type (e.g. "image/").
static bool isContentType(const std::string &contentType, const char *type);
/// Encodes the bytes in data as base64 text into base64; returns true on success.
static bool toBase64(const std::vector<unsigned char> &data, std::string &base64);
/// Decodes the base64 text in base64 into the byte vector data (counterpart of toBase64).
static bool fromBase64(const std::string &base64, std::vector<unsigned char> &data);
private:
virtual void threadTick() override; /// @see RsTickingThread

View file

@ -274,6 +274,9 @@ void RsFeedReaderMsg::clear()
descriptionTransformed.clear();
pubDate = 0;
flag = 0;
attachmentLink.clear();
attachment.clear();
attachmentMimeType.clear();
}
std::ostream &RsFeedReaderMsg::print(std::ostream &out, uint16_t /*indent*/)
@ -294,6 +297,9 @@ uint32_t RsFeedReaderSerialiser::sizeMsg(RsFeedReaderMsg *item)
s += GetTlvStringSize(item->descriptionTransformed);
s += sizeof(uint32_t); /* pubDate */
s += sizeof(uint32_t); /* flag */
s += GetTlvStringSize(item->attachmentLink);
s += GetTlvStringSize(item->attachment);
s += GetTlvStringSize(item->attachmentMimeType);
return s;
}
@ -317,7 +323,7 @@ bool RsFeedReaderSerialiser::serialiseMsg(RsFeedReaderMsg *item, void *data, uin
offset += 8;
/* add values */
ok &= setRawUInt16(data, tlvsize, &offset, 2); /* version */
ok &= setRawUInt16(data, tlvsize, &offset, 3); /* version */
ok &= SetTlvString(data, tlvsize, &offset, TLV_TYPE_STR_GENID, item->msgId);
ok &= setRawUInt32(data, tlvsize, &offset, item->feedId);
ok &= SetTlvString(data, tlvsize, &offset, TLV_TYPE_STR_NAME, item->title);
@ -327,6 +333,9 @@ bool RsFeedReaderSerialiser::serialiseMsg(RsFeedReaderMsg *item, void *data, uin
ok &= SetTlvString(data, tlvsize, &offset, TLV_TYPE_STR_COMMENT, item->descriptionTransformed);
ok &= setRawUInt32(data, tlvsize, &offset, item->pubDate);
ok &= setRawUInt32(data, tlvsize, &offset, item->flag);
ok &= SetTlvString(data, tlvsize, &offset, TLV_TYPE_STR_LOCATION, item->attachmentLink);
ok &= SetTlvString(data, tlvsize, &offset, TLV_TYPE_STR_PIC_AUTH, item->attachment);
ok &= SetTlvString(data, tlvsize, &offset, TLV_TYPE_STR_PIC_TYPE, item->attachmentMimeType);
if (offset != tlvsize)
{
@ -390,6 +399,11 @@ RsFeedReaderMsg *RsFeedReaderSerialiser::deserialiseMsg(void *data, uint32_t *pk
}
ok &= getRawUInt32(data, rssize, &offset, (uint32_t*) &(item->pubDate));
ok &= getRawUInt32(data, rssize, &offset, &(item->flag));
if (version >= 3) {
ok &= GetTlvString(data, rssize, &offset, TLV_TYPE_STR_LOCATION, item->attachmentLink);
ok &= GetTlvString(data, rssize, &offset, TLV_TYPE_STR_PIC_AUTH, item->attachment);
ok &= GetTlvString(data, rssize, &offset, TLV_TYPE_STR_PIC_TYPE, item->attachmentMimeType);
}
if (offset != rssize)
{

View file

@ -124,9 +124,15 @@ public:
std::string descriptionTransformed;
time_t pubDate;
uint32_t flag; // RS_FEEDMSG_FLAG_...
std::string attachmentLink;
std::string attachment; // binary as base64
std::string attachmentMimeType;
// Only in memory when receiving messages
std::vector<unsigned char> attachmentBinary;
std::string attachmentBinaryMimeType;
std::vector<unsigned char> postedFirstImage;
std::string postedFirstImageMimeType;
std::string postedDescriptionWithoutFirstImage;
};