Compare commits

...

13 Commits

Author SHA1 Message Date
Tulir Asokan
81ec8ed864 Bump version to 0.4.1 2025-01-30 15:53:50 +02:00
Tulir Asokan
72d08096b7 Pass raw data to feedparser. Fixes #59 2025-01-30 15:47:09 +02:00
Tulir Asokan
68e5a84096 Bump version to v0.4.0 2025-01-30 14:10:05 +02:00
Tulir Asokan
f62b0335dd Update linters 2025-01-30 14:08:04 +02:00
Tulir Asokan
a8f1340125 Update feedparser input 2025-01-30 14:07:57 +02:00
Tulir Asokan
b58202ebfb
Merge pull request #35 from AndrewKvalheim/entry-id-fallback
Stabilize entry IDs
2023-02-21 13:24:15 +02:00
Tulir Asokan
eeb71a008f Fix formatting 2023-02-21 13:22:19 +02:00
Tulir Asokan
ef4915e434 Add usage to readme 2023-02-21 12:47:13 +02:00
Tulir Asokan
1a52d18f59 Show current template if ran without arguments 2023-02-21 12:43:32 +02:00
Tulir Asokan
f12d32ad3c Bump version to 0.3.2 2022-10-03 09:25:35 +03:00
Andrew Kvalheim
03bb128005 Key entries by link if missing ID
Resolves the problem of incorrectly duplicated entries in feeds that
update content but don’t explicitly provide entry IDs. Example feed:

  - https://www.to-rss.xyz/wikipedia/current_events/

Example entry:

    <item>
      <title>Current events: 2022-07-13</title>
      <link>https://en.wikipedia.org/wiki/Portal:Current_events/2022_July_13</link>
      <description>[VARIABLE CONTENT]</description>
      <pubDate>Wed, 13 Jul 2022 00:00:00 -0000</pubDate>
      </item>
    <item>

This behavior is suggested by the common practice of using an entry’s
link as its ID value, and is consistent with typical feed aggregators
such as Feedbin and Inoreader.
2022-07-14 11:05:40 -07:00
Tulir Asokan
30ad459870 Move CI script to main maubot repo 2022-06-19 14:27:42 +03:00
Tulir Asokan
877dcffb9c Use custom user agent 2022-06-18 17:47:03 +03:00
7 changed files with 60 additions and 47 deletions

View File

@ -9,7 +9,7 @@ jobs:
- uses: actions/checkout@v3
- uses: actions/setup-python@v3
with:
python-version: "3.10"
python-version: "3.13"
- uses: isort/isort-action@master
with:
sortPaths: "./rss"

View File

@ -1,29 +1,3 @@
image: dock.mau.dev/maubot/maubot
stages:
- build
variables:
PYTHONPATH: /opt/maubot
build:
stage: build
except:
- tags
script:
- python3 -m maubot.cli build -o xyz.maubot.$CI_PROJECT_NAME-$CI_COMMIT_REF_NAME-$CI_COMMIT_SHORT_SHA.mbp
artifacts:
paths:
- "*.mbp"
expire_in: 365 days
build tags:
stage: build
only:
- tags
script:
- python3 -m maubot.cli build -o xyz.maubot.$CI_PROJECT_NAME-$CI_COMMIT_TAG.mbp
artifacts:
paths:
- "*.mbp"
expire_in: never
include:
- project: 'maubot/maubot'
file: '/.gitlab-ci-plugin.yml'

View File

@ -1,6 +1,6 @@
repos:
- repo: https://github.com/pre-commit/pre-commit-hooks
rev: v4.1.0
rev: v5.0.0
hooks:
- id: trailing-whitespace
exclude_types: [markdown]
@ -8,13 +8,13 @@ repos:
- id: check-yaml
- id: check-added-large-files
- repo: https://github.com/psf/black
rev: 22.3.0
rev: 25.1.0
hooks:
- id: black
language_version: python3
files: ^rss/.*\.pyi?$
- repo: https://github.com/PyCQA/isort
rev: 5.10.1
rev: 6.0.0
hooks:
- id: isort
files: ^rss/.*\.pyi?$

View File

@ -1,2 +1,30 @@
# rss
A [maubot](https://github.com/maubot/maubot) that posts RSS feed updates to Matrix.
## Usage
Basic commands:
* `!rss subscribe <url>` - Subscribe the current room to a feed.
* `!rss unsubscribe <feed ID>` - Unsubscribe the current room from a feed.
* `!rss subscriptions` - List subscriptions (and feed IDs) in the current room.
* `!rss notice <feed ID> [true/false]` - Set whether the bot should send new
posts as `m.notice` (if false, they're sent as `m.text`).
* `!rss template <feed ID> [new template]` - Change the post template for a
feed in the current room. If the new template is omitted, the bot replies
with the current template.
### Templates
The default template is `New post in $feed_title: [$title]($link)`.
Templates are interpreted as markdown with some simple variable substitution.
The following variables are available:
* `$feed_url` - The URL that was used to subscribe to the feed.
* `$feed_link` - The home page of the feed.
* `$feed_title` - The title of the feed.
* `$feed_subtitle` - The subtitle of the feed.
* `$id` - The unique ID of the entry.
* `$date` - The date of the entry.
* `$title` - The title of the entry.
* `$summary` - The summary/description of the entry.
* `$link` - The link of the entry.

View File

@ -1,6 +1,6 @@
maubot: 0.3.0
id: xyz.maubot.rss
version: 0.3.1
version: 0.4.1
license: AGPL-3.0-or-later
modules:
- rss

View File

@ -8,4 +8,4 @@ line_length = 99
[tool.black]
line-length = 99
target-version = ["py38"]
target-version = ["py310"]

View File

@ -21,12 +21,14 @@ from string import Template
from time import mktime, time
import asyncio
import hashlib
import html
import io
import aiohttp
import attr
import feedparser
from maubot import MessageEvent, Plugin
from maubot import MessageEvent, Plugin, __version__ as maubot_version
from maubot.handlers import command, event
from mautrix.types import (
EventID,
@ -198,6 +200,12 @@ class RSSBot(Plugin):
self.log.warning(f"Failed to parse feed {feed.id} / {feed.url}: {e}")
return feed, []
@property
def _feed_get_headers(self) -> dict[str, str]:
return {
"User-Agent": f"maubot/{maubot_version} +https://github.com/maubot/rss",
}
async def parse_feed(
self, *, feed: Feed | None = None, url: str | None = None
) -> tuple[Feed, list[Entry]]:
@ -207,7 +215,7 @@ class RSSBot(Plugin):
feed = Feed(id=-1, url=url, title="", subtitle="", link="")
elif url is not None:
raise ValueError("Only one of feed or url must be set")
resp = await self.http.get(feed.url)
resp = await self.http.get(feed.url, headers=self._feed_get_headers)
ct = resp.headers["Content-Type"].split(";")[0].strip()
if ct == "application/json" or ct == "application/feed+json":
return await self._parse_json(feed, resp)
@ -249,15 +257,9 @@ class RSSBot(Plugin):
async def _parse_rss(
cls, feed: Feed, resp: aiohttp.ClientResponse
) -> tuple[Feed, list[Entry]]:
try:
content = await resp.text()
except UnicodeDecodeError:
try:
content = await resp.text(encoding="utf-8", errors="ignore")
except UnicodeDecodeError:
content = str(await resp.read())[2:-1]
content = await resp.read()
headers = {"Content-Location": feed.url, **resp.headers, "Content-Encoding": "identity"}
parsed_data = feedparser.parse(content, response_headers=headers)
parsed_data = feedparser.parse(io.BytesIO(content), response_headers=headers)
if parsed_data.bozo:
if not isinstance(parsed_data.bozo_exception, feedparser.ThingsNobodyCaresAboutButMe):
raise parsed_data.bozo_exception
@ -273,12 +275,12 @@ class RSSBot(Plugin):
feed_id=feed_id,
id=(
getattr(entry, "id", None)
or getattr(entry, "link", None)
or hashlib.sha1(
" ".join(
[
getattr(entry, "title", ""),
getattr(entry, "description", ""),
getattr(entry, "link", ""),
]
).encode("utf-8")
).hexdigest()
@ -386,7 +388,7 @@ class RSSBot(Plugin):
help="Change the notification template for a subscription in this room",
)
@command.argument("feed_id", "feed ID", parser=int)
@command.argument("template", "new template", pass_raw=True)
@command.argument("template", "new template", pass_raw=True, required=False)
async def command_template(self, evt: MessageEvent, feed_id: int, template: str) -> None:
if not await self.can_manage(evt):
return
@ -394,6 +396,15 @@ class RSSBot(Plugin):
if not sub:
await evt.reply("This room is not subscribed to that feed")
return
if not template:
await evt.reply(
'<p>Current template in this room:</p><pre><code language="markdown">'
f"{html.escape(sub.notification_template.template)}"
"</code></pre>",
allow_html=True,
markdown=False,
)
return
await self.dbm.update_template(feed.id, evt.room_id, template)
sub = Subscription(
feed_id=feed.id,