Compare commits

..

No commits in common. "master" and "v0.3.2" have entirely different histories.

8 changed files with 43 additions and 75 deletions

View file

@@ -9,7 +9,7 @@ jobs:
- uses: actions/checkout@v3
- uses: actions/setup-python@v3
with:
python-version: "3.13"
python-version: "3.10"
- uses: isort/isort-action@master
with:
sortPaths: "./rss"

View file

@@ -1,6 +1,6 @@
repos:
- repo: https://github.com/pre-commit/pre-commit-hooks
rev: v5.0.0
rev: v4.1.0
hooks:
- id: trailing-whitespace
exclude_types: [markdown]
@@ -8,13 +8,13 @@ repos:
- id: check-yaml
- id: check-added-large-files
- repo: https://github.com/psf/black
rev: 25.1.0
rev: 22.3.0
hooks:
- id: black
language_version: python3
files: ^rss/.*\.pyi?$
- repo: https://github.com/PyCQA/isort
rev: 6.0.0
rev: 5.10.1
hooks:
- id: isort
files: ^rss/.*\.pyi?$

View file

@@ -1,30 +1,2 @@
# rss
A [maubot](https://github.com/maubot/maubot) that posts RSS feed updates to Matrix.
## Usage
Basic commands:
* `!rss subscribe <url>` - Subscribe the current room to a feed.
* `!rss unsubscribe <feed ID>` - Unsubscribe the current room from a feed.
* `!rss subscriptions` - List subscriptions (and feed IDs) in the current room.
* `!rss notice <feed ID> [true/false]` - Set whether the bot should send new
posts as `m.notice` (if false, they're sent as `m.text`).
* `!rss template <feed ID> [new template]` - Change the post template for a
feed in the current room. If the new template is omitted, the bot replies
with the current template.
### Templates
The default template is `New post in $feed_title: [$title]($link)`.
Templates are interpreted as markdown with some simple variable substitution.
The following variables are available:
* `$feed_url` - The URL that was used to subscribe to the feed.
* `$feed_link` - The home page of the feed.
* `$feed_title` - The title of the feed.
* `$feed_subtitle` - The subtitle of the feed.
* `$id` - The unique ID of the entry.
* `$date` - The date of the entry.
* `$title` - The title of the entry.
* `$summary` - The summary/description of the entry.
* `$link` - The link of the entry.

View file

@@ -2,8 +2,6 @@
update_interval: 60
# Maximum backoff in minutes when failing to fetch feeds (defaults to 5 days)
max_backoff: 7200
# How many feeds to poll in parallel? Set to 0 to disable limit.
poll_parallelism_limit: 10
# The time to sleep between send requests when broadcasting a new feed entry.
# Set to 0 to disable sleep or -1 to run all requests asynchronously at once.
spam_sleep: 2

View file

@@ -1,6 +1,6 @@
maubot: 0.3.0
id: xyz.maubot.rss
version: 0.4.1
version: 0.3.2
license: AGPL-3.0-or-later
modules:
- rss

View file

@@ -8,4 +8,4 @@ line_length = 99
[tool.black]
line-length = 99
target-version = ["py310"]
target-version = ["py38"]

View file

@@ -18,11 +18,9 @@ from __future__ import annotations
from typing import Any, Iterable
from datetime import datetime
from string import Template
from time import mktime, monotonic, time
from time import mktime, time
import asyncio
import hashlib
import html
import io
import aiohttp
import attr
@@ -55,7 +53,6 @@ class Config(BaseProxyConfig):
helper.copy("command_prefix")
helper.copy("notification_template")
helper.copy("admins")
helper.copy("poll_parallelism_limit")
class BoolArgument(command.Argument):
@@ -76,7 +73,6 @@ class BoolArgument(command.Argument):
class RSSBot(Plugin):
dbm: DBManager
poll_task: asyncio.Future
poll_sema: asyncio.Semaphore | None
http: aiohttp.ClientSession
power_level_cache: dict[RoomID, tuple[int, PowerLevelStateEventContent]]
@@ -90,19 +86,12 @@ class RSSBot(Plugin):
async def start(self) -> None:
await super().start()
self.on_external_config_update()
self.config.load_and_update()
self.dbm = DBManager(self.database)
self.http = self.client.api.session
self.power_level_cache = {}
self.poll_task = asyncio.create_task(self.poll_feeds())
def on_external_config_update(self) -> None:
self.config.load_and_update()
poll_parallelism_limit = self.config["poll_parallelism_limit"]
self.poll_sema = (
asyncio.Semaphore(poll_parallelism_limit) if poll_parallelism_limit > 0 else None
)
async def stop(self) -> None:
await super().stop()
self.poll_task.cancel()
@@ -151,7 +140,6 @@ class RSSBot(Plugin):
if not subs:
return
now = int(time())
start = monotonic()
tasks = [self.try_parse_feed(feed=feed) for feed in subs if feed.next_retry < now]
feed: Feed
entries: Iterable[Entry]
@@ -186,8 +174,7 @@
await self.dbm.add_entries(new_entry_list)
for entry in new_entry_list:
await self._broadcast(feed, entry, feed.subscriptions)
duration = monotonic() - now
self.log.info(f"Finished polling {len(tasks)} feeds in {duration:.2f} seconds")
self.log.info(f"Finished polling {len(tasks)} feeds")
async def _poll_feeds(self) -> None:
self.log.debug("Polling started")
@@ -201,13 +188,6 @@
await asyncio.sleep(self.config["update_interval"] * 60)
async def try_parse_feed(self, feed: Feed | None = None) -> tuple[Feed, list[Entry]]:
if self.poll_sema is not None:
async with self.poll_sema:
return await self._try_parse_feed(feed)
else:
return await self._try_parse_feed(feed)
async def _try_parse_feed(self, feed: Feed | None = None) -> tuple[Feed, list[Entry]]:
try:
self.log.trace(
f"Trying to fetch {feed.id} / {feed.url} "
@@ -275,9 +255,15 @@
async def _parse_rss(
cls, feed: Feed, resp: aiohttp.ClientResponse
) -> tuple[Feed, list[Entry]]:
content = await resp.read()
try:
content = await resp.text()
except UnicodeDecodeError:
try:
content = await resp.text(encoding="utf-8", errors="ignore")
except UnicodeDecodeError:
content = str(await resp.read())[2:-1]
headers = {"Content-Location": feed.url, **resp.headers, "Content-Encoding": "identity"}
parsed_data = feedparser.parse(io.BytesIO(content), response_headers=headers)
parsed_data = feedparser.parse(content, response_headers=headers)
if parsed_data.bozo:
if not isinstance(parsed_data.bozo_exception, feedparser.ThingsNobodyCaresAboutButMe):
raise parsed_data.bozo_exception
@@ -293,12 +279,12 @@
feed_id=feed_id,
id=(
getattr(entry, "id", None)
or getattr(entry, "link", None)
or hashlib.sha1(
" ".join(
[
getattr(entry, "title", ""),
getattr(entry, "description", ""),
getattr(entry, "link", ""),
]
).encode("utf-8")
).hexdigest()
@@ -406,7 +392,7 @@
help="Change the notification template for a subscription in this room",
)
@command.argument("feed_id", "feed ID", parser=int)
@command.argument("template", "new template", pass_raw=True, required=False)
@command.argument("template", "new template", pass_raw=True)
async def command_template(self, evt: MessageEvent, feed_id: int, template: str) -> None:
if not await self.can_manage(evt):
return
@@ -414,15 +400,6 @@
if not sub:
await evt.reply("This room is not subscribed to that feed")
return
if not template:
await evt.reply(
'<p>Current template in this room:</p><pre><code language="markdown">'
f"{html.escape(sub.notification_template.template)}"
"</code></pre>",
allow_html=True,
markdown=False,
)
return
await self.dbm.update_template(feed.id, evt.room_id, template)
sub = Subscription(
feed_id=feed.id,

View file

@@ -25,6 +25,12 @@ import attr
from mautrix.types import RoomID, UserID
from mautrix.util.async_db import Database, Scheme
# TODO make this import unconditional after updating mautrix-python
try:
from mautrix.util.async_db import SQLiteCursor
except ImportError:
SQLiteCursor = None
@dataclass
class Subscription:
@@ -182,9 +188,24 @@ class DBManager:
"INSERT INTO feed (url, title, subtitle, link, next_retry) "
"VALUES ($1, $2, $3, $4, $5) RETURNING (id)"
)
info.id = await self.db.fetchval(
q, info.url, info.title, info.subtitle, info.link, info.next_retry
)
# SQLite only gained RETURNING support in v3.35 (2021-03-12)
# TODO remove this special case in a couple of years
if self.db.scheme == Scheme.SQLITE:
cur = await self.db.execute(
q.replace(" RETURNING (id)", ""),
info.url,
info.title,
info.subtitle,
info.link,
info.next_retry,
)
if SQLiteCursor is not None:
assert isinstance(cur, SQLiteCursor)
info.id = cur.lastrowid
else:
info.id = await self.db.fetchval(
q, info.url, info.title, info.subtitle, info.link, info.next_retry
)
return info
async def set_backoff(self, info: Feed, error_count: int, next_retry: int) -> None: