Compare commits

master..v0.3.2

No commits in common. "master" and "v0.3.2" have entirely different histories.

8 changed files with 43 additions and 75 deletions

View file

@ -9,7 +9,7 @@ jobs:
- uses: actions/checkout@v3 - uses: actions/checkout@v3
- uses: actions/setup-python@v3 - uses: actions/setup-python@v3
with: with:
python-version: "3.13" python-version: "3.10"
- uses: isort/isort-action@master - uses: isort/isort-action@master
with: with:
sortPaths: "./rss" sortPaths: "./rss"

View file

@ -1,6 +1,6 @@
repos: repos:
- repo: https://github.com/pre-commit/pre-commit-hooks - repo: https://github.com/pre-commit/pre-commit-hooks
rev: v5.0.0 rev: v4.1.0
hooks: hooks:
- id: trailing-whitespace - id: trailing-whitespace
exclude_types: [markdown] exclude_types: [markdown]
@ -8,13 +8,13 @@ repos:
- id: check-yaml - id: check-yaml
- id: check-added-large-files - id: check-added-large-files
- repo: https://github.com/psf/black - repo: https://github.com/psf/black
rev: 25.1.0 rev: 22.3.0
hooks: hooks:
- id: black - id: black
language_version: python3 language_version: python3
files: ^rss/.*\.pyi?$ files: ^rss/.*\.pyi?$
- repo: https://github.com/PyCQA/isort - repo: https://github.com/PyCQA/isort
rev: 6.0.0 rev: 5.10.1
hooks: hooks:
- id: isort - id: isort
files: ^rss/.*\.pyi?$ files: ^rss/.*\.pyi?$

View file

@ -1,30 +1,2 @@
# rss # rss
A [maubot](https://github.com/maubot/maubot) that posts RSS feed updates to Matrix. A [maubot](https://github.com/maubot/maubot) that posts RSS feed updates to Matrix.
## Usage
Basic commands:
* `!rss subscribe <url>` - Subscribe the current room to a feed.
* `!rss unsubscribe <feed ID>` - Unsubscribe the current room from a feed.
* `!rss subscriptions` - List subscriptions (and feed IDs) in the current room.
* `!rss notice <feed ID> [true/false]` - Set whether the bot should send new
posts as `m.notice` (if false, they're sent as `m.text`).
* `!rss template <feed ID> [new template]` - Change the post template for a
feed in the current room. If the new template is omitted, the bot replies
with the current template.
### Templates
The default template is `New post in $feed_title: [$title]($link)`.
Templates are interpreted as markdown with some simple variable substitution.
The following variables are available:
* `$feed_url` - The URL that was used to subscribe to the feed.
* `$feed_link` - The home page of the feed.
* `$feed_title` - The title of the feed.
* `$feed_subtitle` - The subtitle of the feed.
* `$id` - The unique ID of the entry.
* `$date` - The date of the entry.
* `$title` - The title of the entry.
* `$summary` - The summary/description of the entry.
* `$link` - The link of the entry.

View file

@ -2,8 +2,6 @@
update_interval: 60 update_interval: 60
# Maximum backoff in minutes when failing to fetch feeds (defaults to 5 days) # Maximum backoff in minutes when failing to fetch feeds (defaults to 5 days)
max_backoff: 7200 max_backoff: 7200
# How many feeds to poll in parallel? Set to 0 to disable the limit.
poll_parallelism_limit: 10
# The time to sleep between send requests when broadcasting a new feed entry. # The time to sleep between send requests when broadcasting a new feed entry.
# Set to 0 to disable sleep or -1 to run all requests asynchronously at once. # Set to 0 to disable sleep or -1 to run all requests asynchronously at once.
spam_sleep: 2 spam_sleep: 2

View file

@ -1,6 +1,6 @@
maubot: 0.3.0 maubot: 0.3.0
id: xyz.maubot.rss id: xyz.maubot.rss
version: 0.4.1 version: 0.3.2
license: AGPL-3.0-or-later license: AGPL-3.0-or-later
modules: modules:
- rss - rss

View file

@ -8,4 +8,4 @@ line_length = 99
[tool.black] [tool.black]
line-length = 99 line-length = 99
target-version = ["py310"] target-version = ["py38"]

View file

@ -18,11 +18,9 @@ from __future__ import annotations
from typing import Any, Iterable from typing import Any, Iterable
from datetime import datetime from datetime import datetime
from string import Template from string import Template
from time import mktime, monotonic, time from time import mktime, time
import asyncio import asyncio
import hashlib import hashlib
import html
import io
import aiohttp import aiohttp
import attr import attr
@ -55,7 +53,6 @@ class Config(BaseProxyConfig):
helper.copy("command_prefix") helper.copy("command_prefix")
helper.copy("notification_template") helper.copy("notification_template")
helper.copy("admins") helper.copy("admins")
helper.copy("poll_parallelism_limit")
class BoolArgument(command.Argument): class BoolArgument(command.Argument):
@ -76,7 +73,6 @@ class BoolArgument(command.Argument):
class RSSBot(Plugin): class RSSBot(Plugin):
dbm: DBManager dbm: DBManager
poll_task: asyncio.Future poll_task: asyncio.Future
poll_sema: asyncio.Semaphore | None
http: aiohttp.ClientSession http: aiohttp.ClientSession
power_level_cache: dict[RoomID, tuple[int, PowerLevelStateEventContent]] power_level_cache: dict[RoomID, tuple[int, PowerLevelStateEventContent]]
@ -90,19 +86,12 @@ class RSSBot(Plugin):
async def start(self) -> None: async def start(self) -> None:
await super().start() await super().start()
self.on_external_config_update() self.config.load_and_update()
self.dbm = DBManager(self.database) self.dbm = DBManager(self.database)
self.http = self.client.api.session self.http = self.client.api.session
self.power_level_cache = {} self.power_level_cache = {}
self.poll_task = asyncio.create_task(self.poll_feeds()) self.poll_task = asyncio.create_task(self.poll_feeds())
def on_external_config_update(self) -> None:
self.config.load_and_update()
poll_parallelism_limit = self.config["poll_parallelism_limit"]
self.poll_sema = (
asyncio.Semaphore(poll_parallelism_limit) if poll_parallelism_limit > 0 else None
)
async def stop(self) -> None: async def stop(self) -> None:
await super().stop() await super().stop()
self.poll_task.cancel() self.poll_task.cancel()
@ -151,7 +140,6 @@ class RSSBot(Plugin):
if not subs: if not subs:
return return
now = int(time()) now = int(time())
start = monotonic()
tasks = [self.try_parse_feed(feed=feed) for feed in subs if feed.next_retry < now] tasks = [self.try_parse_feed(feed=feed) for feed in subs if feed.next_retry < now]
feed: Feed feed: Feed
entries: Iterable[Entry] entries: Iterable[Entry]
@ -186,8 +174,7 @@ class RSSBot(Plugin):
await self.dbm.add_entries(new_entry_list) await self.dbm.add_entries(new_entry_list)
for entry in new_entry_list: for entry in new_entry_list:
await self._broadcast(feed, entry, feed.subscriptions) await self._broadcast(feed, entry, feed.subscriptions)
duration = monotonic() - now self.log.info(f"Finished polling {len(tasks)} feeds")
self.log.info(f"Finished polling {len(tasks)} feeds in {duration:.2f} seconds")
async def _poll_feeds(self) -> None: async def _poll_feeds(self) -> None:
self.log.debug("Polling started") self.log.debug("Polling started")
@ -201,13 +188,6 @@ class RSSBot(Plugin):
await asyncio.sleep(self.config["update_interval"] * 60) await asyncio.sleep(self.config["update_interval"] * 60)
async def try_parse_feed(self, feed: Feed | None = None) -> tuple[Feed, list[Entry]]: async def try_parse_feed(self, feed: Feed | None = None) -> tuple[Feed, list[Entry]]:
if self.poll_sema is not None:
async with self.poll_sema:
return await self._try_parse_feed(feed)
else:
return await self._try_parse_feed(feed)
async def _try_parse_feed(self, feed: Feed | None = None) -> tuple[Feed, list[Entry]]:
try: try:
self.log.trace( self.log.trace(
f"Trying to fetch {feed.id} / {feed.url} " f"Trying to fetch {feed.id} / {feed.url} "
@ -275,9 +255,15 @@ class RSSBot(Plugin):
async def _parse_rss( async def _parse_rss(
cls, feed: Feed, resp: aiohttp.ClientResponse cls, feed: Feed, resp: aiohttp.ClientResponse
) -> tuple[Feed, list[Entry]]: ) -> tuple[Feed, list[Entry]]:
content = await resp.read() try:
content = await resp.text()
except UnicodeDecodeError:
try:
content = await resp.text(encoding="utf-8", errors="ignore")
except UnicodeDecodeError:
content = str(await resp.read())[2:-1]
headers = {"Content-Location": feed.url, **resp.headers, "Content-Encoding": "identity"} headers = {"Content-Location": feed.url, **resp.headers, "Content-Encoding": "identity"}
parsed_data = feedparser.parse(io.BytesIO(content), response_headers=headers) parsed_data = feedparser.parse(content, response_headers=headers)
if parsed_data.bozo: if parsed_data.bozo:
if not isinstance(parsed_data.bozo_exception, feedparser.ThingsNobodyCaresAboutButMe): if not isinstance(parsed_data.bozo_exception, feedparser.ThingsNobodyCaresAboutButMe):
raise parsed_data.bozo_exception raise parsed_data.bozo_exception
@ -293,12 +279,12 @@ class RSSBot(Plugin):
feed_id=feed_id, feed_id=feed_id,
id=( id=(
getattr(entry, "id", None) getattr(entry, "id", None)
or getattr(entry, "link", None)
or hashlib.sha1( or hashlib.sha1(
" ".join( " ".join(
[ [
getattr(entry, "title", ""), getattr(entry, "title", ""),
getattr(entry, "description", ""), getattr(entry, "description", ""),
getattr(entry, "link", ""),
] ]
).encode("utf-8") ).encode("utf-8")
).hexdigest() ).hexdigest()
@ -406,7 +392,7 @@ class RSSBot(Plugin):
help="Change the notification template for a subscription in this room", help="Change the notification template for a subscription in this room",
) )
@command.argument("feed_id", "feed ID", parser=int) @command.argument("feed_id", "feed ID", parser=int)
@command.argument("template", "new template", pass_raw=True, required=False) @command.argument("template", "new template", pass_raw=True)
async def command_template(self, evt: MessageEvent, feed_id: int, template: str) -> None: async def command_template(self, evt: MessageEvent, feed_id: int, template: str) -> None:
if not await self.can_manage(evt): if not await self.can_manage(evt):
return return
@ -414,15 +400,6 @@ class RSSBot(Plugin):
if not sub: if not sub:
await evt.reply("This room is not subscribed to that feed") await evt.reply("This room is not subscribed to that feed")
return return
if not template:
await evt.reply(
'<p>Current template in this room:</p><pre><code language="markdown">'
f"{html.escape(sub.notification_template.template)}"
"</code></pre>",
allow_html=True,
markdown=False,
)
return
await self.dbm.update_template(feed.id, evt.room_id, template) await self.dbm.update_template(feed.id, evt.room_id, template)
sub = Subscription( sub = Subscription(
feed_id=feed.id, feed_id=feed.id,

View file

@ -25,6 +25,12 @@ import attr
from mautrix.types import RoomID, UserID from mautrix.types import RoomID, UserID
from mautrix.util.async_db import Database, Scheme from mautrix.util.async_db import Database, Scheme
# TODO make this import unconditional after updating mautrix-python
try:
from mautrix.util.async_db import SQLiteCursor
except ImportError:
SQLiteCursor = None
@dataclass @dataclass
class Subscription: class Subscription:
@ -182,6 +188,21 @@ class DBManager:
"INSERT INTO feed (url, title, subtitle, link, next_retry) " "INSERT INTO feed (url, title, subtitle, link, next_retry) "
"VALUES ($1, $2, $3, $4, $5) RETURNING (id)" "VALUES ($1, $2, $3, $4, $5) RETURNING (id)"
) )
# SQLite only gained RETURNING support in v3.35 (2021-03-12)
# TODO remove this special case in a couple of years
if self.db.scheme == Scheme.SQLITE:
cur = await self.db.execute(
q.replace(" RETURNING (id)", ""),
info.url,
info.title,
info.subtitle,
info.link,
info.next_retry,
)
if SQLiteCursor is not None:
assert isinstance(cur, SQLiteCursor)
info.id = cur.lastrowid
else:
info.id = await self.db.fetchval( info.id = await self.db.fetchval(
q, info.url, info.title, info.subtitle, info.link, info.next_retry q, info.url, info.title, info.subtitle, info.link, info.next_retry
) )