From 08ff28bf30c1e92379d84765e3fde284f6075805 Mon Sep 17 00:00:00 2001 From: Tulir Asokan Date: Sun, 28 Nov 2021 15:35:44 +0200 Subject: [PATCH 01/25] Update CI artifact expiry --- .gitlab-ci.yml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index c649b91..45ef06b 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -15,6 +15,7 @@ build: artifacts: paths: - "*.mbp" + expire_in: 365 days build tags: stage: build @@ -25,3 +26,4 @@ build tags: artifacts: paths: - "*.mbp" + expire_in: never From 947c4748b8cc591aec5b83c3786b775c427ddaf8 Mon Sep 17 00:00:00 2001 From: Tulir Asokan Date: Tue, 22 Feb 2022 23:10:42 +0200 Subject: [PATCH 02/25] Strip surrounding whitespace from item summary --- rss/bot.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/rss/bot.py b/rss/bot.py index 4e3f09a..12cf9ee 100644 --- a/rss/bot.py +++ b/rss/bot.py @@ -209,7 +209,7 @@ class RSSBot(Plugin): summary = (entry.get("summary") or entry.get("content_html") or entry.get("content_text") - or "") + or "").strip() id = str(entry["id"]) link = entry.get("url") or id return Entry(feed_id=feed_id, id=id, date=date, title=title, summary=summary, link=link) @@ -247,7 +247,7 @@ class RSSBot(Plugin): ).hexdigest()), date=cls._parse_rss_date(entry), title=getattr(entry, "title", ""), - summary=getattr(entry, "description", ""), + summary=getattr(entry, "description", "").strip(), link=getattr(entry, "link", ""), ) From 428b471fec923752a855cee487e225778cc35d3f Mon Sep 17 00:00:00 2001 From: Tulir Asokan Date: Tue, 22 Feb 2022 23:11:11 +0200 Subject: [PATCH 03/25] Add some logs and hacky sorting --- rss/bot.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/rss/bot.py b/rss/bot.py index 12cf9ee..62941a5 100644 --- a/rss/bot.py +++ b/rss/bot.py @@ -122,6 +122,7 @@ class RSSBot(Plugin): tasks = [self.try_parse_feed(feed=feed) for feed in subs if feed.next_retry < now] feed: Feed entries: Iterable[Entry] + 
self.log.info(f"Polling {len(tasks)} feeds") for res in asyncio.as_completed(tasks): feed, entries = await res self.log.trace(f"Fetching {feed.id} (backoff: {feed.error_count} / {feed.next_retry}) " @@ -146,8 +147,10 @@ class RSSBot(Plugin): new_entries.pop(old_entry.id, None) self.log.trace(f"Feed {feed.id} had {len(new_entries)} new entries") self.db.add_entries(new_entries.values()) - for entry in new_entries.values(): + # TODO sort properly? + for entry in reversed(new_entries.values()): await self._broadcast(feed, entry, feed.subscriptions) + self.log.info(f"Finished polling {len(tasks)} feeds") async def _poll_feeds(self) -> None: self.log.debug("Polling started") From 18ef939a04f3734df1b8976baffe5136aaef85e4 Mon Sep 17 00:00:00 2001 From: Tulir Asokan Date: Sat, 26 Mar 2022 14:32:18 +0200 Subject: [PATCH 04/25] Switch to asyncpg for database --- .github/workflows/python-lint.yml | 26 +++ .pre-commit-config.yaml | 23 ++ maubot.yaml | 3 +- pyproject.toml | 12 + rss/bot.py | 275 +++++++++++++--------- rss/db.py | 374 ++++++++++++++---------------- rss/migrations.py | 67 +++++- 7 files changed, 470 insertions(+), 310 deletions(-) create mode 100644 .github/workflows/python-lint.yml create mode 100644 .pre-commit-config.yaml create mode 100644 pyproject.toml diff --git a/.github/workflows/python-lint.yml b/.github/workflows/python-lint.yml new file mode 100644 index 0000000..7deb4c8 --- /dev/null +++ b/.github/workflows/python-lint.yml @@ -0,0 +1,26 @@ +name: Python lint + +on: [push, pull_request] + +jobs: + lint: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v3 + - uses: actions/setup-python@v3 + with: + python-version: "3.10" + - uses: isort/isort-action@master + with: + sortPaths: "./rss" + - uses: psf/black@stable + with: + src: "./rss" + version: "22.1.0" + - name: pre-commit + run: | + pip install pre-commit + pre-commit run -av trailing-whitespace + pre-commit run -av end-of-file-fixer + pre-commit run -av check-yaml + pre-commit run -av 
check-added-large-files diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml new file mode 100644 index 0000000..5a205c9 --- /dev/null +++ b/.pre-commit-config.yaml @@ -0,0 +1,23 @@ +repos: + - repo: https://github.com/pre-commit/pre-commit-hooks + rev: v4.1.0 + hooks: + - id: trailing-whitespace + exclude_types: [markdown] + - id: end-of-file-fixer + - id: check-yaml + - id: check-added-large-files + # TODO convert to use the upstream psf/black when + # https://github.com/psf/black/issues/2493 gets fixed + - repo: local + hooks: + - id: black + name: black + entry: black --check + language: system + files: ^rss/.*\.py$ + - repo: https://github.com/PyCQA/isort + rev: 5.10.1 + hooks: + - id: isort + files: ^rss/.*$ diff --git a/maubot.yaml b/maubot.yaml index 8506840..faef689 100644 --- a/maubot.yaml +++ b/maubot.yaml @@ -1,4 +1,4 @@ -maubot: 0.1.0 +maubot: 0.3.0 id: xyz.maubot.rss version: 0.2.6 license: AGPL-3.0-or-later @@ -10,3 +10,4 @@ extra_files: dependencies: - feedparser>=5.1 database: true +database_type: asyncpg diff --git a/pyproject.toml b/pyproject.toml new file mode 100644 index 0000000..be9cdda --- /dev/null +++ b/pyproject.toml @@ -0,0 +1,12 @@ +[tool.isort] +profile = "black" +force_to_top = "typing" +from_first = true +combine_as_imports = true +known_first_party = ["mautrix", "maubot"] +line_length = 99 + +[tool.black] +line-length = 99 +target-version = ["py38"] +required-version = "22.1.0" diff --git a/rss/bot.py b/rss/bot.py index 62941a5..90776f6 100644 --- a/rss/bot.py +++ b/rss/bot.py @@ -1,5 +1,5 @@ # rss - A maubot plugin to subscribe to RSS/Atom feeds. -# Copyright (C) 2021 Tulir Asokan +# Copyright (C) 2022 Tulir Asokan # # This program is free software: you can redistribute it and/or modify # it under the terms of the GNU Affero General Public License as published by @@ -13,23 +13,34 @@ # # You should have received a copy of the GNU Affero General Public License # along with this program. If not, see . 
-from typing import Type, List, Any, Dict, Tuple, Awaitable, Iterable, Optional +from __future__ import annotations + +from typing import Any, Iterable from datetime import datetime -from time import mktime, time from string import Template +from time import mktime, time import asyncio +import hashlib import aiohttp -import hashlib +import attr import feedparser -from mautrix.util.config import BaseProxyConfig, ConfigUpdateHelper -from mautrix.types import (StateEvent, EventType, MessageType, RoomID, EventID, - PowerLevelStateEventContent) -from maubot import Plugin, MessageEvent +from maubot import MessageEvent, Plugin from maubot.handlers import command, event +from mautrix.types import ( + EventID, + EventType, + MessageType, + PowerLevelStateEventContent, + RoomID, + StateEvent, +) +from mautrix.util.async_db import UpgradeTable +from mautrix.util.config import BaseProxyConfig, ConfigUpdateHelper -from .db import Database, Feed, Entry, Subscription +from .db import DBManager, Entry, Feed, Subscription +from .migrations import upgrade_table rss_change_level = EventType.find("xyz.maubot.rss", t_class=EventType.Class.STATE) @@ -47,7 +58,7 @@ class BoolArgument(command.Argument): def __init__(self, name: str, label: str = None, *, required: bool = False) -> None: super().__init__(name, label, required=required, pass_raw=False) - def match(self, val: str, **kwargs) -> Tuple[str, Any]: + def match(self, val: str, **kwargs) -> tuple[str, Any]: part = val.split(" ")[0].lower() if part in ("f", "false", "n", "no", "0"): res = False @@ -55,23 +66,27 @@ class BoolArgument(command.Argument): res = True else: raise ValueError("invalid boolean") - return val[len(part):], res + return val[len(part) :], res class RSSBot(Plugin): - db: Database + dbm: DBManager poll_task: asyncio.Future http: aiohttp.ClientSession - power_level_cache: Dict[RoomID, Tuple[int, PowerLevelStateEventContent]] + power_level_cache: dict[RoomID, tuple[int, PowerLevelStateEventContent]] @classmethod - 
def get_config_class(cls) -> Type[BaseProxyConfig]: + def get_config_class(cls) -> type[BaseProxyConfig]: return Config + @classmethod + def get_db_upgrade_table(cls) -> UpgradeTable: + return upgrade_table + async def start(self) -> None: await super().start() self.config.load_and_update() - self.db = Database(self.database) + self.dbm = DBManager(self.database) self.http = self.client.api.session self.power_level_cache = {} self.poll_task = asyncio.ensure_future(self.poll_feeds(), loop=self.loop) @@ -89,21 +104,26 @@ class RSSBot(Plugin): self.log.exception("Fatal error while polling feeds") async def _send(self, feed: Feed, entry: Entry, sub: Subscription) -> EventID: - message = sub.notification_template.safe_substitute({ - "feed_url": feed.url, - "feed_title": feed.title, - "feed_subtitle": feed.subtitle, - "feed_link": feed.link, - **entry._asdict(), - }) + message = sub.notification_template.safe_substitute( + { + "feed_url": feed.url, + "feed_title": feed.title, + "feed_subtitle": feed.subtitle, + "feed_link": feed.link, + **attr.asdict(entry), + } + ) msgtype = MessageType.NOTICE if sub.send_notice else MessageType.TEXT try: - return await self.client.send_markdown(sub.room_id, message, msgtype=msgtype, - allow_html=True) + return await self.client.send_markdown( + sub.room_id, message, msgtype=msgtype, allow_html=True + ) except Exception as e: self.log.warning(f"Failed to send {entry.id} of {feed.id} to {sub.room_id}: {e}") - async def _broadcast(self, feed: Feed, entry: Entry, subscriptions: List[Subscription]) -> None: + async def _broadcast( + self, feed: Feed, entry: Entry, subscriptions: list[Subscription] + ) -> None: self.log.debug(f"Broadcasting {entry.id} of {feed.id}") spam_sleep = self.config["spam_sleep"] tasks = [self._send(feed, entry, sub) for sub in subscriptions] @@ -115,7 +135,7 @@ class RSSBot(Plugin): await asyncio.gather(*tasks) async def _poll_once(self) -> None: - subs = self.db.get_feeds() + subs = await self.dbm.get_feeds() if 
not subs: return now = int(time()) @@ -125,30 +145,33 @@ class RSSBot(Plugin): self.log.info(f"Polling {len(tasks)} feeds") for res in asyncio.as_completed(tasks): feed, entries = await res - self.log.trace(f"Fetching {feed.id} (backoff: {feed.error_count} / {feed.next_retry}) " - f"success: {bool(entries)}") + self.log.trace( + f"Fetching {feed.id} (backoff: {feed.error_count} / {feed.next_retry}) " + f"success: {bool(entries)}" + ) if not entries: error_count = feed.error_count + 1 next_retry_delay = self.config["update_interval"] * 60 * error_count next_retry_delay = min(next_retry_delay, self.config["max_backoff"] * 60) next_retry = int(time() + next_retry_delay) self.log.debug(f"Setting backoff of {feed.id} to {error_count} / {next_retry}") - self.db.set_backoff(feed, error_count, next_retry) + await self.dbm.set_backoff(feed, error_count, next_retry) continue elif feed.error_count > 0: self.log.debug(f"Resetting backoff of {feed.id}") - self.db.set_backoff(feed, error_count=0, next_retry=0) + await self.dbm.set_backoff(feed, error_count=0, next_retry=0) try: new_entries = {entry.id: entry for entry in entries} except Exception: self.log.exception(f"Weird error in items of {feed.url}") continue - for old_entry in self.db.get_entries(feed.id): + for old_entry in await self.dbm.get_entries(feed.id): new_entries.pop(old_entry.id, None) self.log.trace(f"Feed {feed.id} had {len(new_entries)} new entries") - self.db.add_entries(new_entries.values()) - # TODO sort properly? 
- for entry in reversed(new_entries.values()): + new_entry_list: list[Entry] = list(new_entries.values()) + new_entry_list.sort(key=lambda entry: (entry.date, entry.id)) + await self.dbm.add_entries(new_entry_list) + for entry in new_entry_list: await self._broadcast(feed, entry, feed.subscriptions) self.log.info(f"Finished polling {len(tasks)} feeds") @@ -163,21 +186,24 @@ class RSSBot(Plugin): self.log.exception("Error while polling feeds") await asyncio.sleep(self.config["update_interval"] * 60, loop=self.loop) - async def try_parse_feed(self, feed: Optional[Feed] = None) -> Tuple[Feed, Iterable[Entry]]: + async def try_parse_feed(self, feed: Feed | None = None) -> tuple[Feed, list[Entry]]: try: - self.log.trace(f"Trying to fetch {feed.id} / {feed.url} " - f"(backoff: {feed.error_count} / {feed.next_retry})") + self.log.trace( + f"Trying to fetch {feed.id} / {feed.url} " + f"(backoff: {feed.error_count} / {feed.next_retry})" + ) return await self.parse_feed(feed=feed) except Exception as e: self.log.warning(f"Failed to parse feed {feed.id} / {feed.url}: {e}") return feed, [] - async def parse_feed(self, *, feed: Optional[Feed] = None, url: Optional[str] = None - ) -> Tuple[Feed, Iterable[Entry]]: + async def parse_feed( + self, *, feed: Feed | None = None, url: str | None = None + ) -> tuple[Feed, list[Entry]]: if feed is None: if url is None: raise ValueError("Either feed or url must be set") - feed = Feed(-1, url, "", "", "", 0, 0, []) + feed = Feed(id=-1, url=url, title="", subtitle="", link="") elif url is not None: raise ValueError("Only one of feed or url must be set") resp = await self.http.get(feed.url) @@ -188,38 +214,40 @@ class RSSBot(Plugin): return await self._parse_rss(feed, resp) @classmethod - async def _parse_json(cls, feed: Feed, resp: aiohttp.ClientResponse - ) -> Tuple[Feed, Iterable[Entry]]: + async def _parse_json( + cls, feed: Feed, resp: aiohttp.ClientResponse + ) -> tuple[Feed, list[Entry]]: content = await resp.json() - if 
content["version"] not in ("https://jsonfeed.org/version/1", - "https://jsonfeed.org/version/1.1"): + if content["version"] not in ( + "https://jsonfeed.org/version/1", + "https://jsonfeed.org/version/1.1", + ): raise ValueError("Unsupported JSON feed version") if not isinstance(content["items"], list): raise ValueError("Feed is not a valid JSON feed (items is not a list)") - feed = Feed(id=feed.id, title=content["title"], subtitle=content.get("subtitle", ""), - url=feed.url, link=content.get("home_page_url", ""), - next_retry=feed.next_retry, error_count=feed.error_count, - subscriptions=feed.subscriptions) - return feed, (cls._parse_json_entry(feed.id, entry) for entry in content["items"]) + feed.title = content["title"] + feed.subtitle = content.get("subtitle", "") + feed.link = content.get("home_page_url", "") + return feed, [cls._parse_json_entry(feed.id, entry) for entry in content["items"]] @classmethod - def _parse_json_entry(cls, feed_id: int, entry: Dict[str, Any]) -> Entry: + def _parse_json_entry(cls, feed_id: int, entry: dict[str, Any]) -> Entry: try: date = datetime.fromisoformat(entry["date_published"]) except (ValueError, KeyError): date = datetime.now() title = entry.get("title", "") - summary = (entry.get("summary") - or entry.get("content_html") - or entry.get("content_text") - or "").strip() + summary = ( + entry.get("summary") or entry.get("content_html") or entry.get("content_text") or "" + ).strip() id = str(entry["id"]) link = entry.get("url") or id return Entry(feed_id=feed_id, id=id, date=date, title=title, summary=summary, link=link) @classmethod - async def _parse_rss(cls, feed: Feed, resp: aiohttp.ClientResponse - ) -> Tuple[Feed, Iterable[Entry]]: + async def _parse_rss( + cls, feed: Feed, resp: aiohttp.ClientResponse + ) -> tuple[Feed, list[Entry]]: try: content = await resp.text() except UnicodeDecodeError: @@ -233,21 +261,27 @@ class RSSBot(Plugin): if not isinstance(parsed_data.bozo_exception, 
feedparser.ThingsNobodyCaresAboutButMe): raise parsed_data.bozo_exception feed_data = parsed_data.get("feed", {}) - feed = Feed(id=feed.id, url=feed.url, title=feed_data.get("title", feed.url), - subtitle=feed_data.get("description", ""), link=feed_data.get("link", ""), - error_count=feed.error_count, next_retry=feed.next_retry, - subscriptions=feed.subscriptions) - return feed, (cls._parse_rss_entry(feed.id, entry) for entry in parsed_data.entries) + feed.title = feed_data.get("title", feed.url) + feed.subtitle = feed_data.get("description", "") + feed.link = feed_data.get("link", "") + return feed, [cls._parse_rss_entry(feed.id, entry) for entry in parsed_data.entries] @classmethod def _parse_rss_entry(cls, feed_id: int, entry: Any) -> Entry: return Entry( feed_id=feed_id, - id=(getattr(entry, "id", None) or - hashlib.sha1(" ".join([getattr(entry, "title", ""), - getattr(entry, "description", ""), - getattr(entry, "link", "")]).encode("utf-8") - ).hexdigest()), + id=( + getattr(entry, "id", None) + or hashlib.sha1( + " ".join( + [ + getattr(entry, "title", ""), + getattr(entry, "description", ""), + getattr(entry, "link", ""), + ] + ).encode("utf-8") + ).hexdigest() + ), date=cls._parse_rss_date(entry), title=getattr(entry, "title", ""), summary=getattr(entry, "description", "").strip(), @@ -286,109 +320,138 @@ class RSSBot(Plugin): if not isinstance(state_level, int): state_level = 50 if user_level < state_level: - await evt.reply("You don't have the permission to " - "manage the subscriptions of this room.") + await evt.reply( + "You don't have the permission to manage the subscriptions of this room." 
+ ) return False return True - @command.new(name=lambda self: self.config["command_prefix"], - help="Manage this RSS bot", require_subcommand=True) + @command.new( + name=lambda self: self.config["command_prefix"], + help="Manage this RSS bot", + require_subcommand=True, + ) async def rss(self) -> None: pass - @rss.subcommand("subscribe", aliases=("s", "sub"), - help="Subscribe this room to a feed.") + @rss.subcommand("subscribe", aliases=("s", "sub"), help="Subscribe this room to a feed.") @command.argument("url", "feed URL", pass_raw=True) async def subscribe(self, evt: MessageEvent, url: str) -> None: if not await self.can_manage(evt): return - feed = self.db.get_feed_by_url(url) + feed = await self.dbm.get_feed_by_url(url) if not feed: try: info, entries = await self.parse_feed(url=url) except Exception as e: await evt.reply(f"Failed to load feed: {e}") return - feed = self.db.create_feed(info) - self.db.add_entries(entries, override_feed_id=feed.id) + feed = await self.dbm.create_feed(info) + await self.dbm.add_entries(entries, override_feed_id=feed.id) elif feed.error_count > 0: - self.db.set_backoff(feed, error_count=feed.error_count, next_retry=0) + await self.dbm.set_backoff(feed, error_count=feed.error_count, next_retry=0) feed_info = f"feed ID {feed.id}: [{feed.title}]({feed.url})" - sub, _ = self.db.get_subscription(feed.id, evt.room_id) + sub, _ = await self.dbm.get_subscription(feed.id, evt.room_id) if sub is not None: - subscriber = ("You" if sub.user_id == evt.sender - else f"[{sub.user_id}](https://matrix.to/#/{sub.user_id})") + subscriber = ( + "You" + if sub.user_id == evt.sender + else f"[{sub.user_id}](https://matrix.to/#/{sub.user_id})" + ) await evt.reply(f"{subscriber} had already subscribed this room to {feed_info}") else: - self.db.subscribe(feed.id, evt.room_id, evt.sender) + await self.dbm.subscribe(feed.id, evt.room_id, evt.sender) await evt.reply(f"Subscribed to {feed_info}") - @rss.subcommand("unsubscribe", aliases=("u", "unsub"), - 
help="Unsubscribe this room from a feed.") + @rss.subcommand( + "unsubscribe", aliases=("u", "unsub"), help="Unsubscribe this room from a feed." + ) @command.argument("feed_id", "feed ID", parser=int) async def unsubscribe(self, evt: MessageEvent, feed_id: int) -> None: if not await self.can_manage(evt): return - sub, feed = self.db.get_subscription(feed_id, evt.room_id) + sub, feed = await self.dbm.get_subscription(feed_id, evt.room_id) if not sub: await evt.reply("This room is not subscribed to that feed") return - self.db.unsubscribe(feed.id, evt.room_id) + await self.dbm.unsubscribe(feed.id, evt.room_id) await evt.reply(f"Unsubscribed from feed ID {feed.id}: [{feed.title}]({feed.url})") - @rss.subcommand("template", aliases=("t", "tpl"), - help="Change the notification template for a subscription in this room") + @rss.subcommand( + "template", + aliases=("t", "tpl"), + help="Change the notification template for a subscription in this room", + ) @command.argument("feed_id", "feed ID", parser=int) @command.argument("template", "new template", pass_raw=True) async def command_template(self, evt: MessageEvent, feed_id: int, template: str) -> None: if not await self.can_manage(evt): return - sub, feed = self.db.get_subscription(feed_id, evt.room_id) + sub, feed = await self.dbm.get_subscription(feed_id, evt.room_id) if not sub: await evt.reply("This room is not subscribed to that feed") return - self.db.update_template(feed.id, evt.room_id, template) - sub = Subscription(feed_id=feed.id, room_id=sub.room_id, user_id=sub.user_id, - notification_template=Template(template), send_notice=sub.send_notice) - sample_entry = Entry(feed.id, "SAMPLE", datetime.now(), "Sample entry", - "This is a sample entry to demonstrate your new template", - "http://example.com") + await self.dbm.update_template(feed.id, evt.room_id, template) + sub = Subscription( + feed_id=feed.id, + room_id=sub.room_id, + user_id=sub.user_id, + notification_template=Template(template), + 
send_notice=sub.send_notice, + ) + sample_entry = Entry( + feed_id=feed.id, + id="SAMPLE", + date=datetime.now(), + title="Sample entry", + summary="This is a sample entry to demonstrate your new template", + link="http://example.com", + ) await evt.reply(f"Template for feed ID {feed.id} updated. Sample notification:") await self._send(feed, sample_entry, sub) - @rss.subcommand("notice", aliases=("n",), - help="Set whether or not the bot should send updates as m.notice") + @rss.subcommand( + "notice", aliases=("n",), help="Set whether or not the bot should send updates as m.notice" + ) @command.argument("feed_id", "feed ID", parser=int) @BoolArgument("setting", "true/false") async def command_notice(self, evt: MessageEvent, feed_id: int, setting: bool) -> None: if not await self.can_manage(evt): return - sub, feed = self.db.get_subscription(feed_id, evt.room_id) + sub, feed = await self.dbm.get_subscription(feed_id, evt.room_id) if not sub: await evt.reply("This room is not subscribed to that feed") return - self.db.set_send_notice(feed.id, evt.room_id, setting) + await self.dbm.set_send_notice(feed.id, evt.room_id, setting) send_type = "m.notice" if setting else "m.text" await evt.reply(f"Updates for feed ID {feed.id} will now be sent as `{send_type}`") @staticmethod def _format_subscription(feed: Feed, subscriber: str) -> str: - msg = (f"* {feed.id} - [{feed.title}]({feed.url}) " - f"(subscribed by [{subscriber}](https://matrix.to/#/{subscriber}))") + msg = ( + f"* {feed.id} - [{feed.title}]({feed.url}) " + f"(subscribed by [{subscriber}](https://matrix.to/#/{subscriber}))" + ) if feed.error_count > 1: msg += f" \n ⚠️ The last {feed.error_count} attempts to fetch the feed have failed!" 
return msg - @rss.subcommand("subscriptions", aliases=("ls", "list", "subs"), - help="List the subscriptions in the current room.") + @rss.subcommand( + "subscriptions", + aliases=("ls", "list", "subs"), + help="List the subscriptions in the current room.", + ) async def command_subscriptions(self, evt: MessageEvent) -> None: - subscriptions = self.db.get_feeds_by_room(evt.room_id) - await evt.reply("**Subscriptions in this room:**\n\n" - + "\n".join(self._format_subscription(feed, subscriber) - for feed, subscriber in subscriptions)) + subscriptions = await self.dbm.get_feeds_by_room(evt.room_id) + await evt.reply( + "**Subscriptions in this room:**\n\n" + + "\n".join( + self._format_subscription(feed, subscriber) for feed, subscriber in subscriptions + ) + ) @event.on(EventType.ROOM_TOMBSTONE) async def tombstone(self, evt: StateEvent) -> None: if not evt.content.replacement_room: return - self.db.update_room_id(evt.room_id, evt.content.replacement_room) + await self.dbm.update_room_id(evt.room_id, evt.content.replacement_room) diff --git a/rss/db.py b/rss/db.py index 3e71691..69c930c 100644 --- a/rss/db.py +++ b/rss/db.py @@ -1,5 +1,5 @@ # rss - A maubot plugin to subscribe to RSS/Atom feeds. -# Copyright (C) 2020 Tulir Asokan +# Copyright (C) 2022 Tulir Asokan # # This program is free software: you can redistribute it and/or modify # it under the terms of the GNU Affero General Public License as published by @@ -13,221 +13,207 @@ # # You should have received a copy of the GNU Affero General Public License # along with this program. If not, see . 
-from typing import Iterable, NamedTuple, List, Optional, Dict, Tuple +from __future__ import annotations + from datetime import datetime from string import Template -from sqlalchemy import (Column, String, Integer, DateTime, Text, Boolean, ForeignKey, - Table, MetaData, - select, and_, true) -from sqlalchemy.engine.base import Engine +from asyncpg import Record +from attr import dataclass +import attr -from mautrix.types import UserID, RoomID - -Subscription = NamedTuple("Subscription", feed_id=int, room_id=RoomID, user_id=UserID, - notification_template=Template, send_notice=bool) -Feed = NamedTuple("Feed", id=int, url=str, title=str, subtitle=str, link=str, next_retry=int, - error_count=int, subscriptions=List[Subscription]) -Entry = NamedTuple("Entry", feed_id=int, id=str, date=datetime, title=str, summary=str, link=str) +from mautrix.types import RoomID, UserID +from mautrix.util.async_db import Database, Scheme -class Database: - db: Engine - feed: Table - subscription: Table - entry: Table - version: Table +@dataclass +class Subscription: + feed_id: int + room_id: RoomID + user_id: UserID + notification_template: Template + send_notice: bool - def __init__(self, db: Engine) -> None: + @classmethod + def from_row(cls, row: Record | None) -> Subscription | None: + if not row: + return None + feed_id = row["id"] + room_id = row["room_id"] + user_id = row["user_id"] + if not room_id or not user_id: + return None + send_notice = bool(row["send_notice"]) + tpl = Template(row["notification_template"]) + return cls( + feed_id=feed_id, + room_id=room_id, + user_id=user_id, + notification_template=tpl, + send_notice=send_notice, + ) + + +@dataclass +class Feed: + id: int + url: str + title: str + subtitle: str + link: str + + next_retry: int = 0 + error_count: int = 0 + + subscriptions: list[Subscription] = attr.ib(factory=lambda: []) + + @classmethod + def from_row(cls, row: Record | None) -> Feed | None: + if not row: + return None + data = {**row} + 
data.pop("room_id", None) + data.pop("user_id", None) + data.pop("send_notice", None) + data.pop("notification_template", None) + return cls(**data, subscriptions=[]) + + +date_fmt = "%Y-%m-%d %H:%M:%S" +date_fmt_microseconds = "%Y-%m-%d %H:%M:%S.%f" + + +@dataclass +class Entry: + feed_id: int + id: str + date: datetime + title: str + summary: str + link: str + + @classmethod + def from_row(cls, row: Record | None) -> Entry | None: + if not row: + return None + data = {**row} + date = data.pop("date") + if not isinstance(date, datetime): + try: + date = datetime.strptime(date, date_fmt_microseconds if "." in date else date_fmt) + except ValueError: + date = datetime.now() + return cls(**data, date=date) + + +class DBManager: + db: Database + + def __init__(self, db: Database) -> None: self.db = db - metadata = MetaData() - self.feed = Table("feed", metadata, - Column("id", Integer, primary_key=True, autoincrement=True), - Column("url", Text, nullable=False, unique=True), - Column("title", Text, nullable=False), - Column("subtitle", Text, nullable=False), - Column("link", Text, nullable=False), - Column("next_retry", Integer, nullable=False), - Column("error_count", Integer, nullable=False)) - self.subscription = Table("subscription", metadata, - Column("feed_id", Integer, ForeignKey("feed.id"), - primary_key=True), - Column("room_id", String(255), primary_key=True), - Column("user_id", String(255), nullable=False), - Column("notification_template", String(255), nullable=True), - Column("send_notice", Boolean, nullable=False, - server_default=true())) - self.entry = Table("entry", metadata, - Column("feed_id", Integer, ForeignKey("feed.id"), primary_key=True), - Column("id", String(255), primary_key=True), - Column("date", DateTime, nullable=False), - Column("title", Text, nullable=False), - Column("summary", Text, nullable=False), - Column("link", Text, nullable=False)) - self.version = Table("version", metadata, - Column("version", Integer, primary_key=True)) - 
self.upgrade() - def upgrade(self) -> None: - self.db.execute("CREATE TABLE IF NOT EXISTS version (version INTEGER PRIMARY KEY)") - try: - version, = next(self.db.execute(select([self.version.c.version]))) - except (StopIteration, IndexError): - version = 0 - if version == 0: - self.db.execute("""CREATE TABLE IF NOT EXISTS feed ( - id INTEGER NOT NULL, - url TEXT NOT NULL, - title TEXT NOT NULL, - subtitle TEXT NOT NULL, - link TEXT NOT NULL, - PRIMARY KEY (id), - UNIQUE (url) - )""") - self.db.execute("""CREATE TABLE IF NOT EXISTS subscription ( - feed_id INTEGER NOT NULL, - room_id VARCHAR(255) NOT NULL, - user_id VARCHAR(255) NOT NULL, - notification_template VARCHAR(255), - PRIMARY KEY (feed_id, room_id), - FOREIGN KEY(feed_id) REFERENCES feed (id) - )""") - self.db.execute("""CREATE TABLE IF NOT EXISTS entry ( - feed_id INTEGER NOT NULL, - id VARCHAR(255) NOT NULL, - date DATETIME NOT NULL, - title TEXT NOT NULL, - summary TEXT NOT NULL, - link TEXT NOT NULL, - PRIMARY KEY (feed_id, id), - FOREIGN KEY(feed_id) REFERENCES feed (id) - )""") - version = 1 - if version == 1: - self.db.execute("ALTER TABLE subscription ADD COLUMN send_notice BOOLEAN DEFAULT true") - version = 2 - if version == 2: - self.db.execute("ALTER TABLE feed ADD COLUMN next_retry BIGINT DEFAULT 0") - self.db.execute("ALTER TABLE feed ADD COLUMN error_count BIGINT DEFAULT 0") - version = 3 - self.db.execute(self.version.delete()) - self.db.execute(self.version.insert().values(version=version)) - - def get_feeds(self) -> Iterable[Feed]: - rows = self.db.execute(select([self.feed, - self.subscription.c.room_id, - self.subscription.c.user_id, - self.subscription.c.notification_template, - self.subscription.c.send_notice]) - .where(self.subscription.c.feed_id == self.feed.c.id)) - map: Dict[int, Feed] = {} + async def get_feeds(self) -> list[Feed]: + q = """ + SELECT id, url, title, subtitle, link, next_retry, error_count, + room_id, user_id, notification_template, send_notice + FROM feed INNER 
JOIN subscription ON feed.id = subscription.feed_id + """ + rows = await self.db.fetch(q) + feeds: dict[int, Feed] = {} for row in rows: - (feed_id, url, title, subtitle, link, next_retry, error_count, - room_id, user_id, notification_template, send_notice) = row - map.setdefault(feed_id, Feed(feed_id, url, title, subtitle, link, next_retry, - error_count, subscriptions=[])) - map[feed_id].subscriptions.append( - Subscription(feed_id=feed_id, room_id=room_id, user_id=user_id, - notification_template=Template(notification_template), - send_notice=send_notice)) - return map.values() + try: + feed = feeds[row["id"]] + except KeyError: + feed = feeds[row["id"]] = Feed.from_row(row) + feed.subscriptions.append(Subscription.from_row(row)) + return list(feeds.values()) - def get_feeds_by_room(self, room_id: RoomID) -> Iterable[Tuple[Feed, UserID]]: - return ((Feed(feed_id, url, title, subtitle, link, next_retry, error_count, - subscriptions=[]), - user_id) - for (feed_id, url, title, subtitle, link, next_retry, error_count, user_id) in - self.db.execute(select([self.feed, self.subscription.c.user_id]) - .where(and_(self.subscription.c.room_id == room_id, - self.subscription.c.feed_id == self.feed.c.id)))) + async def get_feeds_by_room(self, room_id: RoomID) -> list[tuple[Feed, UserID]]: + q = """ + SELECT id, url, title, subtitle, link, next_retry, error_count, user_id FROM feed + INNER JOIN subscription ON feed.id = subscription.feed_id AND subscription.room_id = $1 + """ + rows = await self.db.fetch(q, room_id) + return [(Feed.from_row(row), row["user_id"]) for row in rows] - def get_rooms_by_feed(self, feed_id: int) -> Iterable[RoomID]: - return (row[0] for row in - self.db.execute(select([self.subscription.c.room_id]) - .where(self.subscription.c.feed_id == feed_id))) + async def get_entries(self, feed_id: int) -> list[Entry]: + q = "SELECT feed_id, id, date, title, summary, link FROM entry WHERE feed_id = $1" + return [Entry.from_row(row) for row in await 
self.db.fetch(q, feed_id)] - def get_entries(self, feed_id: int) -> Iterable[Entry]: - return (Entry(*row) for row in - self.db.execute(select([self.entry]).where(self.entry.c.feed_id == feed_id))) - - def add_entries(self, entries: Iterable[Entry], override_feed_id: Optional[int] = None) -> None: + async def add_entries(self, entries: list[Entry], override_feed_id: int | None = None) -> None: if not entries: return - entries = [entry._asdict() for entry in entries] - if override_feed_id is not None: + if override_feed_id: for entry in entries: - entry["feed_id"] = override_feed_id - self.db.execute(self.entry.insert(), entries) + entry.feed_id = override_feed_id + records = [attr.astuple(entry) for entry in entries] + columns = ("feed_id", "id", "date", "title", "summary", "link") + async with self.db.acquire() as conn: + if self.db.scheme == Scheme.POSTGRES: + await conn.copy_records_to_table("entry", records=records, columns=columns) + else: + q = ( + "INSERT INTO entry (feed_id, id, date, title, summary, link) " + "VALUES ($1, $2, $3, $4, $5, $6)" + ) + await conn.executemany(q, records) - def get_feed_by_url(self, url: str) -> Optional[Feed]: - rows = self.db.execute(select([self.feed]).where(self.feed.c.url == url)) - try: - row = next(rows) - return Feed(*row, subscriptions=[]) - except (ValueError, StopIteration): - return None + async def get_feed_by_url(self, url: str) -> Feed | None: + q = "SELECT id, url, title, subtitle, link, next_retry, error_count FROM feed WHERE url=$1" + return Feed.from_row(await self.db.fetchrow(q, url)) - def get_feed_by_id(self, feed_id: int) -> Optional[Feed]: - rows = self.db.execute(select([self.feed]).where(self.feed.c.id == feed_id)) - try: - row = next(rows) - return Feed(*row, subscriptions=[]) - except (ValueError, StopIteration): - return None + async def get_subscription( + self, feed_id: int, room_id: RoomID + ) -> tuple[Subscription | None, Feed | None]: + q = """ + SELECT id, url, title, subtitle, link, 
next_retry, error_count, + room_id, user_id, notification_template, send_notice + FROM feed LEFT JOIN subscription ON feed.id = subscription.feed_id AND room_id = $2 + WHERE feed.id = $1 + """ + row = await self.db.fetchrow(q, feed_id, room_id) + return Subscription.from_row(row), Feed.from_row(row) - def get_subscription(self, feed_id: int, room_id: RoomID) -> Tuple[Optional[Subscription], - Optional[Feed]]: - tbl = self.subscription - rows = self.db.execute(select([self.feed, tbl.c.room_id, tbl.c.user_id, - tbl.c.notification_template, tbl.c.send_notice]) - .where(and_(tbl.c.feed_id == feed_id, tbl.c.room_id == room_id, - self.feed.c.id == feed_id))) - try: - (feed_id, url, title, subtitle, link, next_retry, error_count, - room_id, user_id, template, send_notice) = next(rows) - notification_template = Template(template) - return (Subscription(feed_id, room_id, user_id, notification_template, send_notice) - if room_id else None, - Feed(feed_id, url, title, subtitle, link, next_retry, error_count, [])) - except (ValueError, StopIteration): - return None, None + async def update_room_id(self, old: RoomID, new: RoomID) -> None: + await self.db.execute("UPDATE subscription SET room_id = $1 WHERE room_id = $2", new, old) - def update_room_id(self, old: RoomID, new: RoomID) -> None: - self.db.execute(self.subscription.update() - .where(self.subscription.c.room_id == old) - .values(room_id=new)) + async def create_feed(self, info: Feed) -> Feed: + q = ( + "INSERT INTO feed (url, title, subtitle, link, next_retry) " + "VALUES ($1, $2, $3, $4, $5) RETURNING (id)" + ) + info.id = await self.db.fetchval( + q, info.url, info.title, info.subtitle, info.link, info.next_retry + ) + return info - def create_feed(self, info: Feed) -> Feed: - res = self.db.execute(self.feed.insert().values(url=info.url, title=info.title, - subtitle=info.subtitle, link=info.link, - next_retry=info.next_retry)) - return Feed(id=res.inserted_primary_key[0], url=info.url, title=info.title, - 
subtitle=info.subtitle, link=info.link, next_retry=info.next_retry, - error_count=info.error_count, subscriptions=[]) + async def set_backoff(self, info: Feed, error_count: int, next_retry: int) -> None: + q = "UPDATE feed SET error_count = $2, next_retry = $3 WHERE id = $1" + await self.db.execute(q, info.id, error_count, next_retry) - def set_backoff(self, info: Feed, error_count: int, next_retry: int) -> None: - self.db.execute(self.feed.update() - .where(self.feed.c.id == info.id) - .values(error_count=error_count, next_retry=next_retry)) + async def subscribe( + self, + feed_id: int, + room_id: RoomID, + user_id: UserID, + template: str | None = None, + send_notice: bool = True, + ) -> None: + q = """ + INSERT INTO subscription (feed_id, room_id, user_id, notification_template, send_notice) + VALUES ($1, $2, $3, $4, $5) + """ + template = template or "New post in $feed_title: [$title]($link)" + await self.db.execute(q, feed_id, room_id, user_id, template, send_notice) - def subscribe(self, feed_id: int, room_id: RoomID, user_id: UserID) -> None: - self.db.execute(self.subscription.insert().values( - feed_id=feed_id, room_id=room_id, user_id=user_id, - notification_template="New post in $feed_title: [$title]($link)")) + async def unsubscribe(self, feed_id: int, room_id: RoomID) -> None: + q = "DELETE FROM subscription WHERE feed_id = $1 AND room_id = $2" + await self.db.execute(q, feed_id, room_id) - def unsubscribe(self, feed_id: int, room_id: RoomID) -> None: - tbl = self.subscription - self.db.execute(tbl.delete().where(and_(tbl.c.feed_id == feed_id, - tbl.c.room_id == room_id))) + async def update_template(self, feed_id: int, room_id: RoomID, template: str) -> None: + q = "UPDATE subscription SET notification_template=$3 WHERE feed_id=$1 AND room_id=$2" + await self.db.execute(q, feed_id, room_id, template) - def update_template(self, feed_id: int, room_id: RoomID, template: str) -> None: - tbl = self.subscription - self.db.execute(tbl.update() - 
.where(and_(tbl.c.feed_id == feed_id, tbl.c.room_id == room_id)) - .values(notification_template=template)) - - def set_send_notice(self, feed_id: int, room_id: RoomID, send_notice: bool) -> None: - tbl = self.subscription - self.db.execute(tbl.update() - .where(and_(tbl.c.feed_id == feed_id, tbl.c.room_id == room_id)) - .values(send_notice=send_notice)) + async def set_send_notice(self, feed_id: int, room_id: RoomID, send_notice: bool) -> None: + q = "UPDATE subscription SET send_notice=$3 WHERE feed_id=$1 AND room_id=$2" + await self.db.execute(q, feed_id, room_id, send_notice) diff --git a/rss/migrations.py b/rss/migrations.py index b2b6a6a..2dfd8e4 100644 --- a/rss/migrations.py +++ b/rss/migrations.py @@ -1,5 +1,5 @@ # rss - A maubot plugin to subscribe to RSS/Atom feeds. -# Copyright (C) 2019 Tulir Asokan +# Copyright (C) 2022 Tulir Asokan # # This program is free software: you can redistribute it and/or modify # it under the terms of the GNU Affero General Public License as published by @@ -13,13 +13,62 @@ # # You should have received a copy of the GNU Affero General Public License # along with this program. If not, see <https://www.gnu.org/licenses/>. 
-from sqlalchemy import select -from sqlalchemy.engine.base import Engine -from alembic.migration import MigrationContext -from alembic.operations import Operations +from mautrix.util.async_db import Connection, Scheme, UpgradeTable + +upgrade_table = UpgradeTable() -def run(engine: Engine): - conn = engine.connect() - ctx = MigrationContext.configure(conn) - op = Operations(ctx) +@upgrade_table.register(description="Latest revision", upgrades_to=3) +async def upgrade_latest(conn: Connection, scheme: Scheme) -> None: + gen = "GENERATED ALWAYS AS IDENTITY" if scheme != Scheme.SQLITE else "" + await conn.execute( + f"""CREATE TABLE IF NOT EXISTS feed ( + id INTEGER {gen}, + url TEXT NOT NULL, + title TEXT NOT NULL, + subtitle TEXT NOT NULL, + link TEXT NOT NULL, + + next_retry BIGINT DEFAULT 0, + error_count BIGINT DEFAULT 0, + + PRIMARY KEY (id), + UNIQUE (url) + )""" + ) + await conn.execute( + """CREATE TABLE IF NOT EXISTS subscription ( + feed_id INTEGER, + room_id TEXT, + user_id TEXT NOT NULL, + + notification_template TEXT, + send_notice BOOLEAN DEFAULT true, + + PRIMARY KEY (feed_id, room_id), + FOREIGN KEY (feed_id) REFERENCES feed (id) + )""" + ) + await conn.execute( + """CREATE TABLE entry ( + feed_id INTEGER, + id TEXT, + date timestamp NOT NULL, + title TEXT NOT NULL, + summary TEXT NOT NULL, + link TEXT NOT NULL, + PRIMARY KEY (feed_id, id), + FOREIGN KEY (feed_id) REFERENCES feed (id) + );""" + ) + + +@upgrade_table.register(description="Add send_notice field to subscriptions") +async def upgrade_v2(conn: Connection) -> None: + await conn.execute("ALTER TABLE subscription ADD COLUMN send_notice BOOLEAN DEFAULT true") + + +@upgrade_table.register(description="Add error counts to feeds") +async def upgrade_v3(conn: Connection) -> None: + await conn.execute("ALTER TABLE feed ADD COLUMN next_retry BIGINT DEFAULT 0") + await conn.execute("ALTER TABLE feed ADD COLUMN error_count BIGINT DEFAULT 0") From 9a75ee4021920b2a7e47eedc02dc84cbbb63f566 Mon Sep 17 
00:00:00 2001 From: Tulir Asokan Date: Sat, 26 Mar 2022 14:45:07 +0200 Subject: [PATCH 05/25] Make default notification template configurable Closes #29 Fixes #24 Co-authored-by: noantiq --- base-config.yaml | 2 ++ rss/bot.py | 5 ++++- 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/base-config.yaml b/base-config.yaml index 053b8b4..52c92d4 100644 --- a/base-config.yaml +++ b/base-config.yaml @@ -8,6 +8,8 @@ spam_sleep: 2 # The prefix for all commands # It has to be prefixed with ! in matrix to be recognised command_prefix: "rss" +# Default post notification template for new subscriptions +notification_template: "New post in $feed_title: [$title]($link)" # Users who can bypass room permission checks admins: - "@user:example.com" diff --git a/rss/bot.py b/rss/bot.py index 90776f6..b522369 100644 --- a/rss/bot.py +++ b/rss/bot.py @@ -51,6 +51,7 @@ class Config(BaseProxyConfig): helper.copy("max_backoff") helper.copy("spam_sleep") helper.copy("command_prefix") + helper.copy("notification_template") helper.copy("admins") @@ -360,7 +361,9 @@ class RSSBot(Plugin): ) await evt.reply(f"{subscriber} had already subscribed this room to {feed_info}") else: - await self.dbm.subscribe(feed.id, evt.room_id, evt.sender) + await self.dbm.subscribe( + feed.id, evt.room_id, evt.sender, self.config["notification_template"] + ) await evt.reply(f"Subscribed to {feed_info}") @rss.subcommand( From 7b609ebb241326a9bc1de389a3c34e5470575346 Mon Sep 17 00:00:00 2001 From: Tulir Asokan Date: Sat, 26 Mar 2022 17:19:32 +0200 Subject: [PATCH 06/25] Use different message when there are no subscriptions --- rss/bot.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/rss/bot.py b/rss/bot.py index b522369..f80de7e 100644 --- a/rss/bot.py +++ b/rss/bot.py @@ -446,6 +446,9 @@ class RSSBot(Plugin): ) async def command_subscriptions(self, evt: MessageEvent) -> None: subscriptions = await self.dbm.get_feeds_by_room(evt.room_id) + if len(subscriptions) == 0: + await evt.reply("There are 
no RSS subscriptions in this room") + return await evt.reply( "**Subscriptions in this room:**\n\n" + "\n".join( From b7e4a2a7bd33fcaa810e11e8ae8a0f31a06b1b88 Mon Sep 17 00:00:00 2001 From: Tulir Asokan Date: Mon, 28 Mar 2022 17:25:08 +0300 Subject: [PATCH 07/25] Add IF NOT EXISTS for entry table creation --- rss/migrations.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/rss/migrations.py b/rss/migrations.py index 2dfd8e4..689f784 100644 --- a/rss/migrations.py +++ b/rss/migrations.py @@ -50,7 +50,7 @@ async def upgrade_latest(conn: Connection, scheme: Scheme) -> None: )""" ) await conn.execute( - """CREATE TABLE entry ( + """CREATE TABLE IF NOT EXISTS entry ( feed_id INTEGER, id TEXT, date timestamp NOT NULL, @@ -59,7 +59,7 @@ async def upgrade_latest(conn: Connection, scheme: Scheme) -> None: link TEXT NOT NULL, PRIMARY KEY (feed_id, id), FOREIGN KEY (feed_id) REFERENCES feed (id) - );""" + )""" ) From b9bc6fbc814ccdbf4719fbe9e9df4b39448a23a6 Mon Sep 17 00:00:00 2001 From: Tulir Asokan Date: Mon, 28 Mar 2022 17:25:41 +0300 Subject: [PATCH 08/25] Bump version to 0.3.0 --- maubot.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/maubot.yaml b/maubot.yaml index faef689..f17f5e4 100644 --- a/maubot.yaml +++ b/maubot.yaml @@ -1,6 +1,6 @@ maubot: 0.3.0 id: xyz.maubot.rss -version: 0.2.6 +version: 0.3.0 license: AGPL-3.0-or-later modules: - rss From 35f2fe63df4473e2aa8e8f82dd3f95a1ef7fc413 Mon Sep 17 00:00:00 2001 From: Tulir Asokan Date: Sat, 30 Apr 2022 21:14:19 +0300 Subject: [PATCH 09/25] Add support for old SQLites Closes #31 --- rss/db.py | 22 ++++++++++++++++++---- 1 file changed, 18 insertions(+), 4 deletions(-) diff --git a/rss/db.py b/rss/db.py index 69c930c..c7cea64 100644 --- a/rss/db.py +++ b/rss/db.py @@ -23,7 +23,7 @@ from attr import dataclass import attr from mautrix.types import RoomID, UserID -from mautrix.util.async_db import Database, Scheme +from mautrix.util.async_db import Database, Scheme, 
SQLiteCursor @dataclass @@ -182,9 +182,23 @@ class DBManager: "INSERT INTO feed (url, title, subtitle, link, next_retry) " "VALUES ($1, $2, $3, $4, $5) RETURNING (id)" ) - info.id = await self.db.fetchval( - q, info.url, info.title, info.subtitle, info.link, info.next_retry - ) + # SQLite only gained RETURNING support in v3.35 (2021-03-12) + # TODO remove this special case in a couple of years + if self.db.scheme == Scheme.SQLITE: + cur = await self.db.execute( + q.replace(" RETURNING (id)", ""), + info.url, + info.title, + info.subtitle, + info.link, + info.next_retry, + ) + assert isinstance(cur, SQLiteCursor) + info.id = cur.lastrowid + else: + info.id = await self.db.fetchval( + q, info.url, info.title, info.subtitle, info.link, info.next_retry + ) return info async def set_backoff(self, info: Feed, error_count: int, next_retry: int) -> None: From 70eb6efed52fb5e5a9a8e25398d16b219cf39883 Mon Sep 17 00:00:00 2001 From: Tulir Asokan Date: Sat, 30 Apr 2022 21:14:28 +0300 Subject: [PATCH 10/25] Fix Python 3.10 compatibility --- rss/bot.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/rss/bot.py b/rss/bot.py index f80de7e..d6048c5 100644 --- a/rss/bot.py +++ b/rss/bot.py @@ -90,7 +90,7 @@ class RSSBot(Plugin): self.dbm = DBManager(self.database) self.http = self.client.api.session self.power_level_cache = {} - self.poll_task = asyncio.ensure_future(self.poll_feeds(), loop=self.loop) + self.poll_task = asyncio.create_task(self.poll_feeds()) async def stop(self) -> None: await super().stop() @@ -131,7 +131,7 @@ class RSSBot(Plugin): if spam_sleep >= 0: for task in tasks: await task - await asyncio.sleep(spam_sleep, loop=self.loop) + await asyncio.sleep(spam_sleep) else: await asyncio.gather(*tasks) @@ -185,7 +185,7 @@ class RSSBot(Plugin): self.log.debug("Polling stopped") except Exception: self.log.exception("Error while polling feeds") - await asyncio.sleep(self.config["update_interval"] * 60, loop=self.loop) + await 
asyncio.sleep(self.config["update_interval"] * 60) async def try_parse_feed(self, feed: Feed | None = None) -> tuple[Feed, list[Entry]]: try: From fa34d80c4f131a1ecf6108e80c3623a4eac57ea2 Mon Sep 17 00:00:00 2001 From: Tulir Asokan Date: Sat, 30 Apr 2022 21:14:35 +0300 Subject: [PATCH 11/25] Update and unpin black --- .github/workflows/python-lint.yml | 1 - .pre-commit-config.yaml | 13 +++++-------- pyproject.toml | 1 - 3 files changed, 5 insertions(+), 10 deletions(-) diff --git a/.github/workflows/python-lint.yml b/.github/workflows/python-lint.yml index 7deb4c8..fc28bdb 100644 --- a/.github/workflows/python-lint.yml +++ b/.github/workflows/python-lint.yml @@ -16,7 +16,6 @@ jobs: - uses: psf/black@stable with: src: "./rss" - version: "22.1.0" - name: pre-commit run: | pip install pre-commit diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 5a205c9..7f1b3e5 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -7,17 +7,14 @@ repos: - id: end-of-file-fixer - id: check-yaml - id: check-added-large-files - # TODO convert to use the upstream psf/black when - # https://github.com/psf/black/issues/2493 gets fixed - - repo: local + - repo: https://github.com/psf/black + rev: 22.3.0 hooks: - id: black - name: black - entry: black --check - language: system - files: ^rss/.*\.py$ + language_version: python3 + files: ^rss/.*\.pyi?$ - repo: https://github.com/PyCQA/isort rev: 5.10.1 hooks: - id: isort - files: ^rss/.*$ + files: ^rss/.*\.pyi?$ diff --git a/pyproject.toml b/pyproject.toml index be9cdda..3e608c9 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -9,4 +9,3 @@ line_length = 99 [tool.black] line-length = 99 target-version = ["py38"] -required-version = "22.1.0" From e87f332e0e243a04d6f8addd71ca14f1d40d40d7 Mon Sep 17 00:00:00 2001 From: Tulir Asokan Date: Sat, 30 Apr 2022 21:24:02 +0300 Subject: [PATCH 12/25] Don't break on old mautrix-python versions --- rss/db.py | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) 
diff --git a/rss/db.py b/rss/db.py index c7cea64..e6faa88 100644 --- a/rss/db.py +++ b/rss/db.py @@ -23,7 +23,13 @@ from attr import dataclass import attr from mautrix.types import RoomID, UserID -from mautrix.util.async_db import Database, Scheme, SQLiteCursor +from mautrix.util.async_db import Database, Scheme + +# TODO make this import unconditional after updating mautrix-python +try: + from mautrix.util.async_db import SQLiteCursor +except ImportError: + SQLiteCursor = None @dataclass @@ -193,7 +199,8 @@ class DBManager: info.link, info.next_retry, ) - assert isinstance(cur, SQLiteCursor) + if SQLiteCursor is not None: + assert isinstance(cur, SQLiteCursor) info.id = cur.lastrowid else: info.id = await self.db.fetchval( From e7af4d2657b33402c1fe4ca7f388aa0f83a8affb Mon Sep 17 00:00:00 2001 From: Tulir Asokan Date: Mon, 2 May 2022 10:29:41 +0300 Subject: [PATCH 13/25] Bump version to 0.3.1 --- maubot.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/maubot.yaml b/maubot.yaml index f17f5e4..38fba7d 100644 --- a/maubot.yaml +++ b/maubot.yaml @@ -1,6 +1,6 @@ maubot: 0.3.0 id: xyz.maubot.rss -version: 0.3.0 +version: 0.3.1 license: AGPL-3.0-or-later modules: - rss From 877dcffb9c695ad0320fc6f5079f626d8bae452b Mon Sep 17 00:00:00 2001 From: Tulir Asokan Date: Sat, 18 Jun 2022 17:47:03 +0300 Subject: [PATCH 14/25] Use custom user agent --- rss/bot.py | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/rss/bot.py b/rss/bot.py index d6048c5..0352f4c 100644 --- a/rss/bot.py +++ b/rss/bot.py @@ -26,7 +26,7 @@ import aiohttp import attr import feedparser -from maubot import MessageEvent, Plugin +from maubot import MessageEvent, Plugin, __version__ as maubot_version from maubot.handlers import command, event from mautrix.types import ( EventID, @@ -198,6 +198,12 @@ class RSSBot(Plugin): self.log.warning(f"Failed to parse feed {feed.id} / {feed.url}: {e}") return feed, [] + @property + def _feed_get_headers(self) -> dict[str, 
str]: + return { + "User-Agent": f"maubot/{maubot_version} +https://github.com/maubot/rss", + } + async def parse_feed( self, *, feed: Feed | None = None, url: str | None = None ) -> tuple[Feed, list[Entry]]: @@ -207,7 +213,7 @@ class RSSBot(Plugin): feed = Feed(id=-1, url=url, title="", subtitle="", link="") elif url is not None: raise ValueError("Only one of feed or url must be set") - resp = await self.http.get(feed.url) + resp = await self.http.get(feed.url, headers=self._feed_get_headers) ct = resp.headers["Content-Type"].split(";")[0].strip() if ct == "application/json" or ct == "application/feed+json": return await self._parse_json(feed, resp) From 30ad459870470dec9986675278c70a66d7267952 Mon Sep 17 00:00:00 2001 From: Tulir Asokan Date: Sun, 19 Jun 2022 14:27:42 +0300 Subject: [PATCH 15/25] Move CI script to main maubot repo --- .gitlab-ci.yml | 32 +++----------------------------- 1 file changed, 3 insertions(+), 29 deletions(-) diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index 45ef06b..7c690ef 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -1,29 +1,3 @@ -image: dock.mau.dev/maubot/maubot - -stages: -- build - -variables: - PYTHONPATH: /opt/maubot - -build: - stage: build - except: - - tags - script: - - python3 -m maubot.cli build -o xyz.maubot.$CI_PROJECT_NAME-$CI_COMMIT_REF_NAME-$CI_COMMIT_SHORT_SHA.mbp - artifacts: - paths: - - "*.mbp" - expire_in: 365 days - -build tags: - stage: build - only: - - tags - script: - - python3 -m maubot.cli build -o xyz.maubot.$CI_PROJECT_NAME-$CI_COMMIT_TAG.mbp - artifacts: - paths: - - "*.mbp" - expire_in: never +include: +- project: 'maubot/maubot' + file: '/.gitlab-ci-plugin.yml' From 03bb1280050e6fcb09405c8848cb58ec7807f8d9 Mon Sep 17 00:00:00 2001 From: Andrew Kvalheim Date: Thu, 14 Jul 2022 09:41:33 -0700 Subject: [PATCH 16/25] Key entries by link if missing ID MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Resolves the problem of incorrectly duplicated entries in 
feeds that update content but don’t explicitly provide entry IDs. Example feed: - https://www.to-rss.xyz/wikipedia/current_events/ Example entry: Current events: 2022-07-13 https://en.wikipedia.org/wiki/Portal:Current_events/2022_July_13 [VARIABLE CONTENT] Wed, 13 Jul 2022 00:00:00 -0000 This behavior is suggested by the common practice of using an entry’s link as its ID value, and is consistent with typical feed aggregators such as Feedbin and Inoreader. --- rss/bot.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/rss/bot.py b/rss/bot.py index 0352f4c..42ca897 100644 --- a/rss/bot.py +++ b/rss/bot.py @@ -279,12 +279,12 @@ class RSSBot(Plugin): feed_id=feed_id, id=( getattr(entry, "id", None) + or getattr(entry, "link", None) or hashlib.sha1( " ".join( [ getattr(entry, "title", ""), getattr(entry, "description", ""), - getattr(entry, "link", ""), ] ).encode("utf-8") ).hexdigest() From f12d32ad3ccd3a4289df3860cbf65811420b42bf Mon Sep 17 00:00:00 2001 From: Tulir Asokan Date: Mon, 3 Oct 2022 09:25:35 +0300 Subject: [PATCH 17/25] Bump version to 0.3.2 --- maubot.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/maubot.yaml b/maubot.yaml index 38fba7d..4cefc3d 100644 --- a/maubot.yaml +++ b/maubot.yaml @@ -1,6 +1,6 @@ maubot: 0.3.0 id: xyz.maubot.rss -version: 0.3.1 +version: 0.3.2 license: AGPL-3.0-or-later modules: - rss From 1a52d18f5993e7c5e7c78729a396dbe686b0a560 Mon Sep 17 00:00:00 2001 From: Tulir Asokan Date: Tue, 21 Feb 2023 12:43:32 +0200 Subject: [PATCH 18/25] Show current template if ran without arguments --- rss/bot.py | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/rss/bot.py b/rss/bot.py index 0352f4c..6c6e2fc 100644 --- a/rss/bot.py +++ b/rss/bot.py @@ -21,6 +21,7 @@ from string import Template from time import mktime, time import asyncio import hashlib +import html import aiohttp import attr @@ -392,7 +393,7 @@ class RSSBot(Plugin): help="Change the notification template for a 
subscription in this room", ) @command.argument("feed_id", "feed ID", parser=int) - @command.argument("template", "new template", pass_raw=True) + @command.argument("template", "new template", pass_raw=True, required=False) async def command_template(self, evt: MessageEvent, feed_id: int, template: str) -> None: if not await self.can_manage(evt): return @@ -400,6 +401,13 @@ class RSSBot(Plugin): if not sub: await evt.reply("This room is not subscribed to that feed") return + if not template: + await evt.reply( + '

Current template in this room:

'
+                f"{html.escape(sub.notification_template.template)}"
+                "
", allow_html=True, markdown=False, + ) + return await self.dbm.update_template(feed.id, evt.room_id, template) sub = Subscription( feed_id=feed.id, From ef4915e43442f500f232d7f5067d6f9dbc73b013 Mon Sep 17 00:00:00 2001 From: Tulir Asokan Date: Tue, 21 Feb 2023 12:47:13 +0200 Subject: [PATCH 19/25] Add usage to readme --- README.md | 28 ++++++++++++++++++++++++++++ 1 file changed, 28 insertions(+) diff --git a/README.md b/README.md index ee06f0c..aab772a 100644 --- a/README.md +++ b/README.md @@ -1,2 +1,30 @@ # rss A [maubot](https://github.com/maubot/maubot) that posts RSS feed updates to Matrix. + +## Usage +Basic commands: + +* `!rss subscribe <url>` - Subscribe the current room to a feed. +* `!rss unsubscribe <feed ID>` - Unsubscribe the current room from a feed. +* `!rss subscriptions` - List subscriptions (and feed IDs) in the current room. +* `!rss notice <feed ID> [true/false]` - Set whether the bot should send new + posts as `m.notice` (if false, they're sent as `m.text`). +* `!rss template <feed ID> [new template]` - Change the post template for a + feed in the current room. If the new template is omitted, the bot replies + with the current template. + +### Templates +The default template is `New post in $feed_title: [$title]($link)`. + +Templates are interpreted as markdown with some simple variable substitution. +The following variables are available: + +* `$feed_url` - The URL that was used to subscribe to the feed. +* `$feed_link` - The home page of the feed. +* `$feed_title` - The title of the feed. +* `$feed_subtitle` - The subtitle of the feed. +* `$id` - The unique ID of the entry. +* `$date` - The date of the entry. +* `$title` - The title of the entry. +* `$summary` - The summary/description of the entry. +* `$link` - The link of the entry. 
From eeb71a008f39a2bcbe8b8bdba36ead433fd8eba0 Mon Sep 17 00:00:00 2001 From: Tulir Asokan Date: Tue, 21 Feb 2023 13:22:19 +0200 Subject: [PATCH 20/25] Fix formatting --- rss/bot.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/rss/bot.py b/rss/bot.py index 6c6e2fc..4806f1f 100644 --- a/rss/bot.py +++ b/rss/bot.py @@ -405,7 +405,9 @@ class RSSBot(Plugin): await evt.reply( '

Current template in this room:

'
                 f"{html.escape(sub.notification_template.template)}"
-                "
", allow_html=True, markdown=False, + "", + allow_html=True, + markdown=False, ) return await self.dbm.update_template(feed.id, evt.room_id, template) From a8f134012515202e4be7fab0b0609f08b8b068f4 Mon Sep 17 00:00:00 2001 From: Tulir Asokan Date: Thu, 30 Jan 2025 14:07:57 +0200 Subject: [PATCH 21/25] Update feedparser input --- rss/bot.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/rss/bot.py b/rss/bot.py index 945903b..423118c 100644 --- a/rss/bot.py +++ b/rss/bot.py @@ -22,6 +22,7 @@ from time import mktime, time import asyncio import hashlib import html +import io import aiohttp import attr @@ -264,7 +265,7 @@ class RSSBot(Plugin): except UnicodeDecodeError: content = str(await resp.read())[2:-1] headers = {"Content-Location": feed.url, **resp.headers, "Content-Encoding": "identity"} - parsed_data = feedparser.parse(content, response_headers=headers) + parsed_data = feedparser.parse(io.StringIO(content), response_headers=headers) if parsed_data.bozo: if not isinstance(parsed_data.bozo_exception, feedparser.ThingsNobodyCaresAboutButMe): raise parsed_data.bozo_exception From f62b0335dd0f12c95d873fe2729cca43d22f4b84 Mon Sep 17 00:00:00 2001 From: Tulir Asokan Date: Thu, 30 Jan 2025 14:08:04 +0200 Subject: [PATCH 22/25] Update linters --- .github/workflows/python-lint.yml | 2 +- .pre-commit-config.yaml | 6 +++--- pyproject.toml | 2 +- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/.github/workflows/python-lint.yml b/.github/workflows/python-lint.yml index fc28bdb..18be560 100644 --- a/.github/workflows/python-lint.yml +++ b/.github/workflows/python-lint.yml @@ -9,7 +9,7 @@ jobs: - uses: actions/checkout@v3 - uses: actions/setup-python@v3 with: - python-version: "3.10" + python-version: "3.13" - uses: isort/isort-action@master with: sortPaths: "./rss" diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 7f1b3e5..caefdcb 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -1,6 +1,6 @@ repos: - 
repo: https://github.com/pre-commit/pre-commit-hooks - rev: v4.1.0 + rev: v5.0.0 hooks: - id: trailing-whitespace exclude_types: [markdown] @@ -8,13 +8,13 @@ repos: - id: check-yaml - id: check-added-large-files - repo: https://github.com/psf/black - rev: 22.3.0 + rev: 25.1.0 hooks: - id: black language_version: python3 files: ^rss/.*\.pyi?$ - repo: https://github.com/PyCQA/isort - rev: 5.10.1 + rev: 6.0.0 hooks: - id: isort files: ^rss/.*\.pyi?$ diff --git a/pyproject.toml b/pyproject.toml index 3e608c9..f143797 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -8,4 +8,4 @@ line_length = 99 [tool.black] line-length = 99 -target-version = ["py38"] +target-version = ["py310"] From 68e5a84096cfe26e0b95f870726912f94d540c8e Mon Sep 17 00:00:00 2001 From: Tulir Asokan Date: Thu, 30 Jan 2025 14:10:05 +0200 Subject: [PATCH 23/25] Bump version to v0.4.0 --- maubot.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/maubot.yaml b/maubot.yaml index 4cefc3d..c1ec735 100644 --- a/maubot.yaml +++ b/maubot.yaml @@ -1,6 +1,6 @@ maubot: 0.3.0 id: xyz.maubot.rss -version: 0.3.2 +version: 0.4.0 license: AGPL-3.0-or-later modules: - rss From 72d08096b7af74b1565d7958a1aac3f8e246e59d Mon Sep 17 00:00:00 2001 From: Tulir Asokan Date: Thu, 30 Jan 2025 15:47:09 +0200 Subject: [PATCH 24/25] Pass raw data to feedparser. 
Fixes #59 --- rss/bot.py | 10 ++-------- 1 file changed, 2 insertions(+), 8 deletions(-) diff --git a/rss/bot.py b/rss/bot.py index 423118c..74c1681 100644 --- a/rss/bot.py +++ b/rss/bot.py @@ -257,15 +257,9 @@ class RSSBot(Plugin): async def _parse_rss( cls, feed: Feed, resp: aiohttp.ClientResponse ) -> tuple[Feed, list[Entry]]: - try: - content = await resp.text() - except UnicodeDecodeError: - try: - content = await resp.text(encoding="utf-8", errors="ignore") - except UnicodeDecodeError: - content = str(await resp.read())[2:-1] + content = await resp.read() headers = {"Content-Location": feed.url, **resp.headers, "Content-Encoding": "identity"} - parsed_data = feedparser.parse(io.StringIO(content), response_headers=headers) + parsed_data = feedparser.parse(io.BytesIO(content), response_headers=headers) if parsed_data.bozo: if not isinstance(parsed_data.bozo_exception, feedparser.ThingsNobodyCaresAboutButMe): raise parsed_data.bozo_exception From 81ec8ed86494fae5f1dd49628c316158067ffeab Mon Sep 17 00:00:00 2001 From: Tulir Asokan Date: Thu, 30 Jan 2025 15:53:50 +0200 Subject: [PATCH 25/25] Bump version to 0.4.1 --- maubot.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/maubot.yaml b/maubot.yaml index c1ec735..b8c0836 100644 --- a/maubot.yaml +++ b/maubot.yaml @@ -1,6 +1,6 @@ maubot: 0.3.0 id: xyz.maubot.rss -version: 0.4.0 +version: 0.4.1 license: AGPL-3.0-or-later modules: - rss