mirror of
https://github.com/maubot/rss.git
synced 2025-11-23 12:20:39 -05:00
Compare commits
No commits in common. "master" and "v0.3.2" have entirely different histories.
8 changed files with 43 additions and 75 deletions
2
.github/workflows/python-lint.yml
vendored
2
.github/workflows/python-lint.yml
vendored
|
|
@ -9,7 +9,7 @@ jobs:
|
||||||
- uses: actions/checkout@v3
|
- uses: actions/checkout@v3
|
||||||
- uses: actions/setup-python@v3
|
- uses: actions/setup-python@v3
|
||||||
with:
|
with:
|
||||||
python-version: "3.13"
|
python-version: "3.10"
|
||||||
- uses: isort/isort-action@master
|
- uses: isort/isort-action@master
|
||||||
with:
|
with:
|
||||||
sortPaths: "./rss"
|
sortPaths: "./rss"
|
||||||
|
|
|
||||||
|
|
@ -1,6 +1,6 @@
|
||||||
repos:
|
repos:
|
||||||
- repo: https://github.com/pre-commit/pre-commit-hooks
|
- repo: https://github.com/pre-commit/pre-commit-hooks
|
||||||
rev: v5.0.0
|
rev: v4.1.0
|
||||||
hooks:
|
hooks:
|
||||||
- id: trailing-whitespace
|
- id: trailing-whitespace
|
||||||
exclude_types: [markdown]
|
exclude_types: [markdown]
|
||||||
|
|
@ -8,13 +8,13 @@ repos:
|
||||||
- id: check-yaml
|
- id: check-yaml
|
||||||
- id: check-added-large-files
|
- id: check-added-large-files
|
||||||
- repo: https://github.com/psf/black
|
- repo: https://github.com/psf/black
|
||||||
rev: 25.1.0
|
rev: 22.3.0
|
||||||
hooks:
|
hooks:
|
||||||
- id: black
|
- id: black
|
||||||
language_version: python3
|
language_version: python3
|
||||||
files: ^rss/.*\.pyi?$
|
files: ^rss/.*\.pyi?$
|
||||||
- repo: https://github.com/PyCQA/isort
|
- repo: https://github.com/PyCQA/isort
|
||||||
rev: 6.0.0
|
rev: 5.10.1
|
||||||
hooks:
|
hooks:
|
||||||
- id: isort
|
- id: isort
|
||||||
files: ^rss/.*\.pyi?$
|
files: ^rss/.*\.pyi?$
|
||||||
|
|
|
||||||
28
README.md
28
README.md
|
|
@ -1,30 +1,2 @@
|
||||||
# rss
|
# rss
|
||||||
A [maubot](https://github.com/maubot/maubot) that posts RSS feed updates to Matrix.
|
A [maubot](https://github.com/maubot/maubot) that posts RSS feed updates to Matrix.
|
||||||
|
|
||||||
## Usage
|
|
||||||
Basic commands:
|
|
||||||
|
|
||||||
* `!rss subscribe <url>` - Subscribe the current room to a feed.
|
|
||||||
* `!rss unsubscribe <feed ID>` - Unsubscribe the current room from a feed.
|
|
||||||
* `!rss subscriptions` - List subscriptions (and feed IDs) in the current room.
|
|
||||||
* `!rss notice <feed ID> [true/false]` - Set whether the bot should send new
|
|
||||||
posts as `m.notice` (if false, they're sent as `m.text`).
|
|
||||||
* `!rss template <feed ID> [new template]` - Change the post template for a
|
|
||||||
feed in the current room. If the new template is omitted, the bot replies
|
|
||||||
with the current template.
|
|
||||||
|
|
||||||
### Templates
|
|
||||||
The default template is `New post in $feed_title: [$title]($link)`.
|
|
||||||
|
|
||||||
Templates are interpreted as markdown with some simple variable substitution.
|
|
||||||
The following variables are available:
|
|
||||||
|
|
||||||
* `$feed_url` - The URL that was used to subscribe to the feed.
|
|
||||||
* `$feed_link` - The home page of the feed.
|
|
||||||
* `$feed_title` - The title of the feed.
|
|
||||||
* `$feed_subtitle` - The subtitle of the feed.
|
|
||||||
* `$id` - The unique ID of the entry.
|
|
||||||
* `$date` - The date of the entry.
|
|
||||||
* `$title` - The title of the entry.
|
|
||||||
* `$summary` - The summary/description of the entry.
|
|
||||||
* `$link` - The link of the entry.
|
|
||||||
|
|
|
||||||
|
|
@ -2,8 +2,6 @@
|
||||||
update_interval: 60
|
update_interval: 60
|
||||||
# Maximum backoff in minutes when failing to fetch feeds (defaults to 5 days)
|
# Maximum backoff in minutes when failing to fetch feeds (defaults to 5 days)
|
||||||
max_backoff: 7200
|
max_backoff: 7200
|
||||||
# How many feeds to poll in parallel? Set to 0 to disable limit.
|
|
||||||
poll_parallelism_limit: 10
|
|
||||||
# The time to sleep between send requests when broadcasting a new feed entry.
|
# The time to sleep between send requests when broadcasting a new feed entry.
|
||||||
# Set to 0 to disable sleep or -1 to run all requests asynchronously at once.
|
# Set to 0 to disable sleep or -1 to run all requests asynchronously at once.
|
||||||
spam_sleep: 2
|
spam_sleep: 2
|
||||||
|
|
|
||||||
|
|
@ -1,6 +1,6 @@
|
||||||
maubot: 0.3.0
|
maubot: 0.3.0
|
||||||
id: xyz.maubot.rss
|
id: xyz.maubot.rss
|
||||||
version: 0.4.1
|
version: 0.3.2
|
||||||
license: AGPL-3.0-or-later
|
license: AGPL-3.0-or-later
|
||||||
modules:
|
modules:
|
||||||
- rss
|
- rss
|
||||||
|
|
|
||||||
|
|
@ -8,4 +8,4 @@ line_length = 99
|
||||||
|
|
||||||
[tool.black]
|
[tool.black]
|
||||||
line-length = 99
|
line-length = 99
|
||||||
target-version = ["py310"]
|
target-version = ["py38"]
|
||||||
|
|
|
||||||
49
rss/bot.py
49
rss/bot.py
|
|
@ -18,11 +18,9 @@ from __future__ import annotations
|
||||||
from typing import Any, Iterable
|
from typing import Any, Iterable
|
||||||
from datetime import datetime
|
from datetime import datetime
|
||||||
from string import Template
|
from string import Template
|
||||||
from time import mktime, monotonic, time
|
from time import mktime, time
|
||||||
import asyncio
|
import asyncio
|
||||||
import hashlib
|
import hashlib
|
||||||
import html
|
|
||||||
import io
|
|
||||||
|
|
||||||
import aiohttp
|
import aiohttp
|
||||||
import attr
|
import attr
|
||||||
|
|
@ -55,7 +53,6 @@ class Config(BaseProxyConfig):
|
||||||
helper.copy("command_prefix")
|
helper.copy("command_prefix")
|
||||||
helper.copy("notification_template")
|
helper.copy("notification_template")
|
||||||
helper.copy("admins")
|
helper.copy("admins")
|
||||||
helper.copy("poll_parallelism_limit")
|
|
||||||
|
|
||||||
|
|
||||||
class BoolArgument(command.Argument):
|
class BoolArgument(command.Argument):
|
||||||
|
|
@ -76,7 +73,6 @@ class BoolArgument(command.Argument):
|
||||||
class RSSBot(Plugin):
|
class RSSBot(Plugin):
|
||||||
dbm: DBManager
|
dbm: DBManager
|
||||||
poll_task: asyncio.Future
|
poll_task: asyncio.Future
|
||||||
poll_sema: asyncio.Semaphore | None
|
|
||||||
http: aiohttp.ClientSession
|
http: aiohttp.ClientSession
|
||||||
power_level_cache: dict[RoomID, tuple[int, PowerLevelStateEventContent]]
|
power_level_cache: dict[RoomID, tuple[int, PowerLevelStateEventContent]]
|
||||||
|
|
||||||
|
|
@ -90,19 +86,12 @@ class RSSBot(Plugin):
|
||||||
|
|
||||||
async def start(self) -> None:
|
async def start(self) -> None:
|
||||||
await super().start()
|
await super().start()
|
||||||
self.on_external_config_update()
|
self.config.load_and_update()
|
||||||
self.dbm = DBManager(self.database)
|
self.dbm = DBManager(self.database)
|
||||||
self.http = self.client.api.session
|
self.http = self.client.api.session
|
||||||
self.power_level_cache = {}
|
self.power_level_cache = {}
|
||||||
self.poll_task = asyncio.create_task(self.poll_feeds())
|
self.poll_task = asyncio.create_task(self.poll_feeds())
|
||||||
|
|
||||||
def on_external_config_update(self) -> None:
|
|
||||||
self.config.load_and_update()
|
|
||||||
poll_parallelism_limit = self.config["poll_parallelism_limit"]
|
|
||||||
self.poll_sema = (
|
|
||||||
asyncio.Semaphore(poll_parallelism_limit) if poll_parallelism_limit > 0 else None
|
|
||||||
)
|
|
||||||
|
|
||||||
async def stop(self) -> None:
|
async def stop(self) -> None:
|
||||||
await super().stop()
|
await super().stop()
|
||||||
self.poll_task.cancel()
|
self.poll_task.cancel()
|
||||||
|
|
@ -151,7 +140,6 @@ class RSSBot(Plugin):
|
||||||
if not subs:
|
if not subs:
|
||||||
return
|
return
|
||||||
now = int(time())
|
now = int(time())
|
||||||
start = monotonic()
|
|
||||||
tasks = [self.try_parse_feed(feed=feed) for feed in subs if feed.next_retry < now]
|
tasks = [self.try_parse_feed(feed=feed) for feed in subs if feed.next_retry < now]
|
||||||
feed: Feed
|
feed: Feed
|
||||||
entries: Iterable[Entry]
|
entries: Iterable[Entry]
|
||||||
|
|
@ -186,8 +174,7 @@ class RSSBot(Plugin):
|
||||||
await self.dbm.add_entries(new_entry_list)
|
await self.dbm.add_entries(new_entry_list)
|
||||||
for entry in new_entry_list:
|
for entry in new_entry_list:
|
||||||
await self._broadcast(feed, entry, feed.subscriptions)
|
await self._broadcast(feed, entry, feed.subscriptions)
|
||||||
duration = monotonic() - now
|
self.log.info(f"Finished polling {len(tasks)} feeds")
|
||||||
self.log.info(f"Finished polling {len(tasks)} feeds in {duration:.2f} seconds")
|
|
||||||
|
|
||||||
async def _poll_feeds(self) -> None:
|
async def _poll_feeds(self) -> None:
|
||||||
self.log.debug("Polling started")
|
self.log.debug("Polling started")
|
||||||
|
|
@ -201,13 +188,6 @@ class RSSBot(Plugin):
|
||||||
await asyncio.sleep(self.config["update_interval"] * 60)
|
await asyncio.sleep(self.config["update_interval"] * 60)
|
||||||
|
|
||||||
async def try_parse_feed(self, feed: Feed | None = None) -> tuple[Feed, list[Entry]]:
|
async def try_parse_feed(self, feed: Feed | None = None) -> tuple[Feed, list[Entry]]:
|
||||||
if self.poll_sema is not None:
|
|
||||||
async with self.poll_sema:
|
|
||||||
return await self._try_parse_feed(feed)
|
|
||||||
else:
|
|
||||||
return await self._try_parse_feed(feed)
|
|
||||||
|
|
||||||
async def _try_parse_feed(self, feed: Feed | None = None) -> tuple[Feed, list[Entry]]:
|
|
||||||
try:
|
try:
|
||||||
self.log.trace(
|
self.log.trace(
|
||||||
f"Trying to fetch {feed.id} / {feed.url} "
|
f"Trying to fetch {feed.id} / {feed.url} "
|
||||||
|
|
@ -275,9 +255,15 @@ class RSSBot(Plugin):
|
||||||
async def _parse_rss(
|
async def _parse_rss(
|
||||||
cls, feed: Feed, resp: aiohttp.ClientResponse
|
cls, feed: Feed, resp: aiohttp.ClientResponse
|
||||||
) -> tuple[Feed, list[Entry]]:
|
) -> tuple[Feed, list[Entry]]:
|
||||||
content = await resp.read()
|
try:
|
||||||
|
content = await resp.text()
|
||||||
|
except UnicodeDecodeError:
|
||||||
|
try:
|
||||||
|
content = await resp.text(encoding="utf-8", errors="ignore")
|
||||||
|
except UnicodeDecodeError:
|
||||||
|
content = str(await resp.read())[2:-1]
|
||||||
headers = {"Content-Location": feed.url, **resp.headers, "Content-Encoding": "identity"}
|
headers = {"Content-Location": feed.url, **resp.headers, "Content-Encoding": "identity"}
|
||||||
parsed_data = feedparser.parse(io.BytesIO(content), response_headers=headers)
|
parsed_data = feedparser.parse(content, response_headers=headers)
|
||||||
if parsed_data.bozo:
|
if parsed_data.bozo:
|
||||||
if not isinstance(parsed_data.bozo_exception, feedparser.ThingsNobodyCaresAboutButMe):
|
if not isinstance(parsed_data.bozo_exception, feedparser.ThingsNobodyCaresAboutButMe):
|
||||||
raise parsed_data.bozo_exception
|
raise parsed_data.bozo_exception
|
||||||
|
|
@ -293,12 +279,12 @@ class RSSBot(Plugin):
|
||||||
feed_id=feed_id,
|
feed_id=feed_id,
|
||||||
id=(
|
id=(
|
||||||
getattr(entry, "id", None)
|
getattr(entry, "id", None)
|
||||||
or getattr(entry, "link", None)
|
|
||||||
or hashlib.sha1(
|
or hashlib.sha1(
|
||||||
" ".join(
|
" ".join(
|
||||||
[
|
[
|
||||||
getattr(entry, "title", ""),
|
getattr(entry, "title", ""),
|
||||||
getattr(entry, "description", ""),
|
getattr(entry, "description", ""),
|
||||||
|
getattr(entry, "link", ""),
|
||||||
]
|
]
|
||||||
).encode("utf-8")
|
).encode("utf-8")
|
||||||
).hexdigest()
|
).hexdigest()
|
||||||
|
|
@ -406,7 +392,7 @@ class RSSBot(Plugin):
|
||||||
help="Change the notification template for a subscription in this room",
|
help="Change the notification template for a subscription in this room",
|
||||||
)
|
)
|
||||||
@command.argument("feed_id", "feed ID", parser=int)
|
@command.argument("feed_id", "feed ID", parser=int)
|
||||||
@command.argument("template", "new template", pass_raw=True, required=False)
|
@command.argument("template", "new template", pass_raw=True)
|
||||||
async def command_template(self, evt: MessageEvent, feed_id: int, template: str) -> None:
|
async def command_template(self, evt: MessageEvent, feed_id: int, template: str) -> None:
|
||||||
if not await self.can_manage(evt):
|
if not await self.can_manage(evt):
|
||||||
return
|
return
|
||||||
|
|
@ -414,15 +400,6 @@ class RSSBot(Plugin):
|
||||||
if not sub:
|
if not sub:
|
||||||
await evt.reply("This room is not subscribed to that feed")
|
await evt.reply("This room is not subscribed to that feed")
|
||||||
return
|
return
|
||||||
if not template:
|
|
||||||
await evt.reply(
|
|
||||||
'<p>Current template in this room:</p><pre><code language="markdown">'
|
|
||||||
f"{html.escape(sub.notification_template.template)}"
|
|
||||||
"</code></pre>",
|
|
||||||
allow_html=True,
|
|
||||||
markdown=False,
|
|
||||||
)
|
|
||||||
return
|
|
||||||
await self.dbm.update_template(feed.id, evt.room_id, template)
|
await self.dbm.update_template(feed.id, evt.room_id, template)
|
||||||
sub = Subscription(
|
sub = Subscription(
|
||||||
feed_id=feed.id,
|
feed_id=feed.id,
|
||||||
|
|
|
||||||
21
rss/db.py
21
rss/db.py
|
|
@ -25,6 +25,12 @@ import attr
|
||||||
from mautrix.types import RoomID, UserID
|
from mautrix.types import RoomID, UserID
|
||||||
from mautrix.util.async_db import Database, Scheme
|
from mautrix.util.async_db import Database, Scheme
|
||||||
|
|
||||||
|
# TODO make this import unconditional after updating mautrix-python
|
||||||
|
try:
|
||||||
|
from mautrix.util.async_db import SQLiteCursor
|
||||||
|
except ImportError:
|
||||||
|
SQLiteCursor = None
|
||||||
|
|
||||||
|
|
||||||
@dataclass
|
@dataclass
|
||||||
class Subscription:
|
class Subscription:
|
||||||
|
|
@ -182,6 +188,21 @@ class DBManager:
|
||||||
"INSERT INTO feed (url, title, subtitle, link, next_retry) "
|
"INSERT INTO feed (url, title, subtitle, link, next_retry) "
|
||||||
"VALUES ($1, $2, $3, $4, $5) RETURNING (id)"
|
"VALUES ($1, $2, $3, $4, $5) RETURNING (id)"
|
||||||
)
|
)
|
||||||
|
# SQLite only gained RETURNING support in v3.35 (2021-03-12)
|
||||||
|
# TODO remove this special case in a couple of years
|
||||||
|
if self.db.scheme == Scheme.SQLITE:
|
||||||
|
cur = await self.db.execute(
|
||||||
|
q.replace(" RETURNING (id)", ""),
|
||||||
|
info.url,
|
||||||
|
info.title,
|
||||||
|
info.subtitle,
|
||||||
|
info.link,
|
||||||
|
info.next_retry,
|
||||||
|
)
|
||||||
|
if SQLiteCursor is not None:
|
||||||
|
assert isinstance(cur, SQLiteCursor)
|
||||||
|
info.id = cur.lastrowid
|
||||||
|
else:
|
||||||
info.id = await self.db.fetchval(
|
info.id = await self.db.fetchval(
|
||||||
q, info.url, info.title, info.subtitle, info.link, info.next_retry
|
q, info.url, info.title, info.subtitle, info.link, info.next_retry
|
||||||
)
|
)
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue