mirror of
https://github.com/maubot/rss.git
synced 2025-09-22 05:54:35 -04:00
Compare commits
20 commits
Author | SHA1 | Date | |
---|---|---|---|
![]() |
7df6c62f6b | ||
![]() |
93984bef86 | ||
![]() |
81ec8ed864 | ||
![]() |
72d08096b7 | ||
![]() |
68e5a84096 | ||
![]() |
f62b0335dd | ||
![]() |
a8f1340125 | ||
![]() |
b58202ebfb | ||
![]() |
eeb71a008f | ||
![]() |
ef4915e434 | ||
![]() |
1a52d18f59 | ||
![]() |
f12d32ad3c | ||
![]() |
03bb128005 | ||
![]() |
30ad459870 | ||
![]() |
877dcffb9c | ||
![]() |
e7af4d2657 | ||
![]() |
e87f332e0e | ||
![]() |
fa34d80c4f | ||
![]() |
70eb6efed5 | ||
![]() |
35f2fe63df |
8 changed files with 90 additions and 62 deletions
3
.github/workflows/python-lint.yml
vendored
3
.github/workflows/python-lint.yml
vendored
|
@ -9,14 +9,13 @@ jobs:
|
|||
- uses: actions/checkout@v3
|
||||
- uses: actions/setup-python@v3
|
||||
with:
|
||||
python-version: "3.10"
|
||||
python-version: "3.13"
|
||||
- uses: isort/isort-action@master
|
||||
with:
|
||||
sortPaths: "./rss"
|
||||
- uses: psf/black@stable
|
||||
with:
|
||||
src: "./rss"
|
||||
version: "22.1.0"
|
||||
- name: pre-commit
|
||||
run: |
|
||||
pip install pre-commit
|
||||
|
|
|
@ -1,29 +1,3 @@
|
|||
image: dock.mau.dev/maubot/maubot
|
||||
|
||||
stages:
|
||||
- build
|
||||
|
||||
variables:
|
||||
PYTHONPATH: /opt/maubot
|
||||
|
||||
build:
|
||||
stage: build
|
||||
except:
|
||||
- tags
|
||||
script:
|
||||
- python3 -m maubot.cli build -o xyz.maubot.$CI_PROJECT_NAME-$CI_COMMIT_REF_NAME-$CI_COMMIT_SHORT_SHA.mbp
|
||||
artifacts:
|
||||
paths:
|
||||
- "*.mbp"
|
||||
expire_in: 365 days
|
||||
|
||||
build tags:
|
||||
stage: build
|
||||
only:
|
||||
- tags
|
||||
script:
|
||||
- python3 -m maubot.cli build -o xyz.maubot.$CI_PROJECT_NAME-$CI_COMMIT_TAG.mbp
|
||||
artifacts:
|
||||
paths:
|
||||
- "*.mbp"
|
||||
expire_in: never
|
||||
include:
|
||||
- project: 'maubot/maubot'
|
||||
file: '/.gitlab-ci-plugin.yml'
|
||||
|
|
|
@ -1,23 +1,20 @@
|
|||
repos:
|
||||
- repo: https://github.com/pre-commit/pre-commit-hooks
|
||||
rev: v4.1.0
|
||||
rev: v5.0.0
|
||||
hooks:
|
||||
- id: trailing-whitespace
|
||||
exclude_types: [markdown]
|
||||
- id: end-of-file-fixer
|
||||
- id: check-yaml
|
||||
- id: check-added-large-files
|
||||
# TODO convert to use the upstream psf/black when
|
||||
# https://github.com/psf/black/issues/2493 gets fixed
|
||||
- repo: local
|
||||
- repo: https://github.com/psf/black
|
||||
rev: 25.1.0
|
||||
hooks:
|
||||
- id: black
|
||||
name: black
|
||||
entry: black --check
|
||||
language: system
|
||||
files: ^rss/.*\.py$
|
||||
language_version: python3
|
||||
files: ^rss/.*\.pyi?$
|
||||
- repo: https://github.com/PyCQA/isort
|
||||
rev: 5.10.1
|
||||
rev: 6.0.0
|
||||
hooks:
|
||||
- id: isort
|
||||
files: ^rss/.*$
|
||||
files: ^rss/.*\.pyi?$
|
||||
|
|
28
README.md
28
README.md
|
@ -1,2 +1,30 @@
|
|||
# rss
|
||||
A [maubot](https://github.com/maubot/maubot) that posts RSS feed updates to Matrix.
|
||||
|
||||
## Usage
|
||||
Basic commands:
|
||||
|
||||
* `!rss subscribe <url>` - Subscribe the current room to a feed.
|
||||
* `!rss unsubscribe <feed ID>` - Unsubscribe the current room from a feed.
|
||||
* `!rss subscriptions` - List subscriptions (and feed IDs) in the current room.
|
||||
* `!rss notice <feed ID> [true/false]` - Set whether the bot should send new
|
||||
posts as `m.notice` (if false, they're sent as `m.text`).
|
||||
* `!rss template <feed ID> [new template]` - Change the post template for a
|
||||
feed in the current room. If the new template is omitted, the bot replies
|
||||
with the current template.
|
||||
|
||||
### Templates
|
||||
The default template is `New post in $feed_title: [$title]($link)`.
|
||||
|
||||
Templates are interpreted as markdown with some simple variable substitution.
|
||||
The following variables are available:
|
||||
|
||||
* `$feed_url` - The URL that was used to subscribe to the feed.
|
||||
* `$feed_link` - The home page of the feed.
|
||||
* `$feed_title` - The title of the feed.
|
||||
* `$feed_subtitle` - The subtitle of the feed.
|
||||
* `$id` - The unique ID of the entry.
|
||||
* `$date` - The date of the entry.
|
||||
* `$title` - The title of the entry.
|
||||
* `$summary` - The summary/description of the entry.
|
||||
* `$link` - The link of the entry.
|
||||
|
|
|
@ -2,6 +2,8 @@
|
|||
update_interval: 60
|
||||
# Maximum backoff in minutes when failing to fetch feeds (defaults to 5 days)
|
||||
max_backoff: 7200
|
||||
# Maximum number of feeds to poll in parallel. Set to 0 (or any non-positive value) to disable the limit.
|
||||
poll_parallelism_limit: 10
|
||||
# The time to sleep between send requests when broadcasting a new feed entry.
|
||||
# Set to 0 to disable sleep or -1 to run all requests asynchronously at once.
|
||||
spam_sleep: 2
|
||||
|
|
|
@ -1,6 +1,6 @@
|
|||
maubot: 0.3.0
|
||||
id: xyz.maubot.rss
|
||||
version: 0.3.0
|
||||
version: 0.4.1
|
||||
license: AGPL-3.0-or-later
|
||||
modules:
|
||||
- rss
|
||||
|
|
|
@ -8,5 +8,4 @@ line_length = 99
|
|||
|
||||
[tool.black]
|
||||
line-length = 99
|
||||
target-version = ["py38"]
|
||||
required-version = "22.1.0"
|
||||
target-version = ["py310"]
|
||||
|
|
65
rss/bot.py
65
rss/bot.py
|
@ -18,15 +18,17 @@ from __future__ import annotations
|
|||
from typing import Any, Iterable
|
||||
from datetime import datetime
|
||||
from string import Template
|
||||
from time import mktime, time
|
||||
from time import mktime, monotonic, time
|
||||
import asyncio
|
||||
import hashlib
|
||||
import html
|
||||
import io
|
||||
|
||||
import aiohttp
|
||||
import attr
|
||||
import feedparser
|
||||
|
||||
from maubot import MessageEvent, Plugin
|
||||
from maubot import MessageEvent, Plugin, __version__ as maubot_version
|
||||
from maubot.handlers import command, event
|
||||
from mautrix.types import (
|
||||
EventID,
|
||||
|
@ -53,6 +55,7 @@ class Config(BaseProxyConfig):
|
|||
helper.copy("command_prefix")
|
||||
helper.copy("notification_template")
|
||||
helper.copy("admins")
|
||||
helper.copy("poll_parallelism_limit")
|
||||
|
||||
|
||||
class BoolArgument(command.Argument):
|
||||
|
@ -73,6 +76,7 @@ class BoolArgument(command.Argument):
|
|||
class RSSBot(Plugin):
|
||||
dbm: DBManager
|
||||
poll_task: asyncio.Future
|
||||
poll_sema: asyncio.Semaphore | None
|
||||
http: aiohttp.ClientSession
|
||||
power_level_cache: dict[RoomID, tuple[int, PowerLevelStateEventContent]]
|
||||
|
||||
|
@ -86,11 +90,18 @@ class RSSBot(Plugin):
|
|||
|
||||
async def start(self) -> None:
|
||||
await super().start()
|
||||
self.config.load_and_update()
|
||||
self.on_external_config_update()
|
||||
self.dbm = DBManager(self.database)
|
||||
self.http = self.client.api.session
|
||||
self.power_level_cache = {}
|
||||
self.poll_task = asyncio.ensure_future(self.poll_feeds(), loop=self.loop)
|
||||
self.poll_task = asyncio.create_task(self.poll_feeds())
|
||||
|
||||
# Reload the plugin configuration and rebuild the feed-polling semaphore from
# the "poll_parallelism_limit" setting. start() also calls this during startup.
def on_external_config_update(self) -> None:
|
||||
self.config.load_and_update()
|
||||
poll_parallelism_limit = self.config["poll_parallelism_limit"]
|
||||
# A non-positive limit means "no limit": poll_sema is set to None instead of a semaphore.
self.poll_sema = (
|
||||
asyncio.Semaphore(poll_parallelism_limit) if poll_parallelism_limit > 0 else None
|
||||
)
|
||||
|
||||
async def stop(self) -> None:
|
||||
await super().stop()
|
||||
|
@ -131,7 +142,7 @@ class RSSBot(Plugin):
|
|||
if spam_sleep >= 0:
|
||||
for task in tasks:
|
||||
await task
|
||||
await asyncio.sleep(spam_sleep, loop=self.loop)
|
||||
await asyncio.sleep(spam_sleep)
|
||||
else:
|
||||
await asyncio.gather(*tasks)
|
||||
|
||||
|
@ -140,6 +151,7 @@ class RSSBot(Plugin):
|
|||
if not subs:
|
||||
return
|
||||
now = int(time())
|
||||
start = monotonic()
|
||||
tasks = [self.try_parse_feed(feed=feed) for feed in subs if feed.next_retry < now]
|
||||
feed: Feed
|
||||
entries: Iterable[Entry]
|
||||
|
@ -174,7 +186,8 @@ class RSSBot(Plugin):
|
|||
await self.dbm.add_entries(new_entry_list)
|
||||
for entry in new_entry_list:
|
||||
await self._broadcast(feed, entry, feed.subscriptions)
|
||||
self.log.info(f"Finished polling {len(tasks)} feeds")
|
||||
# Measure elapsed time against `start` (the monotonic() reading taken before polling);
# `now` is wall-clock epoch seconds from time() and must not be mixed with monotonic().
duration = monotonic() - start
|
||||
self.log.info(f"Finished polling {len(tasks)} feeds in {duration:.2f} seconds")
|
||||
|
||||
async def _poll_feeds(self) -> None:
|
||||
self.log.debug("Polling started")
|
||||
|
@ -185,9 +198,16 @@ class RSSBot(Plugin):
|
|||
self.log.debug("Polling stopped")
|
||||
except Exception:
|
||||
self.log.exception("Error while polling feeds")
|
||||
await asyncio.sleep(self.config["update_interval"] * 60, loop=self.loop)
|
||||
await asyncio.sleep(self.config["update_interval"] * 60)
|
||||
|
||||
# Fetch and parse one feed by delegating to _try_parse_feed(). When a
# parallelism limit is configured (self.poll_sema is not None), the semaphore
# is held for the duration of the call; otherwise the call runs unthrottled.
async def try_parse_feed(self, feed: Feed | None = None) -> tuple[Feed, list[Entry]]:
|
||||
if self.poll_sema is not None:
|
||||
async with self.poll_sema:
|
||||
return await self._try_parse_feed(feed)
|
||||
else:
|
||||
return await self._try_parse_feed(feed)
|
||||
|
||||
async def _try_parse_feed(self, feed: Feed | None = None) -> tuple[Feed, list[Entry]]:
|
||||
try:
|
||||
self.log.trace(
|
||||
f"Trying to fetch {feed.id} / {feed.url} "
|
||||
|
@ -198,6 +218,12 @@ class RSSBot(Plugin):
|
|||
self.log.warning(f"Failed to parse feed {feed.id} / {feed.url}: {e}")
|
||||
return feed, []
|
||||
|
||||
# HTTP headers passed to the feed GET request in parse_feed(): a User-Agent
# made of the running maubot version and this plugin's repository URL.
@property
|
||||
def _feed_get_headers(self) -> dict[str, str]:
|
||||
return {
|
||||
"User-Agent": f"maubot/{maubot_version} +https://github.com/maubot/rss",
|
||||
}
|
||||
|
||||
async def parse_feed(
|
||||
self, *, feed: Feed | None = None, url: str | None = None
|
||||
) -> tuple[Feed, list[Entry]]:
|
||||
|
@ -207,7 +233,7 @@ class RSSBot(Plugin):
|
|||
feed = Feed(id=-1, url=url, title="", subtitle="", link="")
|
||||
elif url is not None:
|
||||
raise ValueError("Only one of feed or url must be set")
|
||||
resp = await self.http.get(feed.url)
|
||||
resp = await self.http.get(feed.url, headers=self._feed_get_headers)
|
||||
ct = resp.headers["Content-Type"].split(";")[0].strip()
|
||||
if ct == "application/json" or ct == "application/feed+json":
|
||||
return await self._parse_json(feed, resp)
|
||||
|
@ -249,15 +275,9 @@ class RSSBot(Plugin):
|
|||
async def _parse_rss(
|
||||
cls, feed: Feed, resp: aiohttp.ClientResponse
|
||||
) -> tuple[Feed, list[Entry]]:
|
||||
try:
|
||||
content = await resp.text()
|
||||
except UnicodeDecodeError:
|
||||
try:
|
||||
content = await resp.text(encoding="utf-8", errors="ignore")
|
||||
except UnicodeDecodeError:
|
||||
content = str(await resp.read())[2:-1]
|
||||
content = await resp.read()
|
||||
headers = {"Content-Location": feed.url, **resp.headers, "Content-Encoding": "identity"}
|
||||
parsed_data = feedparser.parse(content, response_headers=headers)
|
||||
parsed_data = feedparser.parse(io.BytesIO(content), response_headers=headers)
|
||||
if parsed_data.bozo:
|
||||
if not isinstance(parsed_data.bozo_exception, feedparser.ThingsNobodyCaresAboutButMe):
|
||||
raise parsed_data.bozo_exception
|
||||
|
@ -273,12 +293,12 @@ class RSSBot(Plugin):
|
|||
feed_id=feed_id,
|
||||
id=(
|
||||
getattr(entry, "id", None)
|
||||
or getattr(entry, "link", None)
|
||||
or hashlib.sha1(
|
||||
" ".join(
|
||||
[
|
||||
getattr(entry, "title", ""),
|
||||
getattr(entry, "description", ""),
|
||||
getattr(entry, "link", ""),
|
||||
]
|
||||
).encode("utf-8")
|
||||
).hexdigest()
|
||||
|
@ -386,7 +406,7 @@ class RSSBot(Plugin):
|
|||
help="Change the notification template for a subscription in this room",
|
||||
)
|
||||
@command.argument("feed_id", "feed ID", parser=int)
|
||||
@command.argument("template", "new template", pass_raw=True)
|
||||
@command.argument("template", "new template", pass_raw=True, required=False)
|
||||
async def command_template(self, evt: MessageEvent, feed_id: int, template: str) -> None:
|
||||
if not await self.can_manage(evt):
|
||||
return
|
||||
|
@ -394,6 +414,15 @@ class RSSBot(Plugin):
|
|||
if not sub:
|
||||
await evt.reply("This room is not subscribed to that feed")
|
||||
return
|
||||
if not template:
|
||||
await evt.reply(
|
||||
'<p>Current template in this room:</p><pre><code language="markdown">'
|
||||
f"{html.escape(sub.notification_template.template)}"
|
||||
"</code></pre>",
|
||||
allow_html=True,
|
||||
markdown=False,
|
||||
)
|
||||
return
|
||||
await self.dbm.update_template(feed.id, evt.room_id, template)
|
||||
sub = Subscription(
|
||||
feed_id=feed.id,
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue