mirror of
				https://github.com/maubot/rss.git
				synced 2025-10-30 18:38:52 -04:00 
			
		
		
		
	Compare commits
	
		
			No commits in common. "master" and "v0.3.2" have entirely different histories.
		
	
	
		
	
		
					 8 changed files with 43 additions and 75 deletions
				
			
		
							
								
								
									
										2
									
								
								.github/workflows/python-lint.yml
									
										
									
									
										vendored
									
									
								
							
							
						
						
									
										2
									
								
								.github/workflows/python-lint.yml
									
										
									
									
										vendored
									
									
								
							|  | @ -9,7 +9,7 @@ jobs: | |||
|     - uses: actions/checkout@v3 | ||||
|     - uses: actions/setup-python@v3 | ||||
|       with: | ||||
|         python-version: "3.13" | ||||
|         python-version: "3.10" | ||||
|     - uses: isort/isort-action@master | ||||
|       with: | ||||
|         sortPaths: "./rss" | ||||
|  |  | |||
|  | @ -1,6 +1,6 @@ | |||
| repos: | ||||
|   - repo: https://github.com/pre-commit/pre-commit-hooks | ||||
|     rev: v5.0.0 | ||||
|     rev: v4.1.0 | ||||
|     hooks: | ||||
|       - id: trailing-whitespace | ||||
|         exclude_types: [markdown] | ||||
|  | @ -8,13 +8,13 @@ repos: | |||
|       - id: check-yaml | ||||
|       - id: check-added-large-files | ||||
|   - repo: https://github.com/psf/black | ||||
|     rev: 25.1.0 | ||||
|     rev: 22.3.0 | ||||
|     hooks: | ||||
|       - id: black | ||||
|         language_version: python3 | ||||
|         files: ^rss/.*\.pyi?$ | ||||
|   - repo: https://github.com/PyCQA/isort | ||||
|     rev: 6.0.0 | ||||
|     rev: 5.10.1 | ||||
|     hooks: | ||||
|       - id: isort | ||||
|         files: ^rss/.*\.pyi?$ | ||||
|  |  | |||
							
								
								
									
										28
									
								
								README.md
									
										
									
									
									
								
							
							
						
						
									
										28
									
								
								README.md
									
										
									
									
									
								
							|  | @ -1,30 +1,2 @@ | |||
| # rss | ||||
| A [maubot](https://github.com/maubot/maubot) that posts RSS feed updates to Matrix. | ||||
| 
 | ||||
| ## Usage | ||||
| Basic commands: | ||||
| 
 | ||||
| * `!rss subscribe <url>` - Subscribe the current room to a feed. | ||||
| * `!rss unsubscribe <feed ID>` - Unsubscribe the current room from a feed. | ||||
| * `!rss subscriptions` - List subscriptions (and feed IDs) in the current room. | ||||
| * `!rss notice <feed ID> [true/false]` - Set whether the bot should send new | ||||
|   posts as `m.notice` (if false, they're sent as `m.text`). | ||||
| * `!rss template <feed ID> [new template]` - Change the post template for a | ||||
|   feed in the current room. If the new template is omitted, the bot replies | ||||
|   with the current template. | ||||
| 
 | ||||
| ### Templates | ||||
| The default template is `New post in $feed_title: [$title]($link)`. | ||||
| 
 | ||||
| Templates are interpreted as markdown with some simple variable substitution. | ||||
| The following variables are available: | ||||
| 
 | ||||
| * `$feed_url` - The URL that was used to subscribe to the feed. | ||||
| * `$feed_link` - The home page of the feed. | ||||
| * `$feed_title` - The title of the feed. | ||||
| * `$feed_subtitle` - The subtitle of the feed. | ||||
| * `$id` - The unique ID of the entry. | ||||
| * `$date` - The date of the entry. | ||||
| * `$title` - The title of the entry. | ||||
| * `$summary` - The summary/description of the entry. | ||||
| * `$link` - The link of the entry. | ||||
|  |  | |||
|  | @ -2,8 +2,6 @@ | |||
| update_interval: 60 | ||||
| # Maximum backoff in minutes when failing to fetch feeds (defaults to 5 days) | ||||
| max_backoff: 7200 | ||||
| # How many feeds to poll in parallel? Set to 0 to disable limit. | ||||
| poll_parallelism_limit: 10 | ||||
| # The time to sleep between send requests when broadcasting a new feed entry. | ||||
| # Set to 0 to disable sleep or -1 to run all requests asynchronously at once. | ||||
| spam_sleep: 2 | ||||
|  |  | |||
|  | @ -1,6 +1,6 @@ | |||
| maubot: 0.3.0 | ||||
| id: xyz.maubot.rss | ||||
| version: 0.4.1 | ||||
| version: 0.3.2 | ||||
| license: AGPL-3.0-or-later | ||||
| modules: | ||||
| - rss | ||||
|  |  | |||
|  | @ -8,4 +8,4 @@ line_length = 99 | |||
| 
 | ||||
| [tool.black] | ||||
| line-length = 99 | ||||
| target-version = ["py310"] | ||||
| target-version = ["py38"] | ||||
|  |  | |||
							
								
								
									
										49
									
								
								rss/bot.py
									
										
									
									
									
								
							
							
						
						
									
										49
									
								
								rss/bot.py
									
										
									
									
									
								
							|  | @ -18,11 +18,9 @@ from __future__ import annotations | |||
| from typing import Any, Iterable | ||||
| from datetime import datetime | ||||
| from string import Template | ||||
| from time import mktime, monotonic, time | ||||
| from time import mktime, time | ||||
| import asyncio | ||||
| import hashlib | ||||
| import html | ||||
| import io | ||||
| 
 | ||||
| import aiohttp | ||||
| import attr | ||||
|  | @ -55,7 +53,6 @@ class Config(BaseProxyConfig): | |||
|         helper.copy("command_prefix") | ||||
|         helper.copy("notification_template") | ||||
|         helper.copy("admins") | ||||
|         helper.copy("poll_parallelism_limit") | ||||
| 
 | ||||
| 
 | ||||
| class BoolArgument(command.Argument): | ||||
|  | @ -76,7 +73,6 @@ class BoolArgument(command.Argument): | |||
| class RSSBot(Plugin): | ||||
|     dbm: DBManager | ||||
|     poll_task: asyncio.Future | ||||
|     poll_sema: asyncio.Semaphore | None | ||||
|     http: aiohttp.ClientSession | ||||
|     power_level_cache: dict[RoomID, tuple[int, PowerLevelStateEventContent]] | ||||
| 
 | ||||
|  | @ -90,19 +86,12 @@ class RSSBot(Plugin): | |||
| 
 | ||||
|     async def start(self) -> None: | ||||
|         await super().start() | ||||
|         self.on_external_config_update() | ||||
|         self.config.load_and_update() | ||||
|         self.dbm = DBManager(self.database) | ||||
|         self.http = self.client.api.session | ||||
|         self.power_level_cache = {} | ||||
|         self.poll_task = asyncio.create_task(self.poll_feeds()) | ||||
| 
 | ||||
|     def on_external_config_update(self) -> None: | ||||
|         self.config.load_and_update() | ||||
|         poll_parallelism_limit = self.config["poll_parallelism_limit"] | ||||
|         self.poll_sema = ( | ||||
|             asyncio.Semaphore(poll_parallelism_limit) if poll_parallelism_limit > 0 else None | ||||
|         ) | ||||
| 
 | ||||
|     async def stop(self) -> None: | ||||
|         await super().stop() | ||||
|         self.poll_task.cancel() | ||||
|  | @ -151,7 +140,6 @@ class RSSBot(Plugin): | |||
|         if not subs: | ||||
|             return | ||||
|         now = int(time()) | ||||
|         start = monotonic() | ||||
|         tasks = [self.try_parse_feed(feed=feed) for feed in subs if feed.next_retry < now] | ||||
|         feed: Feed | ||||
|         entries: Iterable[Entry] | ||||
|  | @ -186,8 +174,7 @@ class RSSBot(Plugin): | |||
|             await self.dbm.add_entries(new_entry_list) | ||||
|             for entry in new_entry_list: | ||||
|                 await self._broadcast(feed, entry, feed.subscriptions) | ||||
|         duration = monotonic() - now | ||||
|         self.log.info(f"Finished polling {len(tasks)} feeds in {duration:.2f} seconds") | ||||
|         self.log.info(f"Finished polling {len(tasks)} feeds") | ||||
| 
 | ||||
|     async def _poll_feeds(self) -> None: | ||||
|         self.log.debug("Polling started") | ||||
|  | @ -201,13 +188,6 @@ class RSSBot(Plugin): | |||
|             await asyncio.sleep(self.config["update_interval"] * 60) | ||||
| 
 | ||||
|     async def try_parse_feed(self, feed: Feed | None = None) -> tuple[Feed, list[Entry]]: | ||||
|         if self.poll_sema is not None: | ||||
|             async with self.poll_sema: | ||||
|                 return await self._try_parse_feed(feed) | ||||
|         else: | ||||
|             return await self._try_parse_feed(feed) | ||||
| 
 | ||||
|     async def _try_parse_feed(self, feed: Feed | None = None) -> tuple[Feed, list[Entry]]: | ||||
|         try: | ||||
|             self.log.trace( | ||||
|                 f"Trying to fetch {feed.id} / {feed.url} " | ||||
|  | @ -275,9 +255,15 @@ class RSSBot(Plugin): | |||
|     async def _parse_rss( | ||||
|         cls, feed: Feed, resp: aiohttp.ClientResponse | ||||
|     ) -> tuple[Feed, list[Entry]]: | ||||
|         content = await resp.read() | ||||
|         try: | ||||
|             content = await resp.text() | ||||
|         except UnicodeDecodeError: | ||||
|             try: | ||||
|                 content = await resp.text(encoding="utf-8", errors="ignore") | ||||
|             except UnicodeDecodeError: | ||||
|                 content = str(await resp.read())[2:-1] | ||||
|         headers = {"Content-Location": feed.url, **resp.headers, "Content-Encoding": "identity"} | ||||
|         parsed_data = feedparser.parse(io.BytesIO(content), response_headers=headers) | ||||
|         parsed_data = feedparser.parse(content, response_headers=headers) | ||||
|         if parsed_data.bozo: | ||||
|             if not isinstance(parsed_data.bozo_exception, feedparser.ThingsNobodyCaresAboutButMe): | ||||
|                 raise parsed_data.bozo_exception | ||||
|  | @ -293,12 +279,12 @@ class RSSBot(Plugin): | |||
|             feed_id=feed_id, | ||||
|             id=( | ||||
|                 getattr(entry, "id", None) | ||||
|                 or getattr(entry, "link", None) | ||||
|                 or hashlib.sha1( | ||||
|                     " ".join( | ||||
|                         [ | ||||
|                             getattr(entry, "title", ""), | ||||
|                             getattr(entry, "description", ""), | ||||
|                             getattr(entry, "link", ""), | ||||
|                         ] | ||||
|                     ).encode("utf-8") | ||||
|                 ).hexdigest() | ||||
|  | @ -406,7 +392,7 @@ class RSSBot(Plugin): | |||
|         help="Change the notification template for a subscription in this room", | ||||
|     ) | ||||
|     @command.argument("feed_id", "feed ID", parser=int) | ||||
|     @command.argument("template", "new template", pass_raw=True, required=False) | ||||
|     @command.argument("template", "new template", pass_raw=True) | ||||
|     async def command_template(self, evt: MessageEvent, feed_id: int, template: str) -> None: | ||||
|         if not await self.can_manage(evt): | ||||
|             return | ||||
|  | @ -414,15 +400,6 @@ class RSSBot(Plugin): | |||
|         if not sub: | ||||
|             await evt.reply("This room is not subscribed to that feed") | ||||
|             return | ||||
|         if not template: | ||||
|             await evt.reply( | ||||
|                 '<p>Current template in this room:</p><pre><code language="markdown">' | ||||
|                 f"{html.escape(sub.notification_template.template)}" | ||||
|                 "</code></pre>", | ||||
|                 allow_html=True, | ||||
|                 markdown=False, | ||||
|             ) | ||||
|             return | ||||
|         await self.dbm.update_template(feed.id, evt.room_id, template) | ||||
|         sub = Subscription( | ||||
|             feed_id=feed.id, | ||||
|  |  | |||
							
								
								
									
										27
									
								
								rss/db.py
									
										
									
									
									
								
							
							
						
						
									
										27
									
								
								rss/db.py
									
										
									
									
									
								
							|  | @ -25,6 +25,12 @@ import attr | |||
| from mautrix.types import RoomID, UserID | ||||
| from mautrix.util.async_db import Database, Scheme | ||||
| 
 | ||||
| # TODO make this import unconditional after updating mautrix-python | ||||
| try: | ||||
|     from mautrix.util.async_db import SQLiteCursor | ||||
| except ImportError: | ||||
|     SQLiteCursor = None | ||||
| 
 | ||||
| 
 | ||||
| @dataclass | ||||
| class Subscription: | ||||
|  | @ -182,9 +188,24 @@ class DBManager: | |||
|             "INSERT INTO feed (url, title, subtitle, link, next_retry) " | ||||
|             "VALUES ($1, $2, $3, $4, $5) RETURNING (id)" | ||||
|         ) | ||||
|         info.id = await self.db.fetchval( | ||||
|             q, info.url, info.title, info.subtitle, info.link, info.next_retry | ||||
|         ) | ||||
|         # SQLite only gained RETURNING support in v3.35 (2021-03-12) | ||||
|         # TODO remove this special case in a couple of years | ||||
|         if self.db.scheme == Scheme.SQLITE: | ||||
|             cur = await self.db.execute( | ||||
|                 q.replace(" RETURNING (id)", ""), | ||||
|                 info.url, | ||||
|                 info.title, | ||||
|                 info.subtitle, | ||||
|                 info.link, | ||||
|                 info.next_retry, | ||||
|             ) | ||||
|             if SQLiteCursor is not None: | ||||
|                 assert isinstance(cur, SQLiteCursor) | ||||
|             info.id = cur.lastrowid | ||||
|         else: | ||||
|             info.id = await self.db.fetchval( | ||||
|                 q, info.url, info.title, info.subtitle, info.link, info.next_retry | ||||
|             ) | ||||
|         return info | ||||
| 
 | ||||
|     async def set_backoff(self, info: Feed, error_count: int, next_retry: int) -> None: | ||||
|  |  | |||
		Loading…
	
	Add table
		Add a link
		
	
		Reference in a new issue