Reorganise the database schema directories (#9932)

The hope here is that by moving all the schema files into synapse/storage/schema, it gets a bit easier for newcomers to navigate.

It certainly got easier for me to write a helpful README. There's more to do on that front, but I'll follow up with other PRs for that.
This commit is contained in:
Richard van der Hoff 2021-05-07 10:22:05 +01:00 committed by GitHub
parent 8771b1337d
commit 25f43faa70
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
284 changed files with 81 additions and 47 deletions

1
changelog.d/9932.misc Normal file
View File

@ -0,0 +1 @@
Move database schema files into a common directory.

View File

@ -1,21 +0,0 @@
# Synapse Database Schemas
These schemas are used as a basis to create brand new Synapse databases, on both
SQLite3 and Postgres.
## Building full schema dumps
If you want to recreate these schemas, they need to be made from a database that
has had all background updates run.
To do so, use `scripts-dev/make_full_schema.sh`. This will produce new
`full.sql.postgres` and `full.sql.sqlite` files.
Ensure postgres is installed and your user has the ability to run bash commands
such as `createdb`, then call
./scripts-dev/make_full_schema.sh -p postgres_username -o output_dir/
There are currently two folders with full-schema snapshots. `16` is a snapshot
from 2015, for historical reference. The other contains the most recent full
schema snapshot.

View File

@ -26,16 +26,13 @@ from synapse.config.homeserver import HomeServerConfig
from synapse.storage.database import LoggingDatabaseConnection from synapse.storage.database import LoggingDatabaseConnection
from synapse.storage.engines import BaseDatabaseEngine from synapse.storage.engines import BaseDatabaseEngine
from synapse.storage.engines.postgres import PostgresEngine from synapse.storage.engines.postgres import PostgresEngine
from synapse.storage.schema import SCHEMA_VERSION
from synapse.storage.types import Cursor from synapse.storage.types import Cursor
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
# Remember to update this number every time a change is made to database schema_path = os.path.join(os.path.abspath(os.path.dirname(__file__)), "schema")
# schema files, so the users will be informed on server restarts.
SCHEMA_VERSION = 59
dir_path = os.path.abspath(os.path.dirname(__file__))
class PrepareDatabaseException(Exception): class PrepareDatabaseException(Exception):
@ -168,6 +165,13 @@ def _setup_new_database(
Example directory structure: Example directory structure:
schema/ schema/
common/
delta/
...
full_schemas/
11/
foo.sql
main/
delta/ delta/
... ...
full_schemas/ full_schemas/
@ -175,15 +179,14 @@ def _setup_new_database(
test.sql test.sql
... ...
11/ 11/
foo.sql
bar.sql bar.sql
... ...
In the example foo.sql and bar.sql would be run, and then any delta files In the example foo.sql and bar.sql would be run, and then any delta files
for versions strictly greater than 11. for versions strictly greater than 11.
Note: we apply the full schemas and deltas from the top level `schema/` Note: we apply the full schemas and deltas from the `schema/common`
folder as well those in the data stores specified. folder as well those in the databases specified.
Args: Args:
cur: a database cursor cur: a database cursor
@ -195,12 +198,12 @@ def _setup_new_database(
# configured to our liking. # configured to our liking.
database_engine.check_new_database(cur) database_engine.check_new_database(cur)
current_dir = os.path.join(dir_path, "schema", "full_schemas") full_schemas_dir = os.path.join(schema_path, "common", "full_schemas")
# First we find the highest full schema version we have # First we find the highest full schema version we have
valid_versions = [] valid_versions = []
for filename in os.listdir(current_dir): for filename in os.listdir(full_schemas_dir):
try: try:
ver = int(filename) ver = int(filename)
except ValueError: except ValueError:
@ -218,15 +221,13 @@ def _setup_new_database(
logger.debug("Initialising schema v%d", max_current_ver) logger.debug("Initialising schema v%d", max_current_ver)
# Now lets find all the full schema files, both in the global schema and # Now let's find all the full schema files, both in the common schema and
# in data store schemas. # in database schemas.
directories = [os.path.join(current_dir, str(max_current_ver))] directories = [os.path.join(full_schemas_dir, str(max_current_ver))]
directories.extend( directories.extend(
os.path.join( os.path.join(
dir_path, schema_path,
"databases",
database, database,
"schema",
"full_schemas", "full_schemas",
str(max_current_ver), str(max_current_ver),
) )
@ -357,6 +358,9 @@ def _upgrade_existing_database(
check_database_before_upgrade(cur, database_engine, config) check_database_before_upgrade(cur, database_engine, config)
start_ver = current_version start_ver = current_version
# if we got to this schema version by running a full_schema rather than a series
# of deltas, we should not run the deltas for this version.
if not upgraded: if not upgraded:
start_ver += 1 start_ver += 1
@ -385,12 +389,10 @@ def _upgrade_existing_database(
# directories for schema updates. # directories for schema updates.
# First we find the directories to search in # First we find the directories to search in
delta_dir = os.path.join(dir_path, "schema", "delta", str(v)) delta_dir = os.path.join(schema_path, "common", "delta", str(v))
directories = [delta_dir] directories = [delta_dir]
for database in databases: for database in databases:
directories.append( directories.append(os.path.join(schema_path, database, "delta", str(v)))
os.path.join(dir_path, "databases", database, "schema", "delta", str(v))
)
# Used to check if we have any duplicate file names # Used to check if we have any duplicate file names
file_name_counter = Counter() # type: CounterType[str] file_name_counter = Counter() # type: CounterType[str]
@ -621,8 +623,8 @@ def _get_or_create_schema_state(
txn: Cursor, database_engine: BaseDatabaseEngine txn: Cursor, database_engine: BaseDatabaseEngine
) -> Optional[Tuple[int, List[str], bool]]: ) -> Optional[Tuple[int, List[str], bool]]:
# Bluntly try creating the schema_version tables. # Bluntly try creating the schema_version tables.
schema_path = os.path.join(dir_path, "schema", "schema_version.sql") sql_path = os.path.join(schema_path, "common", "schema_version.sql")
executescript(txn, schema_path) executescript(txn, sql_path)
txn.execute("SELECT version, upgraded FROM schema_version") txn.execute("SELECT version, upgraded FROM schema_version")
row = txn.fetchone() row = txn.fetchone()

View File

@ -0,0 +1,37 @@
# Synapse Database Schemas
This directory contains the schema files used to build Synapse databases.
Synapse supports splitting its datastore across multiple physical databases (which can
be useful for large installations), and the schema files are therefore split according
to the logical database they apply to.
At the time of writing, the following "logical" databases are supported:
* `state` - used to store Matrix room state (more specifically, `state_groups`,
their relationships and contents).
* `main` - stores everything else.
Additionally, the `common` directory contains schema files for tables which must be
present on *all* physical databases.
## Full schema dumps
In the `full_schemas` directories, only the most recently-numbered snapshot is useful
(`54` at the time of writing). Older snapshots (e.g. `16`) are present for historical
reference only.
## Building full schema dumps
If you want to recreate these schemas, they need to be made from a database that
has had all background updates run.
To do so, use `scripts-dev/make_full_schema.sh`. This will produce new
`full.sql.postgres` and `full.sql.sqlite` files.
Ensure postgres is installed, then run:
./scripts-dev/make_full_schema.sh -p postgres_username -o output_dir/
NB: at the time of writing, this script predates the split into separate `state`/`main`
databases, so it will require updates to handle that correctly.

View File

@ -0,0 +1,17 @@
# Copyright 2021 The Matrix.org Foundation C.I.C.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# The database schema version number.
#
# Remember to update this number every time a change is made to the database
# schema files, so that users will be informed on server restarts.
SCHEMA_VERSION = 59

Some files were not shown because too many files have changed in this diff Show More