#!/usr/bin/env bash
#
# make_full_schema.sh
#
# This script generates SQL files for creating a brand new Synapse DB with the latest
# schema, on both SQLite3 and Postgres.

# The Postgres command-line tools used by this script (createdb, psql, pg_dump,
# dropdb) pick the server host up from the environment.
export PGHOST="localhost"

# Names of the temporary Postgres databases. "$$" expands to the PID of this
# shell, so each invocation of the script gets its own set of names.
readonly POSTGRES_MAIN_DB_NAME="synapse_full_schema_main.$$"
readonly POSTGRES_COMMON_DB_NAME="synapse_full_schema_common.$$"
readonly POSTGRES_STATE_DB_NAME="synapse_full_schema_state.$$"

# Python packages whose presence is verified with `pip show` before any work is done.
readonly REQUIRED_DEPS=("matrix-synapse" "psycopg2")
  10. usage() {
  11. echo
  12. echo "Usage: $0 -p <postgres_username> -o <path> [-c] [-n <schema number>] [-h]"
  13. echo
  14. echo "-p <postgres_username>"
  15. echo " Username to connect to local postgres instance. The password will be requested"
  16. echo " during script execution."
  17. echo "-c"
  18. echo " CI mode. Prints every command that the script runs."
  19. echo "-o <path>"
  20. echo " Directory to output full schema files to. You probably want to use"
  21. echo " '-o synapse/storage/schema'"
  22. echo "-n <schema number>"
  23. echo " Schema number for the new snapshot. Used to set the location of files within "
  24. echo " the output directory, mimicking that of synapse/storage/schemas."
  25. echo " Defaults to 9999."
  26. echo "-h"
  27. echo " Display this help text."
  28. echo ""
  29. echo ""
  30. echo "You probably want to invoke this with something like"
  31. echo " docker run --rm -e POSTGRES_PASSWORD=postgres -e POSTGRES_USER=postgres -e POSTGRES_DB=synapse -p 5432:5432 postgres:11-alpine"
  32. echo " echo postgres | scripts-dev/make_full_schema.sh -p postgres -n MY_SCHEMA_NUMBER -o synapse/storage/schema"
  33. echo ""
  34. echo " NB: make sure to run this against the *oldest* supported version of postgres,"
  35. echo " or else pg_dump might output non-backwards-compatible syntax."
  36. }
  37. SCHEMA_NUMBER="9999"
  38. while getopts "p:co:hn:" opt; do
  39. case $opt in
  40. p)
  41. export PGUSER=$OPTARG
  42. ;;
  43. c)
  44. # Print all commands that are being executed
  45. set -x
  46. ;;
  47. o)
  48. command -v realpath > /dev/null || (echo "The -o flag requires the 'realpath' binary to be installed" && exit 1)
  49. OUTPUT_DIR="$(realpath "$OPTARG")"
  50. ;;
  51. h)
  52. usage
  53. exit
  54. ;;
  55. n)
  56. SCHEMA_NUMBER="$OPTARG"
  57. ;;
  58. \?)
  59. echo "ERROR: Invalid option: -$OPTARG" >&2
  60. usage
  61. exit
  62. ;;
  63. esac
  64. done
  65. # Check that required dependencies are installed
  66. unsatisfied_requirements=()
  67. for dep in "${REQUIRED_DEPS[@]}"; do
  68. pip show "$dep" --quiet || unsatisfied_requirements+=("$dep")
  69. done
  70. if [ ${#unsatisfied_requirements} -ne 0 ]; then
  71. echo "Please install the following python packages: ${unsatisfied_requirements[*]}"
  72. exit 1
  73. fi
  74. if [ -z "$PGUSER" ]; then
  75. echo "No postgres username supplied"
  76. usage
  77. exit 1
  78. fi
  79. if [ -z "$OUTPUT_DIR" ]; then
  80. echo "No output directory supplied"
  81. usage
  82. exit 1
  83. fi
  84. # Create the output directory if it doesn't exist
  85. mkdir -p "$OUTPUT_DIR"
  86. read -rsp "Postgres password for '$PGUSER': " PGPASSWORD
  87. echo ""
  88. export PGPASSWORD
  89. # Exit immediately if a command fails
  90. set -e
  91. # cd to root of the synapse directory
  92. cd "$(dirname "$0")/.."
  93. # Create temporary SQLite and Postgres homeserver db configs and key file
  94. TMPDIR=$(mktemp -d)
  95. KEY_FILE=$TMPDIR/test.signing.key # default Synapse signing key path
  96. SQLITE_CONFIG=$TMPDIR/sqlite.conf
  97. SQLITE_MAIN_DB=$TMPDIR/main.db
  98. SQLITE_STATE_DB=$TMPDIR/state.db
  99. SQLITE_COMMON_DB=$TMPDIR/common.db
  100. POSTGRES_CONFIG=$TMPDIR/postgres.conf
  101. # Ensure these files are delete on script exit
  102. cleanup() {
  103. echo "Cleaning up temporary sqlite database and config files..."
  104. rm -r "$TMPDIR"
  105. echo "Cleaning up temporary Postgres database..."
  106. dropdb --if-exists "$POSTGRES_COMMON_DB_NAME"
  107. dropdb --if-exists "$POSTGRES_MAIN_DB_NAME"
  108. dropdb --if-exists "$POSTGRES_STATE_DB_NAME"
  109. }
  110. trap 'cleanup' EXIT
  111. cat > "$SQLITE_CONFIG" <<EOF
  112. server_name: "test"
  113. signing_key_path: "$KEY_FILE"
  114. macaroon_secret_key: "abcde"
  115. report_stats: false
  116. databases:
  117. common:
  118. name: "sqlite3"
  119. data_stores: []
  120. args:
  121. database: "$SQLITE_COMMON_DB"
  122. main:
  123. name: "sqlite3"
  124. data_stores: ["main"]
  125. args:
  126. database: "$SQLITE_MAIN_DB"
  127. state:
  128. name: "sqlite3"
  129. data_stores: ["state"]
  130. args:
  131. database: "$SQLITE_STATE_DB"
  132. # Suppress the key server warning.
  133. trusted_key_servers: []
  134. EOF
  135. cat > "$POSTGRES_CONFIG" <<EOF
  136. server_name: "test"
  137. signing_key_path: "$KEY_FILE"
  138. macaroon_secret_key: "abcde"
  139. report_stats: false
  140. databases:
  141. common:
  142. name: "psycopg2"
  143. data_stores: []
  144. args:
  145. user: "$PGUSER"
  146. host: "$PGHOST"
  147. password: "$PGPASSWORD"
  148. database: "$POSTGRES_COMMON_DB_NAME"
  149. main:
  150. name: "psycopg2"
  151. data_stores: ["main"]
  152. args:
  153. user: "$PGUSER"
  154. host: "$PGHOST"
  155. password: "$PGPASSWORD"
  156. database: "$POSTGRES_MAIN_DB_NAME"
  157. state:
  158. name: "psycopg2"
  159. data_stores: ["state"]
  160. args:
  161. user: "$PGUSER"
  162. host: "$PGHOST"
  163. password: "$PGPASSWORD"
  164. database: "$POSTGRES_STATE_DB_NAME"
  165. # Suppress the key server warning.
  166. trusted_key_servers: []
  167. EOF
  168. # Generate the server's signing key.
  169. echo "Generating SQLite3 db schema..."
  170. python -m synapse.app.homeserver --generate-keys -c "$SQLITE_CONFIG"
  171. # Make sure the SQLite3 database is using the latest schema and has no pending background update.
  172. echo "Running db background jobs..."
  173. poetry run python synapse/_scripts/update_synapse_database.py --database-config "$SQLITE_CONFIG" --run-background-updates
  174. # Create the PostgreSQL database.
  175. echo "Creating postgres databases..."
  176. createdb --lc-collate=C --lc-ctype=C --template=template0 "$POSTGRES_COMMON_DB_NAME"
  177. createdb --lc-collate=C --lc-ctype=C --template=template0 "$POSTGRES_MAIN_DB_NAME"
  178. createdb --lc-collate=C --lc-ctype=C --template=template0 "$POSTGRES_STATE_DB_NAME"
  179. echo "Running db background jobs..."
  180. poetry run python synapse/_scripts/update_synapse_database.py --database-config "$POSTGRES_CONFIG" --run-background-updates
  181. echo "Dropping unwanted db tables..."
  182. # Some common tables are created and updated by Synapse itself and do not belong in the
  183. # schema.
  184. DROP_APP_MANAGED_TABLES="
  185. DROP TABLE schema_version;
  186. DROP TABLE schema_compat_version;
  187. DROP TABLE applied_schema_deltas;
  188. DROP TABLE applied_module_schemas;
  189. "
  190. # Other common tables are not created by Synapse and do belong in the schema.
  191. # TODO: we could derive DROP_COMMON_TABLES from the dump of the common-only DB. But
  192. # since there's only one table there, I haven't bothered to do so.
  193. DROP_COMMON_TABLES="$DROP_APP_MANAGED_TABLES
  194. DROP TABLE background_updates;
  195. "
  196. sqlite3 "$SQLITE_COMMON_DB" <<< "$DROP_APP_MANAGED_TABLES"
  197. sqlite3 "$SQLITE_MAIN_DB" <<< "$DROP_COMMON_TABLES"
  198. sqlite3 "$SQLITE_STATE_DB" <<< "$DROP_COMMON_TABLES"
  199. psql "$POSTGRES_COMMON_DB_NAME" -w <<< "$DROP_APP_MANAGED_TABLES"
  200. psql "$POSTGRES_MAIN_DB_NAME" -w <<< "$DROP_COMMON_TABLES"
  201. psql "$POSTGRES_STATE_DB_NAME" -w <<< "$DROP_COMMON_TABLES"
  202. # For Reasons(TM), SQLite's `.schema` also dumps out "shadow tables", the implementation
  203. # details behind full text search tables. Omit these from the dumps.
  204. sqlite3 "$SQLITE_MAIN_DB" <<< "
  205. DROP TABLE event_search_content;
  206. DROP TABLE event_search_segments;
  207. DROP TABLE event_search_segdir;
  208. DROP TABLE event_search_docsize;
  209. DROP TABLE event_search_stat;
  210. DROP TABLE user_directory_search_content;
  211. DROP TABLE user_directory_search_segments;
  212. DROP TABLE user_directory_search_segdir;
  213. DROP TABLE user_directory_search_docsize;
  214. DROP TABLE user_directory_search_stat;
  215. "
  216. echo "Dumping SQLite3 schema..."
  217. mkdir -p "$OUTPUT_DIR/"{common,main,state}"/full_schemas/$SCHEMA_NUMBER"
  218. sqlite3 "$SQLITE_COMMON_DB" ".schema" > "$OUTPUT_DIR/common/full_schemas/$SCHEMA_NUMBER/full.sql.sqlite"
  219. sqlite3 "$SQLITE_COMMON_DB" ".dump --data-only --nosys" >> "$OUTPUT_DIR/common/full_schemas/$SCHEMA_NUMBER/full.sql.sqlite"
  220. sqlite3 "$SQLITE_MAIN_DB" ".schema" > "$OUTPUT_DIR/main/full_schemas/$SCHEMA_NUMBER/full.sql.sqlite"
  221. sqlite3 "$SQLITE_MAIN_DB" ".dump --data-only --nosys" >> "$OUTPUT_DIR/main/full_schemas/$SCHEMA_NUMBER/full.sql.sqlite"
  222. sqlite3 "$SQLITE_STATE_DB" ".schema" > "$OUTPUT_DIR/state/full_schemas/$SCHEMA_NUMBER/full.sql.sqlite"
  223. sqlite3 "$SQLITE_STATE_DB" ".dump --data-only --nosys" >> "$OUTPUT_DIR/state/full_schemas/$SCHEMA_NUMBER/full.sql.sqlite"
  224. cleanup_pg_schema() {
  225. # Cleanup as follows:
  226. # - Remove empty lines. pg_dump likes to output a lot of these.
  227. # - Remove comment-only lines. pg_dump also likes to output a lot of these to visually
  228. # separate tables etc.
  229. # - Remove "public." prefix --- the schema name.
  230. # - Remove "SET" commands. Last time I ran this, the output commands were
  231. # SET statement_timeout = 0;
  232. # SET lock_timeout = 0;
  233. # SET idle_in_transaction_session_timeout = 0;
  234. # SET client_encoding = 'UTF8';
  235. # SET standard_conforming_strings = on;
  236. # SET check_function_bodies = false;
  237. # SET xmloption = content;
  238. # SET client_min_messages = warning;
  239. # SET row_security = off;
  240. # SET default_table_access_method = heap;
  241. # - Very carefully remove specific SELECT statements. We CANNOT blanket remove all
  242. # SELECT statements because some of those have side-effects which we do want in the
  243. # schema. Last time I ran this, the only SELECTS were
  244. # SELECT pg_catalog.set_config('search_path', '', false);
  245. # and
  246. # SELECT pg_catalog.setval(text, bigint, bool);
  247. # We do want to remove the former, but the latter is important. If the last argument
  248. # is `true` or omitted, this marks the given integer as having been consumed and
  249. # will NOT appear as the nextval.
  250. sed -e '/^$/d' \
  251. -e '/^--/d' \
  252. -e 's/public\.//g' \
  253. -e '/^SET /d' \
  254. -e '/^SELECT pg_catalog.set_config/d'
  255. }
  256. echo "Dumping Postgres schema..."
  257. pg_dump --format=plain --schema-only --no-tablespaces --no-acl --no-owner "$POSTGRES_COMMON_DB_NAME" | cleanup_pg_schema > "$OUTPUT_DIR/common/full_schemas/$SCHEMA_NUMBER/full.sql.postgres"
  258. pg_dump --format=plain --data-only --inserts --no-tablespaces --no-acl --no-owner "$POSTGRES_COMMON_DB_NAME" | cleanup_pg_schema >> "$OUTPUT_DIR/common/full_schemas/$SCHEMA_NUMBER/full.sql.postgres"
  259. pg_dump --format=plain --schema-only --no-tablespaces --no-acl --no-owner "$POSTGRES_MAIN_DB_NAME" | cleanup_pg_schema > "$OUTPUT_DIR/main/full_schemas/$SCHEMA_NUMBER/full.sql.postgres"
  260. pg_dump --format=plain --data-only --inserts --no-tablespaces --no-acl --no-owner "$POSTGRES_MAIN_DB_NAME" | cleanup_pg_schema >> "$OUTPUT_DIR/main/full_schemas/$SCHEMA_NUMBER/full.sql.postgres"
  261. pg_dump --format=plain --schema-only --no-tablespaces --no-acl --no-owner "$POSTGRES_STATE_DB_NAME" | cleanup_pg_schema > "$OUTPUT_DIR/state/full_schemas/$SCHEMA_NUMBER/full.sql.postgres"
  262. pg_dump --format=plain --data-only --inserts --no-tablespaces --no-acl --no-owner "$POSTGRES_STATE_DB_NAME" | cleanup_pg_schema >> "$OUTPUT_DIR/state/full_schemas/$SCHEMA_NUMBER/full.sql.postgres"
  263. if [[ "$OUTPUT_DIR" == *synapse/storage/schema ]]; then
  264. echo "Updating contrib/datagrip symlinks..."
  265. ln -sf "../../synapse/storage/schema/common/full_schemas/$SCHEMA_NUMBER/full.sql.postgres" "contrib/datagrip/common.sql"
  266. ln -sf "../../synapse/storage/schema/main/full_schemas/$SCHEMA_NUMBER/full.sql.postgres" "contrib/datagrip/main.sql"
  267. ln -sf "../../synapse/storage/schema/state/full_schemas/$SCHEMA_NUMBER/full.sql.postgres" "contrib/datagrip/state.sql"
  268. else
  269. echo "Not updating contrib/datagrip symlinks (unknown output directory)"
  270. fi
  271. echo "Done! Files dumped to: $OUTPUT_DIR"