migration_helpers.rb 35 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977978979980
  1. # frozen_string_literal: true
  2. # This file is copied almost entirely from GitLab, which has done a large
  3. # amount of work to ensure that migrations can happen with minimal downtime.
  4. # Many thanks to those engineers.
  5. # Changes have been made to remove dependencies on other GitLab files and to
  6. # shorten temporary column names.
  7. # Documentation on using these functions (and why one might do so):
  8. # https://gitlab.com/gitlab-org/gitlab-ce/blob/master/doc/development/what_requires_downtime.md
  9. # The file itself:
  10. # https://gitlab.com/gitlab-org/gitlab-ce/blob/master/lib/gitlab/database/migration_helpers.rb
  11. # It is licensed as follows:
  12. # Copyright (c) 2011-2017 GitLab B.V.
  13. # Permission is hereby granted, free of charge, to any person obtaining a copy
  14. # of this software and associated documentation files (the "Software"), to deal
  15. # in the Software without restriction, including without limitation the rights
  16. # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
  17. # copies of the Software, and to permit persons to whom the Software is
  18. # furnished to do so, subject to the following conditions:
  19. # The above copyright notice and this permission notice shall be included in
  20. # all copies or substantial portions of the Software.
  21. # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  22. # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  23. # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
  24. # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  25. # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
  26. # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
  27. # THE SOFTWARE.
  28. # This is bad form, but there are enough differences that it's impractical to do
  29. # otherwise:
  30. # rubocop:disable all
  31. module Mastodon
  32. module MigrationHelpers
  33. class CorruptionError < StandardError
  34. attr_reader :index_name
  35. def initialize(index_name)
  36. @index_name = index_name
  37. super "The index `#{index_name}` seems to be corrupted, it contains duplicate rows. " \
  38. 'For information on how to fix this, see our documentation: ' \
  39. 'https://docs.joinmastodon.org/admin/troubleshooting/index-corruption/'
  40. end
  41. def cause
  42. nil
  43. end
  44. def backtrace
  45. []
  46. end
  47. end
  48. # Model that can be used for querying permissions of a SQL user.
  49. class Grant < ActiveRecord::Base
  50. self.table_name = 'information_schema.role_table_grants'
  51. def self.scope_to_current_user
  52. where('grantee = user')
  53. end
  54. # Returns true if the current user can create and execute triggers on the
  55. # given table.
  56. def self.create_and_execute_trigger?(table)
  57. priv = where(privilege_type: 'TRIGGER', table_name: table)
  58. priv.scope_to_current_user.any?
  59. end
  60. end
  61. BACKGROUND_MIGRATION_BATCH_SIZE = 1000 # Number of rows to process per job
  62. BACKGROUND_MIGRATION_JOB_BUFFER_SIZE = 1000 # Number of jobs to bulk queue at a time
  63. # Gets an estimated number of rows for a table
  64. def estimate_rows_in_table(table_name)
  65. exec_query('SELECT reltuples FROM pg_class WHERE relname = ' +
  66. "'#{table_name}'").to_a.first['reltuples']
  67. end
  68. # Adds `created_at` and `updated_at` columns with timezone information.
  69. #
  70. # This method is an improved version of Rails' built-in method `add_timestamps`.
  71. #
  72. # Available options are:
  73. # default - The default value for the column.
  74. # null - When set to `true` the column will allow NULL values.
  75. # The default is to not allow NULL values.
  76. def add_timestamps_with_timezone(table_name, **options)
  77. options[:null] = false if options[:null].nil?
  78. [:created_at, :updated_at].each do |column_name|
  79. if options[:default] && transaction_open?
  80. raise '`add_timestamps_with_timezone` with default value cannot be run inside a transaction. ' \
  81. 'You can disable transactions by calling `disable_ddl_transaction!` ' \
  82. 'in the body of your migration class'
  83. end
  84. # If default value is presented, use `add_column_with_default` method instead.
  85. if options[:default]
  86. add_column_with_default(
  87. table_name,
  88. column_name,
  89. :datetime_with_timezone,
  90. default: options[:default],
  91. allow_null: options[:null]
  92. )
  93. else
  94. add_column(table_name, column_name, :datetime_with_timezone, **options)
  95. end
  96. end
  97. end
  98. # Creates a new index, concurrently when supported
  99. #
  100. # On PostgreSQL this method creates an index concurrently, on MySQL this
  101. # creates a regular index.
  102. #
  103. # Example:
  104. #
  105. # add_concurrent_index :users, :some_column
  106. #
  107. # See Rails' `add_index` for more info on the available arguments.
  108. def add_concurrent_index(table_name, column_name, **options)
  109. if transaction_open?
  110. raise 'add_concurrent_index can not be run inside a transaction, ' \
  111. 'you can disable transactions by calling disable_ddl_transaction! ' \
  112. 'in the body of your migration class'
  113. end
  114. options = options.merge({ algorithm: :concurrently })
  115. disable_statement_timeout
  116. add_index(table_name, column_name, **options)
  117. end
  118. # Removes an existed index, concurrently when supported
  119. #
  120. # On PostgreSQL this method removes an index concurrently.
  121. #
  122. # Example:
  123. #
  124. # remove_concurrent_index :users, :some_column
  125. #
  126. # See Rails' `remove_index` for more info on the available arguments.
  127. def remove_concurrent_index(table_name, column_name, **options)
  128. if transaction_open?
  129. raise 'remove_concurrent_index can not be run inside a transaction, ' \
  130. 'you can disable transactions by calling disable_ddl_transaction! ' \
  131. 'in the body of your migration class'
  132. end
  133. if supports_drop_index_concurrently?
  134. options = options.merge({ algorithm: :concurrently })
  135. disable_statement_timeout
  136. end
  137. remove_index(table_name, **options.merge({ column: column_name }))
  138. end
  139. # Removes an existing index, concurrently when supported
  140. #
  141. # On PostgreSQL this method removes an index concurrently.
  142. #
  143. # Example:
  144. #
  145. # remove_concurrent_index :users, "index_X_by_Y"
  146. #
  147. # See Rails' `remove_index` for more info on the available arguments.
  148. def remove_concurrent_index_by_name(table_name, index_name, **options)
  149. if transaction_open?
  150. raise 'remove_concurrent_index_by_name can not be run inside a transaction, ' \
  151. 'you can disable transactions by calling disable_ddl_transaction! ' \
  152. 'in the body of your migration class'
  153. end
  154. if supports_drop_index_concurrently?
  155. options = options.merge({ algorithm: :concurrently })
  156. disable_statement_timeout
  157. end
  158. remove_index(table_name, **options.merge({ name: index_name }))
  159. end
  160. # Only available on Postgresql >= 9.2
  161. def supports_drop_index_concurrently?
  162. version = select_one("SELECT current_setting('server_version_num') AS v")['v'].to_i
  163. version >= 90200
  164. end
# Adds a foreign key with only minimal locking on the tables involved.
#
# The key is first created as NOT VALID (holding the ALTER TABLE lock
# only briefly) and then validated in a separate step that does not
# block writes to the source table.
#
# source - The source table containing the foreign key.
# target - The target table the key points to.
# column - The name of the column to create the foreign key on.
# on_delete - The action to perform when associated data is removed,
#             defaults to "CASCADE".
# target_col - The column on the target table the key references,
#              defaults to "id".
def add_concurrent_foreign_key(source, target, column:, on_delete: :cascade, target_col: 'id')
  # Transactions would result in ALTER TABLE locks being held for the
  # duration of the transaction, defeating the purpose of this method.
  if transaction_open?
    raise 'add_concurrent_foreign_key can not be run inside a transaction'
  end

  # Map Rails' :nullify action onto PostgreSQL's "SET NULL" clause.
  on_delete = 'SET NULL' if on_delete == :nullify

  disable_statement_timeout

  key_name = concurrent_foreign_key_name(source, column, target_col)

  # Using NOT VALID allows us to create a key without immediately
  # validating it. This means we keep the ALTER TABLE lock only for a
  # short period of time. The key _is_ enforced for any newly created
  # data.
  execute <<-EOF.strip_heredoc
  ALTER TABLE #{source}
  ADD CONSTRAINT #{key_name}
  FOREIGN KEY (#{column})
  REFERENCES #{target} (#{target_col})
  #{on_delete ? "ON DELETE #{on_delete.upcase}" : ''}
  NOT VALID;
  EOF

  # Validate the existing constraint. This can potentially take a very
  # long time to complete, but fortunately does not lock the source table
  # while running.
  execute("ALTER TABLE #{source} VALIDATE CONSTRAINT #{key_name};")
end
  204. # Returns the name for a concurrent foreign key.
  205. #
  206. # PostgreSQL constraint names have a limit of 63 bytes. The logic used
  207. # here is based on Rails' foreign_key_name() method, which unfortunately
  208. # is private so we can't rely on it directly.
  209. def concurrent_foreign_key_name(table, column, target_col)
  210. "fk_#{Digest::SHA256.hexdigest("#{table}_#{column}_#{target_col}_fk").first(10)}"
  211. end
# Long-running migrations may take more than the timeout allowed by
# the database. Disable the session's statement timeout to ensure
# migrations don't get killed prematurely. (PostgreSQL only)
#
# Note: this affects the whole session, not just the next statement.
def disable_statement_timeout
  execute('SET statement_timeout TO 0')
end
# Updates the value of a column in batches.
#
# This method updates the table in batches of 5% of the total row count
# (capped at 1000 rows per batch). This method will continue updating
# rows until no rows remain.
#
# When given a block this method will yield two values to the block:
#
# 1. An instance of `Arel::Table` for the table that is being updated.
# 2. The query to run as an Arel object.
#
# By supplying a block one can add extra conditions to the queries being
# executed. Note that the same block is used for _all_ queries.
#
# Example:
#
#     update_column_in_batches(:projects, :foo, 10) do |table, query|
#       query.where(table[:some_column].eq('hello'))
#     end
#
# This would result in this method updating only rows where
# `projects.some_column` equals "hello".
#
# table - The name of the table.
# column - The name of the column to update.
# value - The value for the column.
#
# Rubocop's Metrics/AbcSize metric is disabled for this method as Rubocop
# determines this method to be too complex while there's no way to make it
# less "complex" without introducing extra methods (which actually will
# make things _more_ complex).
#
# rubocop: disable Metrics/AbcSize
def update_column_in_batches(table_name, column, value)
  if transaction_open?
    raise 'update_column_in_batches can not be run inside a transaction, ' \
      'you can disable transactions by calling disable_ddl_transaction! ' \
      'in the body of your migration class'
  end

  table = Arel::Table.new(table_name)

  total = estimate_rows_in_table(table_name).to_i
  if total < 1
    # The planner's estimate may be stale or zero; fall back to a real
    # COUNT, filtered by the caller's block so only affected rows count.
    count_arel = table.project(Arel.star.count.as('count'))
    count_arel = yield table, count_arel if block_given?

    total = exec_query(count_arel.to_sql).to_ary.first['count'].to_i

    return if total == 0
  end

  # Update in batches of 5% until we run out of any rows to update.
  batch_size = ((total / 100.0) * 5.0).ceil
  max_size = 1000

  # The upper limit is 1000 to ensure we don't lock too many rows. For
  # example, for "merge_requests" even 1% of the table is around 35 000
  # rows for GitLab.com.
  batch_size = max_size if batch_size > max_size

  start_arel = table.project(table[:id]).order(table[:id].asc).take(1)
  start_arel = yield table, start_arel if block_given?
  first_row = exec_query(start_arel.to_sql).to_ary.first
  # In case there are no rows but we didn't catch it in the estimated size:
  return unless first_row
  start_id = first_row['id'].to_i

  say "Migrating #{table_name}.#{column} (~#{total.to_i} rows)"

  started_time = Time.zone.now
  last_time = Time.zone.now
  migrated = 0
  loop do
    stop_row = nil

    suppress_messages do
      # Find the id one past this batch: it becomes the exclusive upper
      # bound of the UPDATE and the starting id of the next batch.
      stop_arel = table.project(table[:id])
        .where(table[:id].gteq(start_id))
        .order(table[:id].asc)
        .take(1)
        .skip(batch_size)

      stop_arel = yield table, stop_arel if block_given?
      stop_row = exec_query(stop_arel.to_sql).to_ary.first

      update_arel = Arel::UpdateManager.new
        .table(table)
        .set([[table[column], value]])
        .where(table[:id].gteq(start_id))

      if stop_row
        stop_id = stop_row['id'].to_i
        start_id = stop_id
        update_arel = update_arel.where(table[:id].lt(stop_id))
      end

      update_arel = yield table, update_arel if block_given?

      execute(update_arel.to_sql)
    end

    migrated += batch_size
    # Print a progress line (with an ETA) at most once per second.
    if Time.zone.now - last_time > 1
      status = "Migrated #{migrated} rows"

      percentage = 100.0 * migrated / total
      status += " (~#{sprintf('%.2f', percentage)}%, "

      remaining_time = (100.0 - percentage) * (Time.zone.now - started_time) / percentage

      status += "#{(remaining_time / 60).to_i}:"
      status += sprintf('%02d', remaining_time.to_i % 60)
      status += ' remaining, '

      # Tell users not to interrupt if we're almost done.
      if remaining_time > 10
        status += 'safe to interrupt'
      else
        status += 'DO NOT interrupt'
      end

      status += ')'

      say status, true

      last_time = Time.zone.now
    end

    # There are no more rows left to update.
    break unless stop_row
  end
end
# Adds a column with a default value without locking an entire table.
#
# This method runs the following steps:
#
# 1. Add the column with a default value of NULL.
# 2. Change the default value of the column to the specified value.
# 3. Update all existing rows in batches.
# 4. Set a `NOT NULL` constraint on the column if desired (the default).
#
# These steps ensure a column can be added to a large and commonly used
# table without locking the entire table for the duration of the table
# modification.
#
# table - The name of the table to update.
# column - The name of the column to add.
# type - The column type (e.g. `:integer`).
# default - The default value for the column.
# limit - Sets a column limit. For example, for :integer, the default is
#         4-bytes. Set `limit: 8` to allow 8-byte integers.
# allow_null - When set to `true` the column will allow NULL values, the
#              default is to not allow NULL values.
#
# This method can also take a block which is passed directly to the
# `update_column_in_batches` method.
def add_column_with_default(table, column, type, default:, limit: nil, allow_null: false, &block)
  if transaction_open?
    raise 'add_column_with_default can not be run inside a transaction, ' \
      'you can disable transactions by calling disable_ddl_transaction! ' \
      'in the body of your migration class'
  end

  disable_statement_timeout

  transaction do
    if limit
      add_column(table, column, type, default: nil, limit: limit)
    else
      add_column(table, column, type, default: nil)
    end

    # Changing the default before the update ensures any newly inserted
    # rows already use the proper default value.
    change_column_default(table, column, default)
  end

  begin
    update_column_in_batches(table, column, default, &block)

    change_column_null(table, column, false) unless allow_null
  # We want to rescue _all_ exceptions here, even those that don't inherit
  # from StandardError, so a failed backfill always rolls the column back.
  rescue Exception => error # rubocop: disable all
    remove_column(table, column)

    raise error
  end
end
# Renames a column without requiring downtime.
#
# Concurrent renames work by using database triggers to ensure both the
# old and new column are in sync. However, this method will _not_ remove
# the triggers or the old column automatically; this needs to be done
# manually in a post-deployment migration. This can be done using the
# method `cleanup_concurrent_column_rename`.
#
# table - The name of the database table containing the column.
# old - The old column name.
# new - The new column name.
# type - The type of the new column. If no type is given the old column's
#        type is used.
def rename_column_concurrently(table, old, new, type: nil)
  if transaction_open?
    raise 'rename_column_concurrently can not be run inside a transaction'
  end

  check_trigger_permissions!(table)
  trigger_name = rename_trigger_name(table, old, new)

  # If we were in the middle of update_column_in_batches, we should remove
  # the old column and start over, as we have no idea where we were.
  if column_for(table, new)
    remove_rename_triggers_for_postgresql(table, trigger_name)

    remove_column(table, new)
  end

  old_col = column_for(table, old)
  new_type = type || old_col.type

  # Preserve the old column's precision/scale on the replacement column.
  col_opts = {
    precision: old_col.precision,
    scale: old_col.scale,
  }

  # We may be trying to reset the limit on an integer column type, so let
  # Rails handle that.
  unless [:bigint, :integer].include?(new_type)
    col_opts[:limit] = old_col.limit
  end

  add_column(table, new, new_type, **col_opts)

  # We set the default value _after_ adding the column so we don't end up
  # updating any existing data with the default value. This isn't
  # necessary since we copy over old values further down.
  change_column_default(table, new, old_col.default) if old_col.default

  quoted_table = quote_table_name(table)
  quoted_old = quote_column_name(old)
  quoted_new = quote_column_name(new)

  # Keep old and new columns in sync for rows written while we backfill.
  install_rename_triggers_for_postgresql(trigger_name, quoted_table,
                                         quoted_old, quoted_new)

  update_column_in_batches(table, new, Arel::Table.new(table)[old])

  change_column_null(table, new, false) unless old_col.null

  copy_indexes(table, old, new)
  copy_foreign_keys(table, old, new)
end
  428. # Changes the type of a column concurrently.
  429. #
  430. # table - The table containing the column.
  431. # column - The name of the column to change.
  432. # new_type - The new column type.
  433. def change_column_type_concurrently(table, column, new_type)
  434. temp_column = rename_column_name(column)
  435. rename_column_concurrently(table, column, temp_column, type: new_type)
  436. # Primary keys don't necessarily have an associated index.
  437. if ActiveRecord::Base.get_primary_key(table) == column.to_s
  438. old_pk_index_name = "index_#{table}_on_#{column}"
  439. new_pk_index_name = "index_#{table}_on_#{column}_cm"
  440. unless indexes_for(table, column).find{|i| i.name == old_pk_index_name}
  441. add_concurrent_index(table, [temp_column],
  442. unique: true,
  443. name: new_pk_index_name
  444. )
  445. end
  446. end
  447. end
# Performs cleanup of a concurrent type change.
#
# Waits for any replacement ("_cm") indexes to finish building, swaps the
# temporary column into place, and restores primary key status, sequence
# ownership, column defaults, index names and foreign key names.
#
# table - The table containing the column.
# column - The name of the column that was changed.
def cleanup_concurrent_column_type_change(table, column)
  temp_column = rename_column_name(column)

  # Wait for the indices to be built
  indexes_for(table, column).each do |index|
    expected_name = index.name + '_cm'
    puts "Waiting for index #{expected_name}"
    sleep 1 until indexes_for(table, temp_column).find {|i| i.name == expected_name }
  end

  was_primary = (ActiveRecord::Base.get_primary_key(table) == column.to_s)

  old_default_fn = column_for(table, column).default_function

  old_fks = []
  if was_primary
    # Get any foreign keys pointing at this column we need to recreate, and
    # remove the old ones.
    # Based on code from:
    # http://errorbank.blogspot.com/2011/03/list-all-foreign-keys-references-for.html
    old_fks_res = execute <<-EOF.strip_heredoc
      select m.relname as src_table,
        (select a.attname
          from pg_attribute a
          where a.attrelid = m.oid
            and a.attnum = o.conkey[1]
            and a.attisdropped = false) as src_col,
        o.conname as name,
        o.confdeltype as on_delete
        from pg_constraint o
        left join pg_class f on f.oid = o.confrelid
        left join pg_class c on c.oid = o.conrelid
        left join pg_class m on m.oid = o.conrelid
        where o.contype = 'f'
          and o.conrelid in (
            select oid from pg_class c where c.relkind = 'r')
          and f.relname = '#{table}';
    EOF
    old_fks = old_fks_res.to_a
    old_fks.each do |old_fk|
      # Re-point each referencing key at the temporary column before
      # dropping the key that targets the old column.
      add_concurrent_foreign_key(
        old_fk['src_table'],
        table,
        column: old_fk['src_col'],
        target_col: temp_column,
        on_delete: extract_foreign_key_action(old_fk['on_delete'])
      )
      remove_foreign_key(old_fk['src_table'], name: old_fk['name'])
    end
  end

  # If there was a sequence owned by the old column, make it owned by the
  # new column, as it will otherwise be deleted when we get rid of the
  # old column.
  if (seq_match = /^nextval\('([^']*)'(::text|::regclass)?\)/.match(old_default_fn))
    seq_name = seq_match[1]
    execute("ALTER SEQUENCE #{seq_name} OWNED BY #{table}.#{temp_column}")
  end

  transaction do
    # This has to be performed in a transaction as otherwise we might have
    # inconsistent data.
    cleanup_concurrent_column_rename(table, column, temp_column)
    rename_column(table, temp_column, column)

    # If there was an old default function, we didn't copy it. Do that now
    # in the transaction, so we don't miss anything.
    change_column_default(table, column, -> { old_default_fn }) if old_default_fn
  end

  # Rename any indices back to what they should be.
  indexes_for(table, column).each do |index|
    next unless index.name.end_with?('_cm')

    real_index_name = index.name.sub(/_cm$/, '')
    rename_index(table, index.name, real_index_name)
  end

  # Rename any foreign keys back to names based on the real column.
  foreign_keys_for(table, column).each do |fk|
    old_fk_name = concurrent_foreign_key_name(fk.from_table, temp_column, 'id')
    new_fk_name = concurrent_foreign_key_name(fk.from_table, column, 'id')
    execute("ALTER TABLE #{fk.from_table} RENAME CONSTRAINT " +
      "#{old_fk_name} TO #{new_fk_name}")
  end

  # Rename any foreign keys from other tables to names based on the real
  # column.
  old_fks.each do |old_fk|
    old_fk_name = concurrent_foreign_key_name(old_fk['src_table'],
      old_fk['src_col'], temp_column)
    new_fk_name = concurrent_foreign_key_name(old_fk['src_table'],
      old_fk['src_col'], column)
    execute("ALTER TABLE #{old_fk['src_table']} RENAME CONSTRAINT " +
      "#{old_fk_name} TO #{new_fk_name}")
  end

  # If the old column was a primary key, mark the new one as a primary key.
  if was_primary
    execute("ALTER TABLE #{table} ADD PRIMARY KEY USING INDEX " +
      "index_#{table}_on_#{column}")
  end
end
  544. # Cleans up a concurrent column name.
  545. #
  546. # This method takes care of removing previously installed triggers as well
  547. # as removing the old column.
  548. #
  549. # table - The name of the database table.
  550. # old - The name of the old column.
  551. # new - The name of the new column.
  552. def cleanup_concurrent_column_rename(table, old, new)
  553. trigger_name = rename_trigger_name(table, old, new)
  554. check_trigger_permissions!(table)
  555. remove_rename_triggers_for_postgresql(table, trigger_name)
  556. remove_column(table, old)
  557. end
# Performs a concurrent column rename when using PostgreSQL.
#
# Installs a trigger function that copies the old column's value into the
# new column on every INSERT or UPDATE, keeping the two in sync while the
# rename is in progress.
#
# trigger - The base name for the trigger and its function.
# table - The (quoted) table name.
# old - The (quoted) old column name.
# new - The (quoted) new column name.
def install_rename_triggers_for_postgresql(trigger, table, old, new)
  execute <<-EOF.strip_heredoc
  CREATE OR REPLACE FUNCTION #{trigger}()
  RETURNS trigger AS
  $BODY$
  BEGIN
    NEW.#{new} := NEW.#{old};
    RETURN NEW;
  END;
  $BODY$
  LANGUAGE 'plpgsql'
  VOLATILE
  EOF

  execute <<-EOF.strip_heredoc
  CREATE TRIGGER #{trigger}
  BEFORE INSERT OR UPDATE
  ON #{table}
  FOR EACH ROW
  EXECUTE PROCEDURE #{trigger}()
  EOF
end
# Installs the triggers necessary to perform a concurrent column rename on
# MySQL.
#
# MySQL has no BEFORE INSERT OR UPDATE form, so two separate triggers are
# created: one for INSERT and one for UPDATE.
#
# trigger - The base name for the two triggers.
# table - The table name.
# old - The old column name.
# new - The new column name.
def install_rename_triggers_for_mysql(trigger, table, old, new)
  execute <<-EOF.strip_heredoc
  CREATE TRIGGER #{trigger}_insert
  BEFORE INSERT
  ON #{table}
  FOR EACH ROW
  SET NEW.#{new} = NEW.#{old}
  EOF

  execute <<-EOF.strip_heredoc
  CREATE TRIGGER #{trigger}_update
  BEFORE UPDATE
  ON #{table}
  FOR EACH ROW
  SET NEW.#{new} = NEW.#{old}
  EOF
end
# Removes the triggers used for renaming a PostgreSQL column concurrently.
# Drops both the trigger and the backing plpgsql function; both DROPs use
# IF EXISTS so this is safe to call when nothing was installed.
def remove_rename_triggers_for_postgresql(table, trigger)
  execute("DROP TRIGGER IF EXISTS #{trigger} ON #{table}")
  execute("DROP FUNCTION IF EXISTS #{trigger}()")
end
# Removes the triggers used for renaming a MySQL column concurrently.
# Drops the paired INSERT and UPDATE triggers created by
# `install_rename_triggers_for_mysql`; IF EXISTS makes this idempotent.
def remove_rename_triggers_for_mysql(trigger)
  execute("DROP TRIGGER IF EXISTS #{trigger}_insert")
  execute("DROP TRIGGER IF EXISTS #{trigger}_update")
end
  608. # Returns the (base) name to use for triggers when renaming columns.
  609. def rename_trigger_name(table, old, new)
  610. 'trigger_' + Digest::SHA256.hexdigest("#{table}_#{old}_#{new}").first(12)
  611. end
  612. # Returns the name to use for temporary rename columns.
  613. def rename_column_name(base)
  614. base.to_s + '_cm'
  615. end
  616. # Returns an Array containing the indexes for the given column
  617. def indexes_for(table, column)
  618. column = column.to_s
  619. indexes(table).select { |index| index.columns.include?(column) }
  620. end
  621. # Returns an Array containing the foreign keys for the given column.
  622. def foreign_keys_for(table, column)
  623. column = column.to_s
  624. foreign_keys(table).select { |fk| fk.column == column }
  625. end
  626. # Copies all indexes for the old column to a new column.
  627. #
  628. # table - The table containing the columns and indexes.
  629. # old - The old column.
  630. # new - The new column.
  631. def copy_indexes(table, old, new)
  632. old = old.to_s
  633. new = new.to_s
  634. indexes_for(table, old).each do |index|
  635. new_columns = index.columns.map do |column|
  636. column == old ? new : column
  637. end
  638. # This is necessary as we can't properly rename indexes such as
  639. # "ci_taggings_idx".
  640. name = index.name + '_cm'
  641. # If the order contained the old column, map it to the new one.
  642. order = index.orders
  643. if order.key?(old)
  644. order[new] = order.delete(old)
  645. end
  646. options = {
  647. unique: index.unique,
  648. name: name,
  649. length: index.lengths,
  650. order: order
  651. }
  652. # These options are not supported by MySQL, so we only add them if
  653. # they were previously set.
  654. options[:using] = index.using if index.using
  655. options[:where] = index.where if index.where
  656. add_concurrent_index(table, new_columns, **options)
  657. end
  658. end
  659. # Copies all foreign keys for the old column to the new column.
  660. #
  661. # table - The table containing the columns and indexes.
  662. # old - The old column.
  663. # new - The new column.
  664. def copy_foreign_keys(table, old, new)
  665. foreign_keys_for(table, old).each do |fk|
  666. add_concurrent_foreign_key(fk.from_table,
  667. fk.to_table,
  668. column: new,
  669. on_delete: fk.on_delete)
  670. end
  671. end
  672. # Returns the column for the given table and column name.
  673. def column_for(table, name)
  674. name = name.to_s
  675. columns(table).find { |column| column.name == name }
  676. end
  677. # Update the configuration of an index by creating a new one and then
  678. # removing the old one
  679. def update_index(table_name, index_name, columns, **index_options)
  680. if index_name_exists?(table_name, "#{index_name}_new") && index_name_exists?(table_name, index_name)
  681. remove_index table_name, "#{index_name}_new"
  682. end
  683. begin
  684. add_index table_name, columns, **index_options.merge(name: "#{index_name}_new", algorithm: :concurrently)
  685. rescue ActiveRecord::RecordNotUnique
  686. remove_index table_name, name: "#{index_name}_new"
  687. raise CorruptionError.new(index_name)
  688. end
  689. remove_index table_name, name: index_name if index_name_exists?(table_name, index_name)
  690. rename_index table_name, "#{index_name}_new", index_name
  691. end
  692. # This will replace the first occurrence of a string in a column with
  693. # the replacement
  694. # On postgresql we can use `regexp_replace` for that.
  695. # On mysql we find the location of the pattern, and overwrite it
  696. # with the replacement
  697. def replace_sql(column, pattern, replacement)
  698. quoted_pattern = Arel::Nodes::Quoted.new(pattern.to_s)
  699. quoted_replacement = Arel::Nodes::Quoted.new(replacement.to_s)
  700. replace = Arel::Nodes::NamedFunction
  701. .new("regexp_replace", [column, quoted_pattern, quoted_replacement])
  702. Arel::Nodes::SqlLiteral.new(replace.to_sql)
  703. end
  704. def remove_foreign_key_without_error(*args)
  705. remove_foreign_key(*args)
  706. rescue ArgumentError
  707. end
  708. def sidekiq_queue_migrate(queue_from, to:)
  709. while sidekiq_queue_length(queue_from) > 0
  710. Sidekiq.redis do |conn|
  711. conn.rpoplpush "queue:#{queue_from}", "queue:#{to}"
  712. end
  713. end
  714. end
  715. def sidekiq_queue_length(queue_name)
  716. Sidekiq.redis do |conn|
  717. conn.llen("queue:#{queue_name}")
  718. end
  719. end
# Asserts that the current database user may create, drop, and execute
# triggers on +table+. Raises with remediation instructions otherwise.
#
# NOTE(review): `ActiveRecord::Base.configurations[Rails.env]` with a hash
# lookup is deprecated in newer Rails versions in favour of
# `configurations.configs_for` — presumably this targets an older Rails;
# verify before upgrading.
def check_trigger_permissions!(table)
unless Grant.create_and_execute_trigger?(table)
dbname = ActiveRecord::Base.configurations[Rails.env]['database']
# Fall back to the OS user when no username is configured.
user = ActiveRecord::Base.configurations[Rails.env]['username'] || ENV['USER']
raise <<-EOF
Your database user is not allowed to create, drop, or execute triggers on the
table #{table}.
If you are using PostgreSQL you can solve this by logging in to the Mastodon
database (#{dbname}) using a super user and running:
ALTER USER #{user} WITH SUPERUSER
The query will grant the user super user permissions, ensuring you don't run
into similar problems in the future (e.g. when new tables are created).
EOF
end
end
  735. # Bulk queues background migration jobs for an entire table, batched by ID range.
  736. # "Bulk" meaning many jobs will be pushed at a time for efficiency.
  737. # If you need a delay interval per job, then use `queue_background_migration_jobs_by_range_at_intervals`.
  738. #
  739. # model_class - The table being iterated over
  740. # job_class_name - The background migration job class as a string
  741. # batch_size - The maximum number of rows per job
  742. #
  743. # Example:
  744. #
  745. # class Route < ActiveRecord::Base
  746. # include EachBatch
  747. # self.table_name = 'routes'
  748. # end
  749. #
  750. # bulk_queue_background_migration_jobs_by_range(Route, 'ProcessRoutes')
  751. #
  752. # Where the model_class includes EachBatch, and the background migration exists:
  753. #
  754. # class Gitlab::BackgroundMigration::ProcessRoutes
  755. # def perform(start_id, end_id)
  756. # # do something
  757. # end
  758. # end
  759. def bulk_queue_background_migration_jobs_by_range(model_class, job_class_name, batch_size: BACKGROUND_MIGRATION_BATCH_SIZE)
  760. raise "#{model_class} does not have an ID to use for batch ranges" unless model_class.column_names.include?('id')
  761. jobs = []
  762. model_class.each_batch(of: batch_size) do |relation|
  763. start_id, end_id = relation.pluck('MIN(id), MAX(id)').first
  764. if jobs.length >= BACKGROUND_MIGRATION_JOB_BUFFER_SIZE
  765. # Note: This code path generally only helps with many millions of rows
  766. # We push multiple jobs at a time to reduce the time spent in
  767. # Sidekiq/Redis operations. We're using this buffer based approach so we
  768. # don't need to run additional queries for every range.
  769. BackgroundMigrationWorker.perform_bulk(jobs)
  770. jobs.clear
  771. end
  772. jobs << [job_class_name, [start_id, end_id]]
  773. end
  774. BackgroundMigrationWorker.perform_bulk(jobs) unless jobs.empty?
  775. end
  776. # Queues background migration jobs for an entire table, batched by ID range.
  777. # Each job is scheduled with a `delay_interval` in between.
  778. # If you use a small interval, then some jobs may run at the same time.
  779. #
  780. # model_class - The table being iterated over
  781. # job_class_name - The background migration job class as a string
  782. # delay_interval - The duration between each job's scheduled time (must respond to `to_f`)
  783. # batch_size - The maximum number of rows per job
  784. #
  785. # Example:
  786. #
  787. # class Route < ActiveRecord::Base
  788. # include EachBatch
  789. # self.table_name = 'routes'
  790. # end
  791. #
  792. # queue_background_migration_jobs_by_range_at_intervals(Route, 'ProcessRoutes', 1.minute)
  793. #
  794. # Where the model_class includes EachBatch, and the background migration exists:
  795. #
  796. # class Gitlab::BackgroundMigration::ProcessRoutes
  797. # def perform(start_id, end_id)
  798. # # do something
  799. # end
  800. # end
  801. def queue_background_migration_jobs_by_range_at_intervals(model_class, job_class_name, delay_interval, batch_size: BACKGROUND_MIGRATION_BATCH_SIZE)
  802. raise "#{model_class} does not have an ID to use for batch ranges" unless model_class.column_names.include?('id')
  803. model_class.each_batch(of: batch_size) do |relation, index|
  804. start_id, end_id = relation.pluck('MIN(id), MAX(id)').first
  805. # `BackgroundMigrationWorker.bulk_perform_in` schedules all jobs for
  806. # the same time, which is not helpful in most cases where we wish to
  807. # spread the work over time.
  808. BackgroundMigrationWorker.perform_in(delay_interval * index, job_class_name, [start_id, end_id])
  809. end
  810. end
  811. private
  812. # https://github.com/rails/rails/blob/v5.2.0/activerecord/lib/active_record/connection_adapters/postgresql/schema_statements.rb#L678-L684
  813. def extract_foreign_key_action(specifier)
  814. case specifier
  815. when 'c'; :cascade
  816. when 'n'; :nullify
  817. when 'r'; :restrict
  818. end
  819. end
  820. end
  821. end
  822. # rubocop:enable all