  # 20170920032311_fix_reblogs_in_feeds.rb 3.0 KB

  # frozen_string_literal: true
  # One-off data migration that rewrites every user's home feed in Redis from
  # the old reblog encoding to the new one. Both schemes are described in #up.
  class FixReblogsInFeeds < ActiveRecord::Migration[5.1]
    # Walks all users and, for each home feed, converts reblog entries to the
    # new scheme while recording them in a per-feed reblog tracking zset.
    def up
      fm = FeedManager.instance

      # Old scheme:
      # Each user's feed zset had a series of score:value entries,
      # where "regular" statuses had the same score and value (their
      # ID). Reblogs had a score of the reblogging status' ID, and a
      # value of the reblogged status' ID.

      # New scheme:
      # The feed contains only entries with the same score and value.
      # Reblogs result in the reblogging status being added to the
      # feed, with an entry in a reblog tracking zset (where the score
      # is once again set to the reblogging status' ID, and the value
      # is set to the reblogged status' ID). This is safe for Redis'
      # float conversion because in this reblog tracking zset, we only
      # need the reblogging status' ID to be able to stop tracking
      # entries after they have gotten too far down the feed, which
      # does not require an exact value.

      # This process reads all feeds and writes 3 times for each reblog.
      # So we use a Lua script to avoid overhead between Ruby and Redis.
      script = <<-LUA
        local timeline_key = KEYS[1]
        local reblog_key = KEYS[2]

        -- So, first, we iterate over the user's feed to find any reblogs.
        -- The reply is a flat list alternating value, score.
        local items = redis.call('zrange', timeline_key, 0, -1, 'withscores')

        for i = 1, #items, 2 do
          local reblogged_id = items[i]
          local reblogging_id = items[i + 1]
          if (reblogged_id ~= reblogging_id) then
            -- The score and value don't match, so this is a reblog.
            -- (note that we're transitioning from IDs < 53 bits so we
            -- don't have to worry about the loss of precision)

            -- Remove the old entry
            redis.call('zrem', timeline_key, reblogged_id)

            -- Add a new one for the reblogging status
            redis.call('zadd', timeline_key, reblogging_id, reblogging_id)

            -- Track the fact that this was a reblog
            redis.call('zadd', reblog_key, reblogging_id, reblogged_id)
          end
        end
      LUA

      # Borrow the connection through the pool's block form so it is handed
      # back even if loading or running the script raises; a bare `checkout`
      # without a matching check-in would leak the connection.
      # NOTE(review): assumes RedisConnection.pool is a connection_pool-style
      # pool that offers #with alongside #checkout -- confirm.
      RedisConnection.pool.with do |redis|
        # Load the script once and invoke it by hash for every feed.
        script_hash = redis.script(:load, script)

        # find_each is batched on the database side.
        User.includes(:account).find_each do |user|
          account = user.account

          timeline_key = fm.key(:home, account.id)
          reblog_key = fm.key(:home, account.id, 'reblogs')
          redis.evalsha(script_hash, [timeline_key, reblog_key])
        end
      end
    end

    # Intentionally a no-op; see the rationale below.
    def down
      # We *deliberately* do nothing here. This means that reverting
      # this and the associated changes to the FeedManager code could
      # allow one superfluous reblog of any given status, but in the case
      # where things have gone wrong and a revert is necessary, this
      # appears preferable to requiring a database hit for every status
      # in every user's feed simply to revert.

      # Note that this is operating under the assumption that entries
      # with >53-bit IDs have already been entered. Otherwise, we could
      # just use the data in Redis to reverse this transition.
    end
  end