Browse Source

Change search indexing to use batches to minimize resource usage (#18451)

Eugen Rochko 1 year ago
parent
commit
679b7158e3

+ 1 - 1
Gemfile

@@ -81,7 +81,7 @@ gem 'scenic', '~> 1.6'
 gem 'sidekiq', '~> 6.4'
 gem 'sidekiq-scheduler', '~> 4.0'
 gem 'sidekiq-unique-jobs', '~> 7.1'
-gem 'sidekiq-bulk', '~>0.2.0'
+gem 'sidekiq-bulk', '~> 0.2.0'
 gem 'simple-navigation', '~> 4.3'
 gem 'simple_form', '~> 5.1'
 gem 'sprockets-rails', '~> 3.4', require: 'sprockets/railtie'

+ 1 - 1
app/chewy/accounts_index.rb

@@ -1,7 +1,7 @@
 # frozen_string_literal: true
 
 class AccountsIndex < Chewy::Index
-  settings index: { refresh_interval: '5m' }, analysis: {
+  settings index: { refresh_interval: '30s' }, analysis: {
     analyzer: {
       content: {
         tokenizer: 'whitespace',

+ 1 - 1
app/chewy/statuses_index.rb

@@ -3,7 +3,7 @@
 class StatusesIndex < Chewy::Index
   include FormattingHelper
 
-  settings index: { refresh_interval: '15m' }, analysis: {
+  settings index: { refresh_interval: '30s' }, analysis: {
     filter: {
       english_stop: {
         type: 'stop',

+ 1 - 1
app/chewy/tags_index.rb

@@ -1,7 +1,7 @@
 # frozen_string_literal: true
 
 class TagsIndex < Chewy::Index
-  settings index: { refresh_interval: '15m' }, analysis: {
+  settings index: { refresh_interval: '30s' }, analysis: {
     analyzer: {
       content: {
         tokenizer: 'keyword',

+ 26 - 0
app/workers/scheduler/indexing_scheduler.rb

@@ -0,0 +1,26 @@
+# frozen_string_literal: true
+
+class Scheduler::IndexingScheduler
+  include Sidekiq::Worker
+  include Redisable
+
+  sidekiq_options retry: 0
+
+  def perform
+    indexes.each do |type|
+      with_redis do |redis|
+        ids = redis.smembers("chewy:queue:#{type.name}")
+
+        type.import!(ids)
+
+        redis.pipelined do |pipeline|
+          ids.each { |id| pipeline.srem("chewy:queue:#{type.name}", id) }
+        end
+      end
+    end
+  end
+
+  def indexes
+    [AccountsIndex, TagsIndex, StatusesIndex]
+  end
+end

+ 1 - 1
config/application.rb

@@ -38,7 +38,7 @@ require_relative '../lib/mastodon/version'
 require_relative '../lib/mastodon/rack_middleware'
 require_relative '../lib/devise/two_factor_ldap_authenticatable'
 require_relative '../lib/devise/two_factor_pam_authenticatable'
-require_relative '../lib/chewy/strategy/custom_sidekiq'
+require_relative '../lib/chewy/strategy/mastodon'
 require_relative '../lib/webpacker/manifest_extensions'
 require_relative '../lib/webpacker/helper_extensions'
 require_relative '../lib/rails/engine_extensions'

+ 2 - 3
config/initializers/chewy.rb

@@ -13,15 +13,14 @@ Chewy.settings = {
   journal: false,
   user: user,
   password: password,
-  sidekiq: { queue: 'pull' },
 }
 
 # We use our own async strategy even outside the request-response
 # cycle, which takes care of checking if Elasticsearch is enabled
 # or not. However, mind that for the Rails console, the :urgent
 # strategy is set automatically with no way to override it.
-Chewy.root_strategy              = :custom_sidekiq
-Chewy.request_strategy           = :custom_sidekiq
+Chewy.root_strategy              = :mastodon
+Chewy.request_strategy           = :mastodon
 Chewy.use_after_commit_callbacks = false
 
 module Chewy

+ 4 - 0
config/sidekiq.yml

@@ -21,6 +21,10 @@
     every: '6h'
     class: Scheduler::Trends::ReviewNotificationsScheduler
     queue: scheduler
+  indexing_scheduler:
+    every: '5m'
+    class: Scheduler::IndexingScheduler
+    queue: scheduler
   media_cleanup_scheduler:
     cron: '<%= Random.rand(0..59) %> <%= Random.rand(3..5) %> * * *'
     class: Scheduler::MediaCleanupScheduler

+ 0 - 11
lib/chewy/strategy/custom_sidekiq.rb

@@ -1,11 +0,0 @@
-# frozen_string_literal: true
-
-module Chewy
-  class Strategy
-    class CustomSidekiq < Sidekiq
-      def update(_type, _objects, _options = {})
-        super if Chewy.enabled?
-      end
-    end
-  end
-end

+ 27 - 0
lib/chewy/strategy/mastodon.rb

@@ -0,0 +1,27 @@
+# frozen_string_literal: true
+
+module Chewy
+  class Strategy
+    class Mastodon < Base
+      def initialize
+        super
+
+        @stash = Hash.new { |hash, key| hash[key] = [] }
+      end
+
+      def update(type, objects, _options = {})
+        @stash[type].concat(type.root.id ? Array.wrap(objects) : type.adapter.identify(objects)) if Chewy.enabled?
+      end
+
+      def leave
+        RedisConfiguration.with do |redis|
+          redis.pipelined do |pipeline|
+            @stash.each do |type, ids|
+              pipeline.sadd("chewy:queue:#{type.name}", ids)
+            end
+          end
+        end
+      end
+    end
+  end
+end