# frozen_string_literal: true

  # Full-text account search for the including model.
  #
  # Provides the SQL templates used to rank accounts against a PostgreSQL
  # `tsquery`, instance helpers exposing searchable attributes, and the class
  # level finders `search_for` / `advanced_search_for`.
  module Account::Search
    extend ActiveSupport::Concern

    # Characters replaced with a space before the terms are embedded in a
    # quoted tsquery phrase. NUL is included because PostgreSQL text values
    # cannot contain it, so letting it through can only produce an error.
    DISALLOWED_TSQUERY_CHARACTERS = /['?\\:‘’\u0000]/

    # Weighted tsvector built from display name (A), username (B) and
    # domain (C, empty string when the domain is NULL).
    TEXT_SEARCH_RANKS = <<~SQL.squish
      (
        setweight(to_tsvector('simple', accounts.display_name), 'A') ||
        setweight(to_tsvector('simple', accounts.username), 'B') ||
        setweight(to_tsvector('simple', coalesce(accounts.domain, '')), 'C')
      )
    SQL

    # followers / (following + 1), with NULL or negative counts treated as 0.
    # Expects `account_stats` to be joined under the alias `s`.
    REPUTATION_SCORE_FUNCTION = <<~SQL.squish
      (
        greatest(0, coalesce(s.followers_count, 0)) / (
          greatest(0, coalesce(s.following_count, 0)) + 1.0
        )
      )
    SQL

    # log(followers + 2), with NULL or negative counts treated as 0.
    # Expects `account_stats` to be joined under the alias `s`.
    FOLLOWERS_SCORE_FUNCTION = <<~SQL.squish
      log(
        greatest(0, coalesce(s.followers_count, 0)) + 2
      )
    SQL

    # Recency factor: 0 when the account has no `last_status_at`, otherwise
    # exp(-x^2 / (2 * v)) with x = greatest(0, |days| - 30) and
    # v = -30^2 / (2 * ln(0.3)), i.e. 0.3 ** (x^2 / 900).
    #
    # NOTE(review): `age()` returns a symbolic years/months/days interval, so
    # `extract(DAY FROM ...)` presumably yields only the day field rather than
    # the total number of elapsed days. If so, x is 0 for practically every
    # account and this factor is always 1 when `last_status_at` is present.
    # Confirm against the PostgreSQL documentation before relying on it.
    TIME_DISTANCE_FUNCTION = <<~SQL.squish
      (
        case
          when s.last_status_at is null then 0
          else exp(
            -1.0 * (
              (
                greatest(0, abs(extract(DAY FROM age(s.last_status_at))) - 30.0)^2) /#{' '}
                (2.0 * ((-1.0 * 30^2) / (2.0 * ln(0.3)))
              )
            )
          )
        end
      )
    SQL

    # Arithmetic mean of the three scores above; multiplied into the text rank.
    BOOST = <<~SQL.squish
      (
        (#{REPUTATION_SCORE_FUNCTION} + #{FOLLOWERS_SCORE_FUNCTION} + #{TIME_DISTANCE_FUNCTION}) / 3.0
      )
    SQL

    # Search over all accounts. Excludes suspended and moved accounts, and
    # local accounts (NULL domain) whose user is not approved and confirmed.
    # Binds: :tsquery, :limit, :offset.
    BASIC_SEARCH_SQL = <<~SQL.squish
      SELECT
        accounts.*,
        #{BOOST} * ts_rank_cd(#{TEXT_SEARCH_RANKS}, to_tsquery('simple', :tsquery), 32) AS rank
      FROM accounts
      LEFT JOIN users ON accounts.id = users.account_id
      LEFT JOIN account_stats AS s ON accounts.id = s.account_id
      WHERE to_tsquery('simple', :tsquery) @@ #{TEXT_SEARCH_RANKS}
        AND accounts.suspended_at IS NULL
        AND accounts.moved_to_account_id IS NULL
        AND (accounts.domain IS NOT NULL OR (users.approved = TRUE AND users.confirmed_at IS NOT NULL))
      ORDER BY rank DESC
      LIMIT :limit OFFSET :offset
    SQL

    # Search restricted to the accounts followed by :id, plus :id itself.
    # The rank is multiplied by (1 + number of follow rows from the candidate
    # to :id), so accounts that follow :id back rank higher.
    # Binds: :id, :tsquery, :limit, :offset.
    ADVANCED_SEARCH_WITH_FOLLOWING = <<~SQL.squish
      WITH first_degree AS (
        SELECT target_account_id
        FROM follows
        WHERE account_id = :id
        UNION ALL
        SELECT :id
      )
      SELECT
        accounts.*,
        (count(f.id) + 1) * #{BOOST} * ts_rank_cd(#{TEXT_SEARCH_RANKS}, to_tsquery('simple', :tsquery), 32) AS rank
      FROM accounts
      LEFT OUTER JOIN follows AS f ON (accounts.id = f.account_id AND f.target_account_id = :id)
      LEFT JOIN account_stats AS s ON accounts.id = s.account_id
      WHERE accounts.id IN (SELECT * FROM first_degree)
        AND to_tsquery('simple', :tsquery) @@ #{TEXT_SEARCH_RANKS}
        AND accounts.suspended_at IS NULL
        AND accounts.moved_to_account_id IS NULL
      GROUP BY accounts.id, s.id
      ORDER BY rank DESC
      LIMIT :limit OFFSET :offset
    SQL

    # Search over all accounts with the same filters as BASIC_SEARCH_SQL.
    # `followed` counts follow rows between the candidate and :id in either
    # direction; results are ordered by `followed` first, then by rank.
    # Binds: :id, :tsquery, :limit, :offset.
    ADVANCED_SEARCH_WITHOUT_FOLLOWING = <<~SQL.squish
      SELECT
        accounts.*,
        #{BOOST} * ts_rank_cd(#{TEXT_SEARCH_RANKS}, to_tsquery('simple', :tsquery), 32) AS rank,
        count(f.id) AS followed
      FROM accounts
      LEFT OUTER JOIN follows AS f ON
        (accounts.id = f.account_id AND f.target_account_id = :id) OR (accounts.id = f.target_account_id AND f.account_id = :id)
      LEFT JOIN users ON accounts.id = users.account_id
      LEFT JOIN account_stats AS s ON accounts.id = s.account_id
      WHERE to_tsquery('simple', :tsquery) @@ #{TEXT_SEARCH_RANKS}
        AND accounts.suspended_at IS NULL
        AND accounts.moved_to_account_id IS NULL
        AND (accounts.domain IS NOT NULL OR (users.approved = TRUE AND users.confirmed_at IS NOT NULL))
      GROUP BY accounts.id, s.id
      ORDER BY followed DESC, rank DESC
      LIMIT :limit OFFSET :offset
    SQL

    # Page size used when the caller does not pass `limit:`.
    DEFAULT_LIMIT = 10

    # Plain-text rendering of the account note, or nil when the account is
    # not discoverable.
    def searchable_text
      PlainTextFormatter.new(note, local?).to_s if discoverable?
    end

    # Tags describing the account: any of 'bot', 'verified' (at least one
    # verified field) and 'discoverable', in that order.
    #
    # @return [Array<String>]
    def searchable_properties
      [].tap do |properties|
        properties << 'bot' if bot?
        properties << 'verified' if fields.any?(&:verified?)
        properties << 'discoverable' if discoverable?
      end
    end

    class_methods do
      # Runs BASIC_SEARCH_SQL for the given terms.
      #
      # @param terms [String] raw, unsanitized search terms
      # @param limit [Integer] maximum number of rows
      # @param offset [Integer] number of rows to skip
      # @return the records returned by `find_by_sql`, with `account_stat`
      #   and `user.role` preloaded
      def search_for(terms, limit: DEFAULT_LIMIT, offset: 0)
        tsquery = generate_query_for_search(terms)
        records = find_by_sql([BASIC_SEARCH_SQL, { limit: limit, offset: offset, tsquery: tsquery }])

        preload_search_result_associations(records)
      end

      # Runs one of the advanced templates relative to `account`.
      #
      # @param terms [String] raw, unsanitized search terms
      # @param account [#id] account whose follow relationships shape the result
      # @param limit [Integer] maximum number of rows
      # @param following [Boolean] when truthy, only search accounts that
      #   `account` follows (and `account` itself)
      # @param offset [Integer] number of rows to skip
      # @return the records returned by `find_by_sql`, with `account_stat`
      #   and `user.role` preloaded
      def advanced_search_for(terms, account, limit: DEFAULT_LIMIT, following: false, offset: 0)
        tsquery = generate_query_for_search(terms)
        sql_template = following ? ADVANCED_SEARCH_WITH_FOLLOWING : ADVANCED_SEARCH_WITHOUT_FOLLOWING
        records = find_by_sql([sql_template, { id: account.id, limit: limit, offset: offset, tsquery: tsquery }])

        preload_search_result_associations(records)
      end

      private

      # Preloads the associations needed for search results and returns the
      # same `records` object.
      def preload_search_result_associations(records)
        ActiveRecord::Associations::Preloader.new(records: records, associations: [:account_stat, { user: :role }]).call
        records
      end

      # Builds the string bound to :tsquery: the terms, with every disallowed
      # character replaced by a space, wrapped as a single quoted phrase.
      def generate_query_for_search(unsanitized_terms)
        terms = unsanitized_terms.gsub(DISALLOWED_TSQUERY_CHARACTERS, ' ')

        # The final ":*" is for prefix search.
        # The terms are padded with one space on each side inside the quotes.
        # The trailing space does not seem to serve any purpose, whereas
        # `to_tsquery` behaves differently with and without the leading space
        # if the terms start with `./`, `../`, or `.. `. The reason is not
        # understood, so, in doubt, the query is kept exactly as it was.
        "' #{terms} ':*"
      end
    end
  end