# language_detector_spec.rb (4.3 KB)
  # frozen_string_literal: true

  require 'rails_helper'

  # Specs for LanguageDetector. Two areas are covered:
  #   * `prepare_text` - the text clean-up step, reached through `send`
  #   * `detect`       - language detection for a string and an account
  describe LanguageDetector do
    # Every example talks to the detector through `.instance`.
    let(:detector) { described_class.instance }

    describe 'prepare_text' do
      # Calls `prepare_text` through `send`, exactly as each example did inline
      # before (presumably because the method is not public - confirm).
      def prepare_text(string)
        detector.send(:prepare_text, string)
      end

      it 'returns unmodified string without special cases' do
        string = 'just a regular string'

        expect(prepare_text(string)).to eq string
      end

      it 'collapses spacing in strings' do
        # The input must contain irregular runs of whitespace; with an already
        # normalised input this example could never fail.
        string = 'The formatting   in this is very        odd'

        expect(prepare_text(string)).to eq 'The formatting in this is very odd'
      end

      it 'strips usernames from strings before detection' do
        string = '@username Yeah, very surreal...! also @friend'

        expect(prepare_text(string)).to eq 'Yeah, very surreal...! also'
      end

      it 'strips URLs from strings before detection' do
        string = 'Our website is https://example.com and also http://localhost.dev'

        expect(prepare_text(string)).to eq 'Our website is and also'
      end

      it 'converts #hashtags back to normal text before detection' do
        string = 'Hey look at all the #animals and #FishAndChips'

        expect(prepare_text(string)).to eq 'Hey look at all the animals and fish and chips'
      end
    end

    describe 'detect' do
      let(:account_without_user_locale) { Fabricate(:user, locale: nil).account }
      let(:account_remote) { Fabricate(:account, domain: 'joinmastodon.org') }

      it 'detects english language for basic strings' do
        strings = [
          'Hello and welcome to mastodon how are you today?',
          "I'd rather not!",
          "a lot of people just want to feel righteous all the time and that's all that matters",
        ]

        strings.each do |string|
          result = detector.detect(string, account_without_user_locale)

          # The string doubles as the failure message so a failing sample is
          # identifiable in the output.
          expect(result).to eq(:en), string
        end
      end

      it 'detects spanish language' do
        string = 'Obtener un Hola y bienvenidos a Mastodon. Obtener un Hola y bienvenidos a Mastodon. Obtener un Hola y bienvenidos a Mastodon. Obtener un Hola y bienvenidos a Mastodon'
        result = detector.detect(string, account_without_user_locale)

        expect(result).to eq :es
      end

      describe "when language can't be detected" do
        it 'uses nil when sent an empty document' do
          result = detector.detect('', account_without_user_locale)

          expect(result).to be_nil
        end

        describe 'because of a URL' do
          it 'uses nil when sent just a URL' do
            string = 'http://example.com/media/2kFTgOJLXhQf0g2nKB4'
            cld_result = CLD3::NNetLanguageIdentifier.new(0, 2048).find_language(string)

            # NOTE(review): if `find_language` returns a result object rather
            # than a bare symbol, this comparison with :en can never be equal
            # and the check is vacuous - confirm against the cld3 API.
            expect(cld_result).not_to eq :en

            result = detector.detect(string, account_without_user_locale)

            expect(result).to be_nil
          end
        end

        describe 'with an account' do
          # The description previously claimed the account locale was used,
          # while the assertion has always expected nil.
          it 'uses nil for an empty string even when the account has a locale' do
            account = double(user_locale: 'fr')
            result = detector.detect('', account)

            expect(result).to be_nil
          end

          it 'uses nil when account is present but has no locale' do
            result = detector.detect('', account_without_user_locale)

            expect(result).to be_nil
          end
        end

        describe 'with an `en` default locale' do
          it 'uses nil for undetectable string' do
            result = detector.detect('', account_without_user_locale)

            expect(result).to be_nil
          end
        end

        describe 'remote user' do
          it 'detects Korean language' do
            string = '안녕하세요'
            result = detector.detect(string, account_remote)

            expect(result).to eq :ko
          end
        end

        describe 'with a non-`en` default locale' do
          # Switch the default locale to :ja for the example and always put the
          # previous value back, even if `example.run` raises, so the global
          # I18n setting cannot leak into later examples.
          around do |example|
            original_locale = I18n.default_locale
            I18n.default_locale = :ja

            begin
              example.run
            ensure
              I18n.default_locale = original_locale
            end
          end

          it 'uses nil for undetectable string' do
            string = ''
            result = detector.detect(string, account_without_user_locale)

            expect(result).to be_nil
          end
        end
      end
    end
  end