sanitize_config.rb 2.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119
  1. # frozen_string_literal: true
  2. class Sanitize
  3. module Config
  4. HTTP_PROTOCOLS = %w(
  5. http
  6. https
  7. ).freeze
  8. LINK_PROTOCOLS = %w(
  9. http
  10. https
  11. dat
  12. dweb
  13. ipfs
  14. ipns
  15. ssb
  16. gopher
  17. xmpp
  18. magnet
  19. gemini
  20. ).freeze
  21. CLASS_WHITELIST_TRANSFORMER = lambda do |env|
  22. node = env[:node]
  23. class_list = node['class']&.split(/[\t\n\f\r ]/)
  24. return unless class_list
  25. class_list.keep_if do |e|
  26. next true if /^(h|p|u|dt|e)-/.match?(e) # microformats classes
  27. next true if /^(mention|hashtag)$/.match?(e) # semantic classes
  28. next true if /^(ellipsis|invisible)$/.match?(e) # link formatting classes
  29. end
  30. node['class'] = class_list.join(' ')
  31. end
  32. UNSUPPORTED_HREF_TRANSFORMER = lambda do |env|
  33. return unless env[:node_name] == 'a'
  34. current_node = env[:node]
  35. scheme = begin
  36. if current_node['href'] =~ Sanitize::REGEX_PROTOCOL
  37. Regexp.last_match(1).downcase
  38. else
  39. :relative
  40. end
  41. end
  42. current_node.replace(Nokogiri::XML::Text.new(current_node.text, current_node.document)) unless LINK_PROTOCOLS.include?(scheme)
  43. end
  44. UNSUPPORTED_ELEMENTS_TRANSFORMER = lambda do |env|
  45. return unless %w(h1 h2 h3 h4 h5 h6 blockquote pre ul ol li).include?(env[:node_name])
  46. current_node = env[:node]
  47. case env[:node_name]
  48. when 'li'
  49. current_node.traverse do |node|
  50. next unless %w(p ul ol li).include?(node.name)
  51. node.add_next_sibling('<br>') if node.next_sibling
  52. node.replace(node.children) unless node.text?
  53. end
  54. else
  55. current_node.name = 'p'
  56. end
  57. end
  58. MASTODON_STRICT ||= freeze_config(
  59. elements: %w(p br span a),
  60. attributes: {
  61. 'a' => %w(href rel class),
  62. 'span' => %w(class),
  63. },
  64. add_attributes: {
  65. 'a' => {
  66. 'rel' => 'nofollow noopener noreferrer',
  67. 'target' => '_blank',
  68. },
  69. },
  70. protocols: {},
  71. transformers: [
  72. CLASS_WHITELIST_TRANSFORMER,
  73. UNSUPPORTED_ELEMENTS_TRANSFORMER,
  74. UNSUPPORTED_HREF_TRANSFORMER,
  75. ]
  76. )
  77. MASTODON_OEMBED ||= freeze_config merge(
  78. RELAXED,
  79. elements: RELAXED[:elements] + %w(audio embed iframe source video),
  80. attributes: merge(
  81. RELAXED[:attributes],
  82. 'audio' => %w(controls),
  83. 'embed' => %w(height src type width),
  84. 'iframe' => %w(allowfullscreen frameborder height scrolling src width),
  85. 'source' => %w(src type),
  86. 'video' => %w(controls height loop width),
  87. 'div' => [:data]
  88. ),
  89. protocols: merge(
  90. RELAXED[:protocols],
  91. 'embed' => { 'src' => HTTP_PROTOCOLS },
  92. 'iframe' => { 'src' => HTTP_PROTOCOLS },
  93. 'source' => { 'src' => HTTP_PROTOCOLS }
  94. )
  95. )
  96. end
  97. end