toc_generator.rb 1.5 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869
  1. # frozen_string_literal: true
  # Adds +id+ anchors to the headings of an HTML fragment and collects a
  # table of contents from them.
  #
  # Parsing is lazy: the source is only processed the first time #html or
  # #toc is called.
  class TOCGenerator
    # Heading tags that receive an +id+ attribute.
    TARGET_ELEMENTS = %w[h1 h2 h3 h4 h5 h6].freeze
    # Heading tags that are additionally listed in the table of contents.
    LISTED_ELEMENTS = %w[h2 h3].freeze

    # One entry of the table of contents.
    class Section
      attr_accessor :depth, :title, :children, :anchor

      # @param depth [String] heading level taken from the tag name ("2" for h2)
      # @param title [String] text content of the heading
      # @param anchor [String] value assigned to the heading's +id+ attribute
      def initialize(depth, title, anchor)
        @depth = depth
        @title = title
        @children = []
        @anchor = anchor
      end

      # Appends a nested section to #children.
      #
      # @param section [Section]
      # @return [Array<Section>] the children list, as Array#<< returns it
      def <<(section)
        children << section
      end
    end

    # @param source_html [String, nil] HTML fragment to process
    def initialize(source_html)
      @source_html = source_html
      @processed = false
      @target_html = ''
      @headers = []
      # Per-base counter used to build "-N" suffixes for repeated anchors.
      @slugs = Hash.new { |h, k| h[k] = 0 }
      # Every anchor handed out so far; guards against suffixed anchors
      # colliding with a heading whose own slug already looks like "foo-2".
      @used_anchors = {}
    end

    # @return [String] the fragment with heading ids filled in, or '' when
    #   the source is blank
    def html
      parse_and_transform unless @processed
      @target_html
    end

    # @return [Array<Section>] top-level sections; deeper listed headings are
    #   found in each section's #children
    def toc
      parse_and_transform unless @processed
      @headers
    end

    private

    # Parses the source, assigns an id to every target heading and records
    # the listed ones in @headers.
    def parse_and_transform
      return if @source_html.blank?

      parsed_html = Nokogiri::HTML.fragment(@source_html)

      parsed_html.traverse do |node|
        next unless TARGET_ELEMENTS.include?(node.name)

        # An empty id attribute is treated like a missing one so that the
        # heading never ends up with a blank anchor.
        base = node['id'].presence || node.text.parameterize.presence || 'sec'
        anchor = unique_anchor(base)
        node['id'] = anchor

        add_section(node, anchor) if LISTED_ELEMENTS.include?(node.name)
      end

      @target_html = parsed_html.to_s
      @processed = true
    end

    # Returns +base+ on first use, otherwise "base-N" with the lowest counter
    # value that has not been handed out yet.
    def unique_anchor(base)
      @slugs[base] += 1
      candidate = @slugs[base] > 1 ? "#{base}-#{@slugs[base]}" : base

      # The suffixed form may already be taken by an earlier heading whose
      # own slug happened to be e.g. "foo-2"; keep counting until it is free.
      while @used_anchors.key?(candidate)
        @slugs[base] += 1
        candidate = "#{base}-#{@slugs[base]}"
      end

      @used_anchors[candidate] = true
      candidate
    end

    # Files the heading either as a new top-level section or as a child of
    # the most recent top-level section when it is deeper than that one.
    def add_section(node, anchor)
      # Depths are single-digit strings ("1".."6"), so String comparison
      # orders them the same way as their numeric values.
      depth = node.name[1..-1]
      section = Section.new(depth, node.text, anchor)
      latest_section = @headers.last

      if latest_section.nil? || latest_section.depth >= depth
        @headers << section
      else
        latest_section << section
      end
    end
  end