twitter_regex.rb 4.2 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778
  # Local overrides applied on top of the Twitter::TwitterText namespace.
  #
  # This file only *reopens* Configuration and Regex; it expects
  # Twitter::TwitterText and Regex::REGEXEN to be defined already.
  # NOTE(review): presumably both come from the twitter-text gem, which must be
  # loaded before this file -- confirm the load order.
  module Twitter::TwitterText
    class Configuration
      # Reports whether emoji parsing is enabled.
      # NOTE(review): presumably replaces a reader of the same name that is
      # defined outside this file -- confirm.
      #
      # @return [Boolean] always +false+
      def emoji_parsing_enabled
        false
      end
    end

    class Regex
      # Any character that may appear inside a URL path: everything except
      # whitespace, angle brackets, parentheses and the question mark.
      REGEXEN[:valid_general_url_path_chars] = /[^\p{White_Space}<>\(\)\?]/iou

      # A parenthesised run of path characters, with at most one nested level
      # of balanced parentheses inside it.
      #
      # This MUST be assigned before :valid_url_path_ending_chars. Regexp
      # interpolation copies the pattern at the moment the literal is
      # evaluated, so defining it afterwards would leave the ending-chars
      # pattern embedding the previous (stale) definition of this key.
      REGEXEN[:valid_url_balanced_parens] = /
        \(
          (?:
            #{REGEXEN[:valid_general_url_path_chars]}+
            |
            # allow one nested level of balanced parentheses
            (?:
              #{REGEXEN[:valid_general_url_path_chars]}*
              \(
                #{REGEXEN[:valid_general_url_path_chars]}+
              \)
              #{REGEXEN[:valid_general_url_path_chars]}*
            )
          )
        \)
      /iox

      # What a URL path is allowed to END with: either a single character that
      # is not whitespace and not one of the listed punctuation marks, or a
      # complete balanced-parentheses group (as defined just above).
      REGEXEN[:valid_url_path_ending_chars] = /[^\p{White_Space}\(\)\?!\*"'「」<>;:=\,\.\$%\[\]~&\|@]|(?:#{REGEXEN[:valid_url_balanced_parens]})/iou

      # Code point ranges spliced verbatim into the query-string character
      # classes below. Kept single-quoted so the \u{...} escapes reach the
      # regexp engine untouched.
      # NOTE(review): the ranges look like `ucschar` plus `iprivate` from
      # RFC 3987 -- verify before relying on that.
      UCHARS = '\u{A0}-\u{D7FF}\u{F900}-\u{FDCF}\u{FDF0}-\u{FFEF}\u{10000}-\u{1FFFD}\u{20000}-\u{2FFFD}\u{30000}-\u{3FFFD}\u{40000}-\u{4FFFD}\u{50000}-\u{5FFFD}\u{60000}-\u{6FFFD}\u{70000}-\u{7FFFD}\u{80000}-\u{8FFFD}\u{90000}-\u{9FFFD}\u{A0000}-\u{AFFFD}\u{B0000}-\u{BFFFD}\u{C0000}-\u{CFFFD}\u{D0000}-\u{DFFFD}\u{E1000}-\u{EFFFD}\u{E000}-\u{F8FF}\u{F0000}-\u{FFFFD}\u{100000}-\u{10FFFD}'.freeze

      # Characters accepted anywhere inside a query string.
      REGEXEN[:valid_url_query_chars] = /[a-z0-9!?\*'\(\);:&=\+\$\/%#\[\]\-_\.,~|@\^#{UCHARS}]/iou

      # Characters a query string is allowed to end with (a narrower set than
      # :valid_url_query_chars; most punctuation is excluded).
      REGEXEN[:valid_url_query_ending_chars] = /[a-z0-9_&=#\/\-#{UCHARS}]/iou

      # One path segment: either path characters interleaved with balanced
      # parenthesis groups and terminated by a valid ending character, or a run
      # of path characters terminated by a slash.
      REGEXEN[:valid_url_path] = /(?:
        (?:
          #{REGEXEN[:valid_general_url_path_chars]}*
          (?:#{REGEXEN[:valid_url_balanced_parens]} #{REGEXEN[:valid_general_url_path_chars]}*)*
          #{REGEXEN[:valid_url_path_ending_chars]}
        )|(?:#{REGEXEN[:valid_general_url_path_chars]}+\/)
      )/iox

      # Full URL matcher. Capture groups are numbered in the inline comments.
      # :valid_url_preceding_chars, :valid_domain and :valid_port_number are
      # read from REGEXEN and are not defined in this file.
      REGEXEN[:valid_url] = %r{
        ( # $1 total match
          (#{REGEXEN[:valid_url_preceding_chars]}) # $2 Preceding character
          ( # $3 URL
            ((?:https?|dat|dweb|ipfs|ipns|ssb|gopher|gemini):\/\/)? # $4 Protocol (optional)
            (#{REGEXEN[:valid_domain]}) # $5 Domain(s)
            (?::(#{REGEXEN[:valid_port_number]}))? # $6 Port number (optional)
            (/#{REGEXEN[:valid_url_path]}*)? # $7 URL Path and anchor
            (\?#{REGEXEN[:valid_url_query_chars]}*#{REGEXEN[:valid_url_query_ending_chars]})? # $8 Query String
          )
        )
      }iox

      # A single character of the node part of an xmpp: URI: an unreserved
      # character, a percent-encoded sequence, or one of the listed delimiters.
      REGEXEN[:validate_nodeid] = /(?:
        #{REGEXEN[:validate_url_unreserved]}|
        #{REGEXEN[:validate_url_pct_encoded]}|
        [!$()*+,;=]
      )/iox

      # A single character of the resource part of an xmpp: URI: an unreserved
      # character, a percent-encoded sequence, or a sub-delimiter as defined by
      # REGEXEN[:validate_url_sub_delims] (not defined in this file).
      REGEXEN[:validate_resid] = /(?:
        #{REGEXEN[:validate_url_unreserved]}|
        #{REGEXEN[:validate_url_pct_encoded]}|
        #{REGEXEN[:validate_url_sub_delims]}
      )/iox

      # Matcher for the non-hierarchical URI schemes handled here: xmpp: and
      # magnet:. The first two capture groups mirror :valid_url (total match,
      # preceding character); the third wraps whichever scheme branch matched.
      REGEXEN[:valid_extended_uri] = %r{
        ( # $1 total match
          (#{REGEXEN[:valid_url_preceding_chars]}) # $2 Preceding character
          ( # $3 URL
            (
              (xmpp:) # Protocol
              (//#{REGEXEN[:validate_nodeid]}+@#{REGEXEN[:valid_domain]}/)? # Authority (optional)
              (#{REGEXEN[:validate_nodeid]}+@)? # Username in path (optional)
              (#{REGEXEN[:valid_domain]}) # Domain in path
              (/#{REGEXEN[:validate_resid]}+)? # Resource in path (optional)
              (\?#{REGEXEN[:valid_url_query_chars]}*#{REGEXEN[:valid_url_query_ending_chars]})? # Query String
            ) | (
              (magnet:) # Protocol
              (\?#{REGEXEN[:valid_url_query_chars]}*#{REGEXEN[:valid_url_query_ending_chars]}) # Query String
            )
          )
        )
      }iox
    end
  end