twitter_regex.rb 1.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142
  1. module Twitter
  2. class Regex
  3. REGEXEN[:valid_general_url_path_chars] = /[^\p{White_Space}\(\)\?]/iou
  4. REGEXEN[:valid_url_path_ending_chars] = /[^\p{White_Space}\(\)\?!\*';:=\,\.\$%\[\]~&\|@]|(?:#{REGEXEN[:valid_url_balanced_parens]})/iou
  5. REGEXEN[:valid_url_balanced_parens] = /
  6. \(
  7. (?:
  8. #{REGEXEN[:valid_general_url_path_chars]}+
  9. |
  10. # allow one nested level of balanced parentheses
  11. (?:
  12. #{REGEXEN[:valid_general_url_path_chars]}*
  13. \(
  14. #{REGEXEN[:valid_general_url_path_chars]}+
  15. \)
  16. #{REGEXEN[:valid_general_url_path_chars]}*
  17. )
  18. )
  19. \)
  20. /iox
  21. REGEXEN[:valid_url_path] = /(?:
  22. (?:
  23. #{REGEXEN[:valid_general_url_path_chars]}*
  24. (?:#{REGEXEN[:valid_url_balanced_parens]} #{REGEXEN[:valid_general_url_path_chars]}*)*
  25. #{REGEXEN[:valid_url_path_ending_chars]}
  26. )|(?:#{REGEXEN[:valid_general_url_path_chars]}+\/)
  27. )/iox
  28. REGEXEN[:valid_url] = %r{
  29. ( # $1 total match
  30. (#{REGEXEN[:valid_url_preceding_chars]}) # $2 Preceeding chracter
  31. ( # $3 URL
  32. (https?:\/\/)? # $4 Protocol (optional)
  33. (#{REGEXEN[:valid_domain]}) # $5 Domain(s)
  34. (?::(#{REGEXEN[:valid_port_number]}))? # $6 Port number (optional)
  35. (/#{REGEXEN[:valid_url_path]}*)? # $7 URL Path and anchor
  36. (\?#{REGEXEN[:valid_url_query_chars]}*#{REGEXEN[:valid_url_query_ending_chars]})? # $8 Query String
  37. )
  38. )
  39. }iox
  40. end
  41. end