i18n-scan.pl 4.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292
  1. #!/usr/bin/perl
  2. use utf8;
  3. use strict;
  4. use warnings;
  5. use Text::Balanced qw(extract_tagged gen_delimited_pat);
  6. use POSIX;
  7. POSIX::setlocale(POSIX::LC_ALL, "C");
  8. @ARGV >= 1 || die "Usage: $0 <source directory>\n";
  9. my %stringtable;
  10. sub dec_lua_str
  11. {
  12. my $s = shift;
  13. my %rep = (
  14. 'a' => "\x07",
  15. 'b' => "\x08",
  16. 'f' => "\x0c",
  17. 'n' => "\n",
  18. 'r' => "\r",
  19. 't' => "\t",
  20. 'v' => "\x76"
  21. );
  22. $s =~ s!\\(?:([0-9]{1,2})|(.))!
  23. $1 ? chr(int($1)) : ($rep{$2} || $2)
  24. !segx;
  25. $s =~ s/[\s\n]+/ /g;
  26. $s =~ s/^ //;
  27. $s =~ s/ $//;
  28. return $s;
  29. }
  30. sub dec_json_str
  31. {
  32. my $s = shift;
  33. my %rep = (
  34. '"' => '"',
  35. '/' => '/',
  36. 'b' => "\x08",
  37. 'f' => "\x0c",
  38. 'n' => "\n",
  39. 'r' => "\r",
  40. 't' => "\t",
  41. '\\' => '\\'
  42. );
  43. $s =~ s!\\([\\/"bfnrt]|u([0-9a-fA-F]{4}))!
  44. $2 ? chr(hex($2)) : $rep{$1}
  45. !egx;
  46. $s =~ s/[\s\n]+/ /g;
  47. $s =~ s/^ //;
  48. $s =~ s/ $//;
  49. return $s;
  50. }
  51. sub dec_tpl_str
  52. {
  53. my $s = shift;
  54. $s =~ s/-$//;
  55. $s =~ s/[\s\n]+/ /g;
  56. $s =~ s/^ //;
  57. $s =~ s/ $//;
  58. $s =~ s/\\/\\\\/g;
  59. return $s;
  60. }
  61. if( open F, "find @ARGV -type f '(' -name '*.htm' -o -name '*.lua' -o -name '*.js' ')' | sort |" )
  62. {
  63. while( defined( my $file = readline F ) )
  64. {
  65. chomp $file;
  66. if( open S, "< $file" )
  67. {
  68. binmode S, ':utf8';
  69. local $/ = undef;
  70. my $raw = <S>;
  71. close S;
  72. my $text = $raw;
  73. my $line = 1;
  74. while ($text =~ s/ ^ (.*?) (?:translate|translatef|i18n|_) ([\n\s]*) \( //sgx)
  75. {
  76. my ($prefix, $suffix) = ($1, $2);
  77. my $code;
  78. my $res = "";
  79. my $sub = "";
  80. $line += () = $prefix =~ /\n/g;
  81. my $position = "$file:$line";
  82. $line += () = $suffix =~ /\n/g;
  83. while (defined $sub)
  84. {
  85. undef $sub;
  86. if ($text =~ /^ ([\n\s]*(?:\.\.[\n\s]*)?) (\[=*\[) /sx)
  87. {
  88. my $ws = $1;
  89. my $stag = quotemeta $2;
  90. (my $etag = $stag) =~ y/[/]/;
  91. ($sub, $text) = extract_tagged($text, $stag, $etag, q{\s*(?:\.\.\s*)?});
  92. $line += () = $ws =~ /\n/g;
  93. if (defined($sub) && length($sub)) {
  94. $line += () = $sub =~ /\n/g;
  95. $sub =~ s/^$stag//;
  96. $sub =~ s/$etag$//;
  97. $res .= $sub;
  98. }
  99. }
  100. elsif ($text =~ /^ ([\n\s]*(?:\.\.[\n\s]*)?) (['"]) /sx)
  101. {
  102. my $ws = $1;
  103. my $quote = $2;
  104. my $re = gen_delimited_pat($quote, '\\');
  105. if ($text =~ m/\G\s*(?:\.\.\s*)?($re)/gcs)
  106. {
  107. $sub = $1;
  108. $text = substr $text, pos $text;
  109. }
  110. $line += () = $ws =~ /\n/g;
  111. if (defined($sub) && length($sub)) {
  112. $line += () = $sub =~ /\n/g;
  113. $sub =~ s/^$quote//;
  114. $sub =~ s/$quote$//;
  115. $res .= $sub;
  116. }
  117. }
  118. }
  119. if (defined($res))
  120. {
  121. $res = dec_lua_str($res);
  122. if ($res) {
  123. $stringtable{$res} ||= [ ];
  124. push @{$stringtable{$res}}, $position;
  125. }
  126. }
  127. }
  128. $text = $raw;
  129. $line = 1;
  130. while( $text =~ s/ ^ (.*?) <% -? [:_] /<%/sgx )
  131. {
  132. $line += () = $1 =~ /\n/g;
  133. ( my $code, $text ) = extract_tagged($text, '<%', '%>');
  134. if( defined $code )
  135. {
  136. my $position = "$file:$line";
  137. $line += () = $code =~ /\n/g;
  138. $code = dec_tpl_str(substr $code, 2, length($code) - 4);
  139. $stringtable{$code} ||= [];
  140. push @{$stringtable{$code}}, $position;
  141. }
  142. }
  143. }
  144. }
  145. close F;
  146. }
  147. if( open F, "find @ARGV -type f -path '*/menu.d/*.json' | sort |" )
  148. {
  149. while( defined( my $file = readline F ) )
  150. {
  151. chomp $file;
  152. if( open S, "< $file" )
  153. {
  154. binmode S, ':utf8';
  155. local $/ = undef;
  156. my $raw = <S>;
  157. close S;
  158. my $text = $raw;
  159. my $line = 1;
  160. while ($text =~ s/ ^ (.*?) "title" ([\n\s]*) : //sgx)
  161. {
  162. my ($prefix, $suffix) = ($1, $2);
  163. my $code;
  164. my $res = "";
  165. my $sub = "";
  166. $line += () = $prefix =~ /\n/g;
  167. my $position = "$file:$line";
  168. $line += () = $suffix =~ /\n/g;
  169. while (defined $sub)
  170. {
  171. undef $sub;
  172. if ($text =~ /^ ([\n\s]*) " /sx)
  173. {
  174. my $ws = $1;
  175. my $re = gen_delimited_pat('"', '\\');
  176. if ($text =~ m/\G\s*($re)/gcs)
  177. {
  178. $sub = $1;
  179. $text = substr $text, pos $text;
  180. }
  181. $line += () = $ws =~ /\n/g;
  182. if (defined($sub) && length($sub)) {
  183. $line += () = $sub =~ /\n/g;
  184. $sub =~ s/^"//;
  185. $sub =~ s/"$//;
  186. $res .= $sub;
  187. }
  188. }
  189. }
  190. if (defined($res))
  191. {
  192. $res = dec_json_str($res);
  193. if ($res) {
  194. $stringtable{$res} ||= [ ];
  195. push @{$stringtable{$res}}, $position;
  196. }
  197. }
  198. }
  199. }
  200. }
  201. close F;
  202. }
  203. if( open C, "| msgcat -" )
  204. {
  205. binmode C, ':utf8';
  206. printf C "msgid \"\"\nmsgstr \"Content-Type: text/plain; charset=UTF-8\"\n\n";
  207. foreach my $key ( sort keys %stringtable )
  208. {
  209. if( length $key )
  210. {
  211. my @positions =
  212. map { join ':', @$_ }
  213. sort { ($a->[0] cmp $b->[0]) || ($a->[1] <=> $b->[1]) }
  214. map { [ /^(.+):(\d+)$/ ] }
  215. @{$stringtable{$key}};
  216. $key =~ s/\\/\\\\/g;
  217. $key =~ s/\n/\\n/g;
  218. $key =~ s/\t/\\t/g;
  219. $key =~ s/"/\\"/g;
  220. printf C "#: %s\nmsgid \"%s\"\nmsgstr \"\"\n\n",
  221. join(' ', @positions), $key;
  222. }
  223. }
  224. close C;
  225. }