i18n-scan.pl 5.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257
  1. #!/usr/bin/perl
  2. use strict;
  3. use warnings;
  4. use IPC::Open2;
  5. use POSIX;
  6. $ENV{'LC_ALL'} = 'C';
  7. POSIX::setlocale(POSIX::LC_ALL, 'C');
  8. @ARGV >= 1 || die "Usage: $0 <source directory>\n";
  9. my %keywords = (
  10. '.js' => [ '_:1', '_:1,2c', 'N_:2,3', 'N_:2,3,4c' ],
  11. '.lua' => [ '_:1', '_:1,2c', 'translate:1', 'translate:1,2c', 'translatef:1', 'N_:2,3', 'N_:2,3,4c', 'ntranslate:2,3', 'ntranslate:2,3,4c' ],
  12. '.htm' => [ '_:1', '_:1,2c', 'translate:1', 'translate:1,2c', 'translatef:1', 'N_:2,3', 'N_:2,3,4c', 'ntranslate:2,3', 'ntranslate:2,3,4c' ],
  13. '.json' => [ '_:1', '_:1,2c' ]
  14. );
  15. sub xgettext($@) {
  16. my $path = shift;
  17. my @keywords = @_;
  18. my ($ext) = $path =~ m!(\.\w+)$!;
  19. my @cmd = qw(xgettext --from-code=UTF-8 --no-wrap);
  20. if ($ext eq '.htm' || $ext eq '.lua') {
  21. push @cmd, '--language=Lua';
  22. }
  23. elsif ($ext eq '.js' || $ext eq '.json') {
  24. push @cmd, '--language=JavaScript';
  25. }
  26. push @cmd, map { "--keyword=$_" } (@{$keywords{$ext}}, @keywords);
  27. push @cmd, '-o', '-';
  28. return @cmd;
  29. }
  30. sub whitespace_collapse($) {
  31. my $s = shift;
  32. my %r = ('n' => ' ', 't' => ' ');
  33. # Translate \t and \n to plain spaces, leave all other escape
  34. # sequences alone. Finally replace all consecutive spaces by
  35. # single ones and trim leading and trailing space.
  36. $s =~ s/\\(.)/$r{$1} || "\\$1"/eg;
  37. $s =~ s/ {2,}/ /g;
  38. $s =~ s/^ //;
  39. $s =~ s/ $//;
  40. return $s;
  41. }
  42. sub postprocess_pot($$) {
  43. my ($path, $source) = @_;
  44. my (@res, $msgid);
  45. my $skip = 1;
  46. $source =~ s/^#: (.+?)\n/join("\n", map { "#: $path:$_" } $1 =~ m!:(\d+)!g) . "\n"/emg;
  47. my @lines = split /\n/, $source;
  48. # Remove all header lines up to the first location comment
  49. while (@lines > 0 && $lines[0] !~ m!^#: !) {
  50. shift @lines;
  51. }
  52. while (@lines > 0) {
  53. my $line = shift @lines;
  54. # Concat multiline msgids and collapse whitespaces
  55. if ($line =~ m!^(msg\w+) "(.*)"$!) {
  56. my $kw = $1;
  57. my $kv = $2;
  58. while (@lines > 0 && $lines[0] =~ m!^"(.*)"$!) {
  59. $kv .= ' '. $1;
  60. shift @lines;
  61. }
  62. $kv = whitespace_collapse($kv);
  63. # Filter invalid empty msgids by popping all lines in @res
  64. # leading to this point and skip all subsequent lines in
  65. # @lines belonging to this faulty id.
  66. if ($kw ne 'msgstr' && $kv eq '') {
  67. while (@res > 0 && $res[-1] !~ m!^$!) {
  68. pop @res;
  69. }
  70. while (@lines > 0 && $lines[0] =~ m!^(?:msg\w+ )?"(.*)"$!) {
  71. shift @lines;
  72. }
  73. next;
  74. }
  75. push @res, sprintf '%s "%s"', $kw, $kv;
  76. }
  77. # Ignore any flags added by xgettext
  78. elsif ($line =~ m!^#, !) {
  79. next;
  80. }
  81. # Pass through other lines unmodified
  82. else {
  83. push @res, $line;
  84. }
  85. }
  86. return @res ? join("\n", '', @res, '') : '';
  87. }
  88. sub uniq(@) {
  89. my %h = map { $_, 1 } @_;
  90. return sort keys %h;
  91. }
  92. sub preprocess_htm($$) {
  93. my ($path, $source) = @_;
  94. my $sub = {
  95. '=' => '(%s)',
  96. '_' => 'translate([==[%s]==])',
  97. ':' => 'translate([==[%s]==])',
  98. '+' => 'include([==[%s]==])',
  99. '#' => '--[==[%s]==]',
  100. '' => '%s'
  101. };
  102. # Translate the .htm source into a valid Lua source using bracket quotes
  103. # to avoid the need for complex escaping.
  104. $source =~ s!<%-?([=_:+#]?)(.*?)-?%>!
  105. my $t = $1;
  106. my $s = $2;
  107. # Split translation expressions on first non-escaped pipe.
  108. if ($t eq ':' || $t eq '_') {
  109. $s =~ s/^((?:[^\|\\]|\\.)*)\|(.*)$/$1]==],[==[$2/;
  110. }
  111. sprintf "]==]; $sub->{$t}; [==[", $s
  112. !sge;
  113. # Discover expressions like "lng.translate(...)" or "luci.i18n.translate(...)"
  114. # and return them as extra keyword so that xgettext recognizes such expressions
  115. # as translate(...) calls.
  116. my @extra_function_keywords =
  117. map { ("$_:1", "$_:1,2c") }
  118. uniq($source =~ m!((?:\w+\.)+translatef?)[ \t\n]*\(!g);
  119. return ("[==[$source]==]", @extra_function_keywords);
  120. }
  121. sub preprocess_lua($$) {
  122. my ($path, $source) = @_;
  123. # Discover expressions like "lng.translate(...)" or "luci.i18n.translate(...)"
  124. # and return them as extra keyword so that xgettext recognizes such expressions
  125. # as translate(...) calls.
  126. my @extra_function_keywords =
  127. map { ("$_:1", "$_:1,2c") }
  128. uniq($source =~ m!((?:\w+\.)+translatef?)[ \t\n]*\(!g);
  129. return ($source, @extra_function_keywords);
  130. }
  131. sub preprocess_json($$) {
  132. my ($path, $source) = @_;
  133. my ($file) = $path =~ m!([^/]+)$!;
  134. $source =~ s/("(?:title|description)")\s*:\s*("(?:[^"\\]|\\.)*")/$1: _($2)/sg;
  135. return ($source);
  136. }
  137. my ($msguniq_in, $msguniq_out);
  138. my $msguniq_pid = open2($msguniq_out, $msguniq_in, 'msguniq', '-s');
  139. print $msguniq_in "msgid \"\"\nmsgstr \"Content-Type: text/plain; charset=UTF-8\"\n";
  140. if (open F, "find @ARGV -type f '(' -name '*.htm' -o -name '*.lua' -o -name '*.js' -o -path '*/menu.d/*.json' -o -path '*/acl.d/*.json' -o -path '*/statistics/plugins/*.json' ')' |")
  141. {
  142. while (defined( my $file = readline F))
  143. {
  144. chomp $file;
  145. if (open S, '<', $file)
  146. {
  147. local $/ = undef;
  148. my $source = <S>;
  149. my @extra_function_keywords;
  150. if ($file =~ m!\.htm$!)
  151. {
  152. ($source, @extra_function_keywords) = preprocess_htm($file, $source);
  153. }
  154. elsif ($file =~ m!\.lua$!)
  155. {
  156. ($source, @extra_function_keywords) = preprocess_lua($file, $source);
  157. }
  158. elsif ($file =~ m!\.json$!)
  159. {
  160. ($source, @extra_function_keywords) = preprocess_json($file, $source);
  161. }
  162. my ($xgettext_in, $xgettext_out);
  163. my $pid = open2($xgettext_out, $xgettext_in, xgettext($file, @extra_function_keywords), '-');
  164. print $xgettext_in $source;
  165. close $xgettext_in;
  166. my $pot = readline $xgettext_out;
  167. close $xgettext_out;
  168. waitpid $pid, 0;
  169. print $msguniq_in postprocess_pot($file, $pot);
  170. }
  171. }
  172. close F;
  173. }
  174. close $msguniq_in;
  175. my @pot = <$msguniq_out>;
  176. close $msguniq_out;
  177. waitpid $msguniq_pid, 0;
  178. while (@pot > 0) {
  179. my $line = shift @pot;
  180. # Reorder the location comments in a detemrinistic way to
  181. # reduce SCM noise when frequently updating templates.
  182. if ($line =~ m!^#: !) {
  183. my @locs = ($line);
  184. while (@pot > 0 && $pot[0] =~ m!^#: !) {
  185. push @locs, shift @pot;
  186. }
  187. print
  188. map { join(':', @$_) . "\n" }
  189. sort { ($a->[0] cmp $b->[0]) || ($a->[1] <=> $b->[1]) }
  190. map { [ /^(.+):(\d+)$/ ] }
  191. @locs
  192. ;
  193. next;
  194. }
  195. print $line;
  196. }