i18n-scan.pl

#!/usr/bin/perl

use strict;
use warnings;
use IPC::Open2;
use POSIX;
use Text::Balanced qw(gen_extract_tagged);

$ENV{'LC_ALL'} = 'C';
POSIX::setlocale(POSIX::LC_ALL, 'C');

@ARGV >= 1 || die "Usage: $0 <source directory>\n";

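# Map of file extensions to the xgettext --keyword specifications that
# mark translatable strings in sources of that type.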
my %keywords = (
	'.js'   => [ '_:1', '_:1,2c', 'N_:2,3', 'N_:2,3,4c' ],
	'.ut'   => [ '_:1', '_:1,2c', 'N_:2,3', 'N_:2,3,4c' ],
	'.uc'   => [ '_:1', '_:1,2c', 'translate:1', 'translate:1,2c', 'N_:2,3', 'N_:2,3,4c', 'ntranslate:2,3', 'ntranslate:2,3,4c' ],
	'.lua'  => [ '_:1', '_:1,2c', 'translate:1', 'translate:1,2c', 'translatef:1', 'N_:2,3', 'N_:2,3,4c', 'ntranslate:2,3', 'ntranslate:2,3,4c' ],
	'.htm'  => [ '_:1', '_:1,2c', 'translate:1', 'translate:1,2c', 'translatef:1', 'N_:2,3', 'N_:2,3,4c', 'ntranslate:2,3', 'ntranslate:2,3,4c' ],
	'.json' => [ '_:1', '_:1,2c' ]
);

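# Assemble the xgettext command line for the given source file: select the
# xgettext language from the file extension, add the keyword specifications
# for that extension plus any extra keywords passed by the caller, and have
# the resulting POT data written to stdout.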
sub xgettext($@) {
	my $path = shift;
	my @keywords = @_;
	my ($ext) = $path =~ m!(\.\w+)$!;
	my @cmd = qw(xgettext --from-code=UTF-8 --no-wrap);

	if ($ext eq '.htm' || $ext eq '.lua') {
		push @cmd, '--language=Lua';
	}
	elsif ($ext eq '.ut' || $ext eq '.uc' || $ext eq '.js' || $ext eq '.json') {
		push @cmd, '--language=JavaScript';
	}

	push @cmd, map { "--keyword=$_" } (@{$keywords{$ext}}, @keywords);
	push @cmd, '-o', '-';

	return @cmd;
}

sub whitespace_collapse($) {
	my $s = shift;
	my %r = ('n' => ' ', 't' => ' ');

	# Translate \t and \n to plain spaces, leave all other escape
	# sequences alone. Finally replace all consecutive spaces by
	# single ones and trim leading and trailing space.
	$s =~ s/\\(.)/$r{$1} || "\\$1"/eg;
	$s =~ s/ {2,}/ /g;
	$s =~ s/^ //;
	$s =~ s/ $//;

	return $s;
}

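# Post-process the raw POT output xgettext produced for a single file:
# rewrite each "#:" location comment to reference the scanned file path
# (one comment per line number), join multiline msgids into single lines
# with collapsed whitespace, drop entries with empty msgids and strip
# "#," flag comments.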
sub postprocess_pot($$) {
	my ($path, $source) = @_;
	my (@res, $msgid);
	my $skip = 1;

	$source =~ s/^#: (.+?)\n/join("\n", map { "#: $path:$_" } $1 =~ m!:(\d+)!g) . "\n"/emg;

	my @lines = split /\n/, $source;

	# Remove all header lines up to the first location comment
	while (@lines > 0 && $lines[0] !~ m!^#: !) {
		shift @lines;
	}

	while (@lines > 0) {
		my $line = shift @lines;

		# Concatenate multiline msgids and collapse whitespace
		if ($line =~ m!^(msg\w+) "(.*)"$!) {
			my $kw = $1;
			my $kv = $2;

			while (@lines > 0 && $lines[0] =~ m!^"(.*)"$!) {
				$kv .= ' ' . $1;
				shift @lines;
			}

			$kv = whitespace_collapse($kv);

			# Filter invalid empty msgids by popping all lines in @res
			# leading to this point and skipping all subsequent lines in
			# @lines belonging to this faulty id.
			if ($kw ne 'msgstr' && $kv eq '') {
				while (@res > 0 && $res[-1] !~ m!^$!) {
					pop @res;
				}

				while (@lines > 0 && $lines[0] =~ m!^(?:msg\w+ )?"(.*)"$!) {
					shift @lines;
				}

				next;
			}

			push @res, sprintf '%s "%s"', $kw, $kv;
		}

		# Ignore any flags added by xgettext
		elsif ($line =~ m!^#, !) {
			next;
		}

		# Pass through other lines unmodified
		else {
			push @res, $line;
		}
	}

	return @res ? join("\n", '', @res, '') : '';
}

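# Return the unique values of the argument list in sorted order.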
sub uniq(@) {
	my %h = map { $_, 1 } @_;
	return sort keys %h;
}

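# Turn a LuCI .htm template into Lua code that xgettext can parse and
# collect any module-qualified translate()/translatef() calls found in it
# as additional keywords.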
sub preprocess_htm($$) {
	my ($path, $source) = @_;

	my $sub = {
		'=' => '(%s)',
		'_' => 'translate([==[%s]==])',
		':' => 'translate([==[%s]==])',
		'+' => 'include([==[%s]==])',
		'#' => '--[==[%s]==]',
		''  => '%s'
	};

	# Translate the .htm source into valid Lua source using bracket quotes
	# to avoid the need for complex escaping.
	$source =~ s!<%-?([=_:+#]?)(.*?)-?%>!
		my $t = $1;
		my $s = $2;

		# Split translation expressions on the first non-escaped pipe.
		if ($t eq ':' || $t eq '_') {
			$s =~ s/^((?:[^\|\\]|\\.)*)\|(.*)$/$1]==],[==[$2/;
		}

		sprintf "]==]; $sub->{$t}; [==[", $s
	!sge;

	# Discover expressions like "lng.translate(...)" or "luci.i18n.translate(...)"
	# and return them as extra keywords so that xgettext recognizes such
	# expressions as translate(...) calls.
	my @extra_function_keywords =
		map { ("$_:1", "$_:1,2c") }
		uniq($source =~ m!((?:\w+\.)+translatef?)[ \t\n]*\(!g);

	return ("[==[$source]==]", @extra_function_keywords);
}

sub preprocess_ut($$) {
	my ($path, $source) = @_;

	# Translate the .ut source into valid JavaScript code by enclosing template
	# text in multiline comments and extracting blocks as plain code.
	my $comt = gen_extract_tagged('{#', '#}', '(?s).*?(?=\{[#{%])');
	my $expr = gen_extract_tagged('{{', '}}', '(?s).*?(?=\{[#{%])');
	my $stmt = gen_extract_tagged('{%', '%}', '(?s).*?(?=\{[#{%])');
	my $res = '';

	while (length($source)) {
		my ($block, $remain, $prefix);

		($block, $remain, $prefix) = $comt->($source);
		($block, $remain, $prefix) = $expr->($source) unless defined $block;
		($block, $remain, $prefix) = $stmt->($source) unless defined $block;

		last unless defined $block;

		$source = $remain;

		$prefix =~ s!\*/!*\\/!g;
		$res .= '/*' . $prefix . '*/';

		if ($block =~ s!^\{#(.*)#}$!$1!s) {
			$block =~ s!\*/!*\\/!g;
			$res .= '/*' . $block . '*/';
		}
		elsif ($block =~ s!^\{\{(.*)}}$!$1!s) {
			$block =~ s!^[+-]!!;
			$block =~ s![+-]$!!;
			$res .= '(' . $block . ')';
		}
		elsif ($block =~ s!^\{%(.*)%}$!$1!s) {
			$block =~ s!^[+-]!!;
			$block =~ s![+-]$!!;
			$res .= '{' . $block . '}';
		}
	}

	if ($source =~ m!^(.*)\{%[+-]?(.*)$!s) {
		my $prefix = $1;
		my $block = $2;

		$prefix =~ s!\*/!*\\/!g;
		$res .= '/*' . $prefix . '*/';
		$res .= '{' . $block . '}';
	}

	return ($res);
}

sub preprocess_lua($$) {
	my ($path, $source) = @_;

	# Discover expressions like "lng.translate(...)" or "luci.i18n.translate(...)"
	# and return them as extra keywords so that xgettext recognizes such
	# expressions as translate(...) calls.
	my @extra_function_keywords =
		map { ("$_:1", "$_:1,2c") }
		uniq($source =~ m!((?:\w+\.)+translatef?)[ \t\n]*\(!g);

	return ($source, @extra_function_keywords);
}

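# Wrap the values of "title" and "description" properties in _() calls so
# that xgettext picks them up as translatable strings.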
sub preprocess_json($$) {
	my ($path, $source) = @_;
	my ($file) = $path =~ m!([^/]+)$!;

	$source =~ s/("(?:title|description)")\s*:\s*("(?:[^"\\]|\\.)*")/$1: _($2)/sg;

	return ($source);
}

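# Main program: feed a minimal POT header and the post-processed xgettext
# output of every discovered source file into "msguniq -s", then print the
# deduplicated result with location comments sorted per message.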
my ($msguniq_in, $msguniq_out);
my $msguniq_pid = open2($msguniq_out, $msguniq_in, 'msguniq', '-s');

print $msguniq_in "msgid \"\"\nmsgstr \"Content-Type: text/plain; charset=UTF-8\"\n";

if (open F, "find @ARGV -type f '(' -name '*.htm' -o -name '*.lua' -o -name '*.js' -o -name '*.uc' -o -name '*.ut' -o -path '*/menu.d/*.json' -o -path '*/acl.d/*.json' -o -path '*/statistics/plugins/*.json' -o -path '*/https-dns-proxy/providers/*.json' ')' |")
{
	while (defined(my $file = readline F))
	{
		chomp $file;

		if (open S, '<', $file)
		{
			local $/ = undef;

			my $source = <S>;
			my @extra_function_keywords;

			if ($file =~ m!\.htm$!)
			{
				($source, @extra_function_keywords) = preprocess_htm($file, $source);
			}
			elsif ($file =~ m!\.ut$!)
			{
				($source, @extra_function_keywords) = preprocess_ut($file, $source);
			}
			elsif ($file =~ m!\.lua$!)
			{
				($source, @extra_function_keywords) = preprocess_lua($file, $source);
			}
			elsif ($file =~ m!\.json$!)
			{
				($source, @extra_function_keywords) = preprocess_json($file, $source);
			}

			my ($xgettext_in, $xgettext_out);
			my $pid = open2($xgettext_out, $xgettext_in, xgettext($file, @extra_function_keywords), '-');

			print $xgettext_in $source;
			close $xgettext_in;

			my $pot = readline $xgettext_out;
			close $xgettext_out;

			waitpid $pid, 0;

			print $msguniq_in postprocess_pot($file, $pot);
		}
	}

	close F;
}

close $msguniq_in;

my @pot = <$msguniq_out>;

close $msguniq_out;
waitpid $msguniq_pid, 0;

while (@pot > 0) {
	my $line = shift @pot;

	# Reorder the location comments in a deterministic way to
	# reduce SCM noise when frequently updating templates.
	if ($line =~ m!^#: !) {
		my @locs = ($line);

		while (@pot > 0 && $pot[0] =~ m!^#: !) {
			push @locs, shift @pot;
		}

		print
			map { join(':', @$_) . "\n" }
			sort { ($a->[0] cmp $b->[0]) || ($a->[1] <=> $b->[1]) }
			map { [ /^(.+):(\d+)$/ ] }
			@locs
		;

		next;
	}

	print $line;
}