lang-compress.pl 4.8 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185
  1. #! /usr/bin/env perl
  2. #
  3. # C source compressor. This:
  4. #
  5. # - merges continuation lines
  6. # - removes comments (not in strings)
  7. # - removes empty lines (not in strings)
  8. use strict;
  9. use warnings;
  10. my $debug = defined $ENV{DEBUG};
  11. my $lang = shift @ARGV;
  12. # Slurp the file
  13. $/ = undef;
  14. $_ = <>;
  15. if ($lang eq 'C') {
  16. # Merge continuation lines
  17. s{\\\n}{}g;
  18. # Regexp for things that should be preserved
  19. my $preserved =
  20. qr{
  21. (?:
  22. " # String start
  23. (?: \\. | [^\"])* # Any character, including escaped ones
  24. " # String end
  25. )
  26. | # OR
  27. (?:
  28. ' # Character start (multi-chars supported)
  29. (?: \\. | [^\'])+ # Any character, including escaped ones
  30. ' # String end
  31. )
  32. }x;
  33. # Remove comments while preserving strings
  34. s{
  35. (?| # All things preserved end up in $1
  36. /\* # C comment start
  37. .*? # Contents up until
  38. \*/ # C comment end
  39. | # OR
  40. ( # Grouping for the replacement
  41. $preserved
  42. )
  43. )
  44. }{
  45. if ($debug) {
  46. print STDERR "DEBUG: '$&' => '$1'\n" if defined $1;
  47. print STDERR "DEBUG: '$&' removed\n" unless defined $1;
  48. }
  49. defined $1 ? $1 : ""
  50. }gsxe;
  51. # Remove empty lines
  52. s{
  53. (?| # All things preserved end up in $1
  54. (^|\n)(?:\s*(?:\n|$))+ # Empty lines, preserve one newline
  55. | # OR
  56. ( # Grouping for the replacement
  57. $preserved
  58. )
  59. )
  60. }{$1}gsx;
  61. # Remove extra spaces
  62. s{
  63. (?| # All things preserved end up in $1
  64. \h+ # Horizontal spaces replaced with one
  65. | # OR
  66. ( # Grouping for the replacement
  67. $preserved
  68. )
  69. )
  70. }{
  71. if ($debug) {
  72. print STDERR "DEBUG: '$&' => '$1'\n" if defined $1;
  73. print STDERR "DEBUG: '$&' => ' '\n" unless defined $1;
  74. }
  75. defined $1 ? $1 : " "
  76. }gsxe;
  77. # Clean up spaces at start and end of lines
  78. s/^ //mg;
  79. s/ $//mg;
  80. } elsif ($lang eq 'S') {
  81. # Because we use C++ style comments in our .S files, all we can do
  82. # is to drop them
  83. s{
  84. ^([^\n]*?)//[^\n]*?$ # Any line with a // comment
  85. }{
  86. if ($debug) {
  87. print STDERR "DEBUG: '$&' => '$1'\n" if defined $1;
  88. print STDERR "DEBUG: '$&' removed\n" unless defined $1;
  89. }
  90. defined $1 ? $1 : ""
  91. }mgsxe;
  92. # Drop all empty lines
  93. s{
  94. (^|\n)(?:\s*(?:\n|$))+ # Empty lines, preserve one newline
  95. }{$1}gsx;
  96. } elsif ($lang eq 'perl') {
  97. # Merge continuation lines
  98. s{\\\n}{}g;
  99. # Regexp for things that should be preserved
  100. my $preserved =
  101. qr{
  102. (?:
  103. <<["']?(\w+)["']? # HERE document start
  104. .*? # Its contents
  105. ^\g{-1}$
  106. )
  107. |
  108. (?:
  109. " # Double quoted string start
  110. (?: \\. | [^\"])* # Any character, including escaped ones
  111. " # Double quoted string end
  112. )
  113. | # OR
  114. (?:
  115. ' # Single quoted string start
  116. [^\']* # Any character
  117. ' # Single quoted string end
  118. )
  119. }msx;
  120. # Remove comments while preserving strings
  121. s{
  122. (?| # All things preserved end up in $1
  123. \#.*?(\n|$) # Perl comments
  124. | # OR
  125. ( # Grouping for the replacement
  126. $preserved
  127. )
  128. )
  129. }{
  130. if ($debug) {
  131. print STDERR "DEBUG: '$&' => '$1'\n" if defined $1;
  132. print STDERR "DEBUG: '$&' removed\n" unless defined $1;
  133. }
  134. defined $1 ? $1 : ""
  135. }gsxe;
  136. # Remove empty lines
  137. s{
  138. (?| # All things preserved end up in $1
  139. (^|\n)(?:\s*(?:\n|$))+ # Empty lines, preserve one newline
  140. | # OR
  141. ( # Grouping for the replacement
  142. $preserved
  143. )
  144. )
  145. }{$1}gsx;
  146. }
  147. print;