cd2nroff 13 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526
  1. #!/usr/bin/env perl
  2. #***************************************************************************
  3. # _ _ ____ _
  4. # Project ___| | | | _ \| |
  5. # / __| | | | |_) | |
  6. # | (__| |_| | _ <| |___
  7. # \___|\___/|_| \_\_____|
  8. #
  9. # Copyright (C) Daniel Stenberg, <daniel@haxx.se>, et al.
  10. #
  11. # This software is licensed as described in the file COPYING, which
  12. # you should have received as part of this distribution. The terms
  13. # are also available at https://curl.se/docs/copyright.html.
  14. #
  15. # You may opt to use, copy, modify, merge, publish, distribute and/or sell
  16. # copies of the Software, and permit persons to whom the Software is
  17. # furnished to do so, under the terms of the COPYING file.
  18. #
  19. # This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY
  20. # KIND, either express or implied.
  21. #
  22. # SPDX-License-Identifier: curl
  23. #
  24. ###########################################################################
  25. =begin comment
  26. Converts a curldown file to nroff (man page).
  27. =end comment
  28. =cut
  29. use strict;
  30. use warnings;
  31. my $cd2nroff = "0.1"; # to keep check
  32. my $dir;
  33. my $extension;
  34. my $keepfilename;
  35. while(@ARGV) {
  36. if($ARGV[0] eq "-d") {
  37. shift @ARGV;
  38. $dir = shift @ARGV;
  39. }
  40. elsif($ARGV[0] eq "-e") {
  41. shift @ARGV;
  42. $extension = shift @ARGV;
  43. }
  44. elsif($ARGV[0] eq "-k") {
  45. shift @ARGV;
  46. $keepfilename = 1;
  47. }
  48. elsif($ARGV[0] eq "-h") {
  49. print <<HELP
  50. Usage: cd2nroff [options] [file.md]
  51. -d <dir> Write the output to the file name from the meta-data in the
  52. specified directory, instead of writing to stdout
  53. -e <ext> If -d is used, this option can provide an added "extension", arbitrary
  54. text really, to append to the file name.
  55. -h This help text,
  56. -v Show version then exit
  57. HELP
  58. ;
  59. exit 0;
  60. }
  61. elsif($ARGV[0] eq "-v") {
  62. print "cd2nroff version $cd2nroff\n";
  63. exit 0;
  64. }
  65. else {
  66. last;
  67. }
  68. }
  69. use POSIX qw(strftime);
  70. my @ts;
  71. if (defined($ENV{SOURCE_DATE_EPOCH})) {
  72. @ts = gmtime($ENV{SOURCE_DATE_EPOCH});
  73. } else {
  74. @ts = localtime;
  75. }
  76. my $date = strftime "%Y-%m-%d", @ts;
  77. sub outseealso {
  78. my (@sa) = @_;
  79. my $comma = 0;
  80. my @o;
  81. push @o, ".SH SEE ALSO\n";
  82. for my $s (sort @sa) {
  83. push @o, sprintf "%s.BR $s", $comma ? ",\n": "";
  84. $comma = 1;
  85. }
  86. push @o, "\n";
  87. return @o;
  88. }
  89. sub outprotocols {
  90. my (@p) = @_;
  91. my $comma = 0;
  92. my @o;
  93. push @o, ".SH PROTOCOLS\n";
  94. if($p[0] eq "TLS") {
  95. push @o, "All TLS based protocols: HTTPS, FTPS, IMAPS, POP3S, SMTPS etc.";
  96. }
  97. else {
  98. my @s = sort @p;
  99. for my $e (sort @s) {
  100. push @o, sprintf "%s$e",
  101. $comma ? (($e eq $s[-1]) ? " and " : ", "): "";
  102. $comma = 1;
  103. }
  104. }
  105. push @o, "\n";
  106. return @o;
  107. }
  108. sub outtls {
  109. my (@t) = @_;
  110. my $comma = 0;
  111. my @o;
  112. if($t[0] eq "All") {
  113. push @o, "\nAll TLS backends support this option.";
  114. }
  115. else {
  116. push @o, "\nThis option works only with the following TLS backends:\n";
  117. my @s = sort @t;
  118. for my $e (@s) {
  119. push @o, sprintf "%s$e",
  120. $comma ? (($e eq $s[-1]) ? " and " : ", "): "";
  121. $comma = 1;
  122. }
  123. }
  124. push @o, "\n";
  125. return @o;
  126. }
  127. my %knownprotos = (
  128. 'DICT' => 1,
  129. 'FILE' => 1,
  130. 'FTP' => 1,
  131. 'FTPS' => 1,
  132. 'GOPHER' => 1,
  133. 'GOPHERS' => 1,
  134. 'HTTP' => 1,
  135. 'HTTPS' => 1,
  136. 'IMAP' => 1,
  137. 'IMAPS' => 1,
  138. 'LDAP' => 1,
  139. 'LDAPS' => 1,
  140. 'MQTT' => 1,
  141. 'POP3' => 1,
  142. 'POP3S' => 1,
  143. 'RTMP' => 1,
  144. 'RTMPS' => 1,
  145. 'RTSP' => 1,
  146. 'SCP' => 1,
  147. 'SFTP' => 1,
  148. 'SMB' => 1,
  149. 'SMBS' => 1,
  150. 'SMTP' => 1,
  151. 'SMTPS' => 1,
  152. 'TELNET' => 1,
  153. 'TFTP' => 1,
  154. 'WS' => 1,
  155. 'WSS' => 1,
  156. 'TLS' => 1,
  157. 'TCP' => 1,
  158. 'All' => 1
  159. );
  160. my %knowntls = (
  161. 'BearSSL' => 1,
  162. 'GnuTLS' => 1,
  163. 'mbedTLS' => 1,
  164. 'OpenSSL' => 1,
  165. 'rustls' => 1,
  166. 'Schannel' => 1,
  167. 'Secure Transport' => 1,
  168. 'wolfSSL' => 1,
  169. 'All' => 1,
  170. );
  171. sub single {
  172. my @seealso;
  173. my @proto;
  174. my @tls;
  175. my $d;
  176. my ($f)=@_;
  177. my $copyright;
  178. my $errors = 0;
  179. my $fh;
  180. my $line;
  181. my $list;
  182. my $tlslist;
  183. my $section;
  184. my $source;
  185. my $spdx;
  186. my $start = 0;
  187. my $title;
  188. if(defined($f)) {
  189. if(!open($fh, "<:crlf", "$f")) {
  190. print STDERR "cd2nroff failed to open '$f' for reading: $!\n";
  191. return 1;
  192. }
  193. }
  194. else {
  195. $f = "STDIN";
  196. $fh = \*STDIN;
  197. binmode($fh, ":crlf");
  198. }
  199. while(<$fh>) {
  200. $line++;
  201. if(!$start) {
  202. if(/^---/) {
  203. # header starts here
  204. $start = 1;
  205. }
  206. next;
  207. }
  208. if(/^Title: *(.*)/i) {
  209. $title=$1;
  210. }
  211. elsif(/^Section: *(.*)/i) {
  212. $section=$1;
  213. }
  214. elsif(/^Source: *(.*)/i) {
  215. $source=$1;
  216. }
  217. elsif(/^See-also: +(.*)/i) {
  218. $list = 1; # 1 for see-also
  219. push @seealso, $1;
  220. }
  221. elsif(/^See-also: */i) {
  222. if($seealso[0]) {
  223. print STDERR "$f:$line:1:ERROR: bad See-Also, needs list\n";
  224. return 2;
  225. }
  226. $list = 1; # 1 for see-also
  227. }
  228. elsif(/^Protocol:/i) {
  229. $list = 2; # 2 for protocol
  230. }
  231. elsif(/^TLS-backend:/i) {
  232. $list = 3; # 3 for TLS backend
  233. }
  234. elsif(/^ +- (.*)/i) {
  235. # the only lists we support are see-also and protocol
  236. if($list == 1) {
  237. push @seealso, $1;
  238. }
  239. elsif($list == 2) {
  240. push @proto, $1;
  241. }
  242. elsif($list == 3) {
  243. push @tls, $1;
  244. }
  245. else {
  246. print STDERR "$f:$line:1:ERROR: list item without owner?\n";
  247. return 2;
  248. }
  249. }
  250. # REUSE-IgnoreStart
  251. elsif(/^C: (.*)/i) {
  252. $copyright=$1;
  253. }
  254. elsif(/^SPDX-License-Identifier: (.*)/i) {
  255. $spdx=$1;
  256. }
  257. # REUSE-IgnoreEnd
  258. elsif(/^---/) {
  259. # end of the header section
  260. if(!$title) {
  261. print STDERR "ERROR: no 'Title:' in $f\n";
  262. return 1;
  263. }
  264. if(!$section) {
  265. print STDERR "ERROR: no 'Section:' in $f\n";
  266. return 2;
  267. }
  268. if(!$seealso[0]) {
  269. print STDERR "$f:$line:1:ERROR: no 'See-also:' present\n";
  270. return 2;
  271. }
  272. if(!$copyright) {
  273. print STDERR "$f:$line:1:ERROR: no 'C:' field present\n";
  274. return 2;
  275. }
  276. if(!$spdx) {
  277. print STDERR "$f:$line:1:ERROR: no 'SPDX-License-Identifier:' field present\n";
  278. return 2;
  279. }
  280. if($section == 3) {
  281. if(!$proto[0]) {
  282. printf STDERR "$f:$line:1:ERROR: missing Protocol:\n";
  283. exit 2;
  284. }
  285. my $tls = 0;
  286. for my $p (@proto) {
  287. if($p eq "TLS") {
  288. $tls = 1;
  289. }
  290. if(!$knownprotos{$p}) {
  291. printf STDERR "$f:$line:1:ERROR: invalid protocol used: $p:\n";
  292. exit 2;
  293. }
  294. }
  295. # This is for TLS, require TLS-backend:
  296. if($tls) {
  297. if(!$tls[0]) {
  298. printf STDERR "$f:$line:1:ERROR: missing TLS-backend:\n";
  299. exit 2;
  300. }
  301. for my $t (@tls) {
  302. if(!$knowntls{$t}) {
  303. printf STDERR "$f:$line:1:ERROR: invalid TLS backend: $t:\n";
  304. exit 2;
  305. }
  306. }
  307. }
  308. }
  309. last;
  310. }
  311. else {
  312. chomp;
  313. print STDERR "WARN: unrecognized line in $f, ignoring:\n:'$_';"
  314. }
  315. }
  316. if(!$start) {
  317. print STDERR "$f:$line:1:ERROR: no header present\n";
  318. return 2;
  319. }
  320. my @desc;
  321. my $quote = 0;
  322. my $blankline = 0;
  323. my $header = 0;
  324. # cut off the leading path from the file name, if any
  325. $f =~ s/^(.*[\\\/])//;
  326. push @desc, ".\\\" generated by cd2nroff $cd2nroff from $f\n";
  327. push @desc, ".TH $title $section \"$date\" $source\n";
  328. while(<$fh>) {
  329. $line++;
  330. $d = $_;
  331. if($quote) {
  332. if($quote == 4) {
  333. # remove the indentation
  334. if($d =~ /^ (.*)/) {
  335. push @desc, "$1\n";
  336. next;
  337. }
  338. else {
  339. # end of quote
  340. $quote = 0;
  341. push @desc, ".fi\n";
  342. next;
  343. }
  344. }
  345. if(/^~~~/) {
  346. # end of quote
  347. $quote = 0;
  348. push @desc, ".fi\n";
  349. next;
  350. }
  351. # convert single backslahes to doubles
  352. $d =~ s/\\/\\\\/g;
  353. # lines starting with a period needs it escaped
  354. $d =~ s/^\./\\&./;
  355. push @desc, $d;
  356. next;
  357. }
  358. # remove single line HTML comments
  359. $d =~ s/<!--.*?-->//g;
  360. # **bold**
  361. $d =~ s/\*\*(\S.*?)\*\*/\\fB$1\\fP/g;
  362. # *italics*
  363. $d =~ s/\*(\S.*?)\*/\\fI$1\\fP/g;
  364. if($d =~ /[^\\][\<\>]/) {
  365. print STDERR "$f:$line:1:WARN: un-escaped < or > used\n";
  366. }
  367. # convert backslash-'<' or '> to just the second character
  368. $d =~ s/\\([<>])/$1/g;
  369. # mentions of curl symbols with man pages use italics by default
  370. $d =~ s/((lib|)curl([^ ]*\(3\)))/\\fI$1\\fP/gi;
  371. # backticked becomes italics
  372. $d =~ s/\`(.*?)\`/\\fI$1\\fP/g;
  373. if(/^## (.*)/) {
  374. my $word = $1;
  375. # if there are enclosing quotes, remove them first
  376. $word =~ s/[\"\'\`](.*)[\"\'\`]\z/$1/;
  377. # enclose in double quotes if there is a space present
  378. if($word =~ / /) {
  379. push @desc, ".IP \"$word\"\n";
  380. }
  381. else {
  382. push @desc, ".IP $word\n";
  383. }
  384. $header = 1;
  385. }
  386. elsif(/^# (.*)/) {
  387. my $word = $1;
  388. # if there are enclosing quotes, remove them first
  389. $word =~ s/[\"\'](.*)[\"\']\z/$1/;
  390. if($word eq "PROTOCOLS") {
  391. print STDERR "$f:$line:1:WARN: PROTOCOLS section in source file\n";
  392. }
  393. elsif($word eq "EXAMPLE") {
  394. # insert the generated PROTOCOLS section before EXAMPLE
  395. push @desc, outprotocols(@proto);
  396. if($proto[0] eq "TLS") {
  397. push @desc, outtls(@tls);
  398. }
  399. }
  400. push @desc, ".SH $word\n";
  401. $header = 1;
  402. }
  403. elsif(/^~~~c/) {
  404. # start of a code section, not indented
  405. $quote = 1;
  406. push @desc, "\n" if($blankline && !$header);
  407. $header = 0;
  408. push @desc, ".nf\n";
  409. }
  410. elsif(/^~~~/) {
  411. # start of a quote section; not code, not indented
  412. $quote = 1;
  413. push @desc, "\n" if($blankline && !$header);
  414. $header = 0;
  415. push @desc, ".nf\n";
  416. }
  417. elsif(/^ (.*)/) {
  418. # quoted, indented by 4 space
  419. $quote = 4;
  420. push @desc, "\n" if($blankline && !$header);
  421. $header = 0;
  422. push @desc, ".nf\n$1\n";
  423. }
  424. elsif(/^[ \t]*\n/) {
  425. # count and ignore blank lines
  426. $blankline++;
  427. }
  428. else {
  429. # don't output newlines if this is the first content after a
  430. # header
  431. push @desc, "\n" if($blankline && !$header);
  432. $blankline = 0;
  433. $header = 0;
  434. # quote minuses in the output
  435. $d =~ s/([^\\])-/$1\\-/g;
  436. # replace single quotes
  437. $d =~ s/\'/\\(aq/g;
  438. # handle double quotes first on the line
  439. $d =~ s/^(\s*)\"/$1\\&\"/;
  440. # lines starting with a period needs it escaped
  441. $d =~ s/^\./\\&./;
  442. if($d =~ /^(.*) /) {
  443. printf STDERR "$f:$line:%d:ERROR: 2 spaces detected\n",
  444. length($1);
  445. $errors++;
  446. }
  447. if($d =~ /^[ \t]*\n/) {
  448. # replaced away all contents
  449. $blankline= 1;
  450. }
  451. else {
  452. push @desc, $d;
  453. }
  454. }
  455. }
  456. if($fh != \*STDIN) {
  457. close($fh);
  458. }
  459. push @desc, outseealso(@seealso);
  460. if($dir) {
  461. if($keepfilename) {
  462. $title = $f;
  463. $title =~ s/\.[^.]*$//;
  464. }
  465. my $outfile = "$dir/$title.$section";
  466. if(defined($extension)) {
  467. $outfile .= $extension;
  468. }
  469. if(!open(O, ">", $outfile)) {
  470. print STDERR "Failed to open $outfile : $!\n";
  471. return 1;
  472. }
  473. print O @desc;
  474. close(O);
  475. }
  476. else {
  477. print @desc;
  478. }
  479. return $errors;
  480. }
  481. if(@ARGV) {
  482. for my $f (@ARGV) {
  483. my $r = single($f);
  484. if($r) {
  485. exit $r;
  486. }
  487. }
  488. }
  489. else {
  490. exit single();
  491. }