cd2nroff 15 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579
  1. #!/usr/bin/env perl
  2. #***************************************************************************
  3. # _ _ ____ _
  4. # Project ___| | | | _ \| |
  5. # / __| | | | |_) | |
  6. # | (__| |_| | _ <| |___
  7. # \___|\___/|_| \_\_____|
  8. #
  9. # Copyright (C) Daniel Stenberg, <daniel@haxx.se>, et al.
  10. #
  11. # This software is licensed as described in the file COPYING, which
  12. # you should have received as part of this distribution. The terms
  13. # are also available at https://curl.se/docs/copyright.html.
  14. #
  15. # You may opt to use, copy, modify, merge, publish, distribute and/or sell
  16. # copies of the Software, and permit persons to whom the Software is
  17. # furnished to do so, under the terms of the COPYING file.
  18. #
  19. # This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY
  20. # KIND, either express or implied.
  21. #
  22. # SPDX-License-Identifier: curl
  23. #
  24. ###########################################################################
  25. =begin comment
  26. Converts a curldown file to nroff (manpage).
  27. =end comment
  28. =cut
  29. use strict;
  30. use warnings;
  31. my $cd2nroff = "0.1"; # to keep check
  32. my $dir;
  33. my $extension;
  34. my $keepfilename;
  35. while(@ARGV) {
  36. if($ARGV[0] eq "-d") {
  37. shift @ARGV;
  38. $dir = shift @ARGV;
  39. }
  40. elsif($ARGV[0] eq "-e") {
  41. shift @ARGV;
  42. $extension = shift @ARGV;
  43. }
  44. elsif($ARGV[0] eq "-k") {
  45. shift @ARGV;
  46. $keepfilename = 1;
  47. }
  48. elsif($ARGV[0] eq "-h") {
  49. print <<HELP
  50. Usage: cd2nroff [options] [file.md]
  51. -d <dir> Write the output to the file name from the meta-data in the
  52. specified directory, instead of writing to stdout
  53. -e <ext> If -d is used, this option can provide an added "extension", arbitrary
  54. text really, to append to the file name.
  55. -h This help text,
  56. -v Show version then exit
  57. HELP
  58. ;
  59. exit 0;
  60. }
  61. elsif($ARGV[0] eq "-v") {
  62. print "cd2nroff version $cd2nroff\n";
  63. exit 0;
  64. }
  65. else {
  66. last;
  67. }
  68. }
  69. use POSIX qw(strftime);
  70. my @ts;
  71. if (defined($ENV{SOURCE_DATE_EPOCH})) {
  72. @ts = gmtime($ENV{SOURCE_DATE_EPOCH});
  73. } else {
  74. @ts = localtime;
  75. }
  76. my $date = strftime "%Y-%m-%d", @ts;
  77. sub outseealso {
  78. my (@sa) = @_;
  79. my $comma = 0;
  80. my @o;
  81. push @o, ".SH SEE ALSO\n";
  82. for my $s (sort @sa) {
  83. push @o, sprintf "%s.BR $s", $comma ? ",\n": "";
  84. $comma = 1;
  85. }
  86. push @o, "\n";
  87. return @o;
  88. }
  89. sub outprotocols {
  90. my (@p) = @_;
  91. my $comma = 0;
  92. my @o;
  93. push @o, ".SH PROTOCOLS\n";
  94. if($p[0] eq "TLS") {
  95. push @o, "This functionality affects all TLS based protocols: HTTPS, FTPS, IMAPS, POP3S, SMTPS etc.";
  96. }
  97. else {
  98. my @s = sort @p;
  99. push @o, "This functionality affects ";
  100. for my $e (sort @s) {
  101. push @o, sprintf "%s%s",
  102. $comma ? (($e eq $s[-1]) ? " and " : ", "): "",
  103. lc($e);
  104. $comma = 1;
  105. }
  106. if($#s == 0) {
  107. if($s[0] eq "All") {
  108. push @o, " supported protocols";
  109. }
  110. else {
  111. push @o, " only";
  112. }
  113. }
  114. }
  115. push @o, "\n";
  116. return @o;
  117. }
  118. sub outtls {
  119. my (@t) = @_;
  120. my $comma = 0;
  121. my @o;
  122. if($t[0] eq "All") {
  123. push @o, "\nAll TLS backends support this option.";
  124. }
  125. elsif($t[0] eq "none") {
  126. push @o, "\nNo TLS backend supports this option.";
  127. }
  128. else {
  129. push @o, "\nThis option works only with the following TLS backends:\n";
  130. my @s = sort @t;
  131. for my $e (@s) {
  132. push @o, sprintf "%s$e",
  133. $comma ? (($e eq $s[-1]) ? " and " : ", "): "";
  134. $comma = 1;
  135. }
  136. }
  137. push @o, "\n";
  138. return @o;
  139. }
  140. my %knownprotos = (
  141. 'DICT' => 1,
  142. 'FILE' => 1,
  143. 'FTP' => 1,
  144. 'FTPS' => 1,
  145. 'GOPHER' => 1,
  146. 'GOPHERS' => 1,
  147. 'HTTP' => 1,
  148. 'HTTPS' => 1,
  149. 'IMAP' => 1,
  150. 'IMAPS' => 1,
  151. 'LDAP' => 1,
  152. 'LDAPS' => 1,
  153. 'MQTT' => 1,
  154. 'POP3' => 1,
  155. 'POP3S' => 1,
  156. 'RTMP' => 1,
  157. 'RTMPS' => 1,
  158. 'RTSP' => 1,
  159. 'SCP' => 1,
  160. 'SFTP' => 1,
  161. 'SMB' => 1,
  162. 'SMBS' => 1,
  163. 'SMTP' => 1,
  164. 'SMTPS' => 1,
  165. 'TELNET' => 1,
  166. 'TFTP' => 1,
  167. 'WS' => 1,
  168. 'WSS' => 1,
  169. 'TLS' => 1,
  170. 'TCP' => 1,
  171. 'QUIC' => 1,
  172. 'All' => 1
  173. );
  174. my %knowntls = (
  175. 'BearSSL' => 1,
  176. 'GnuTLS' => 1,
  177. 'mbedTLS' => 1,
  178. 'OpenSSL' => 1,
  179. 'rustls' => 1,
  180. 'Schannel' => 1,
  181. 'Secure Transport' => 1,
  182. 'wolfSSL' => 1,
  183. 'All' => 1,
  184. 'none' => 1,
  185. );
  186. sub single {
  187. my @seealso;
  188. my @proto;
  189. my @tls;
  190. my $d;
  191. my ($f)=@_;
  192. my $copyright;
  193. my $errors = 0;
  194. my $fh;
  195. my $line;
  196. my $list;
  197. my $tlslist;
  198. my $section;
  199. my $source;
  200. my $addedin;
  201. my $spdx;
  202. my $start = 0;
  203. my $title;
  204. if(defined($f)) {
  205. if(!open($fh, "<:crlf", "$f")) {
  206. print STDERR "cd2nroff failed to open '$f' for reading: $!\n";
  207. return 1;
  208. }
  209. }
  210. else {
  211. $f = "STDIN";
  212. $fh = \*STDIN;
  213. binmode($fh, ":crlf");
  214. }
  215. while(<$fh>) {
  216. $line++;
  217. if(!$start) {
  218. if(/^---/) {
  219. # header starts here
  220. $start = 1;
  221. }
  222. next;
  223. }
  224. if(/^Title: *(.*)/i) {
  225. $title=$1;
  226. }
  227. elsif(/^Section: *(.*)/i) {
  228. $section=$1;
  229. }
  230. elsif(/^Source: *(.*)/i) {
  231. $source=$1;
  232. }
  233. elsif(/^See-also: +(.*)/i) {
  234. $list = 1; # 1 for see-also
  235. push @seealso, $1;
  236. }
  237. elsif(/^See-also: */i) {
  238. if($seealso[0]) {
  239. print STDERR "$f:$line:1:ERROR: bad See-Also, needs list\n";
  240. return 2;
  241. }
  242. $list = 1; # 1 for see-also
  243. }
  244. elsif(/^Protocol:/i) {
  245. $list = 2; # 2 for protocol
  246. }
  247. elsif(/^TLS-backend:/i) {
  248. $list = 3; # 3 for TLS backend
  249. }
  250. elsif(/^Added-in: *(.*)/i) {
  251. $addedin=$1;
  252. if(($addedin !~ /^[0-9.]+[0-9]\z/) &&
  253. ($addedin ne "n/a")) {
  254. print STDERR "$f:$line:1:ERROR: invalid version number in Added-in line: $addedin\n";
  255. return 2;
  256. }
  257. }
  258. elsif(/^ +- (.*)/i) {
  259. # the only lists we support are see-also and protocol
  260. if($list == 1) {
  261. push @seealso, $1;
  262. }
  263. elsif($list == 2) {
  264. push @proto, $1;
  265. }
  266. elsif($list == 3) {
  267. push @tls, $1;
  268. }
  269. else {
  270. print STDERR "$f:$line:1:ERROR: list item without owner?\n";
  271. return 2;
  272. }
  273. }
  274. # REUSE-IgnoreStart
  275. elsif(/^C: (.*)/i) {
  276. $copyright=$1;
  277. }
  278. elsif(/^SPDX-License-Identifier: (.*)/i) {
  279. $spdx=$1;
  280. }
  281. # REUSE-IgnoreEnd
  282. elsif(/^---/) {
  283. # end of the header section
  284. if(!$title) {
  285. print STDERR "$f:$line:1:ERROR: no 'Title:' in $f\n";
  286. return 1;
  287. }
  288. if(!$section) {
  289. print STDERR "$f:$line:1:ERROR: no 'Section:' in $f\n";
  290. return 2;
  291. }
  292. if(!$source) {
  293. print STDERR "$f:$line:1:ERROR: no 'Source:' in $f\n";
  294. return 2;
  295. }
  296. if(($source eq "libcurl") && !$addedin) {
  297. print STDERR "$f:$line:1:ERROR: no 'Added-in:' in $f\n";
  298. return 2;
  299. }
  300. if(!$seealso[0]) {
  301. print STDERR "$f:$line:1:ERROR: no 'See-also:' present\n";
  302. return 2;
  303. }
  304. if(!$copyright) {
  305. print STDERR "$f:$line:1:ERROR: no 'C:' field present\n";
  306. return 2;
  307. }
  308. if(!$spdx) {
  309. print STDERR "$f:$line:1:ERROR: no 'SPDX-License-Identifier:' field present\n";
  310. return 2;
  311. }
  312. if($section == 3) {
  313. if(!$proto[0]) {
  314. printf STDERR "$f:$line:1:ERROR: missing Protocol:\n";
  315. exit 2;
  316. }
  317. my $tls = 0;
  318. for my $p (@proto) {
  319. if($p eq "TLS") {
  320. $tls = 1;
  321. }
  322. if(!$knownprotos{$p}) {
  323. printf STDERR "$f:$line:1:ERROR: invalid protocol used: $p:\n";
  324. exit 2;
  325. }
  326. }
  327. # This is for TLS, require TLS-backend:
  328. if($tls) {
  329. if(!$tls[0]) {
  330. printf STDERR "$f:$line:1:ERROR: missing TLS-backend:\n";
  331. exit 2;
  332. }
  333. for my $t (@tls) {
  334. if(!$knowntls{$t}) {
  335. printf STDERR "$f:$line:1:ERROR: invalid TLS backend: $t:\n";
  336. exit 2;
  337. }
  338. }
  339. }
  340. }
  341. last;
  342. }
  343. else {
  344. chomp;
  345. print STDERR "$f:$line:1:ERROR: unrecognized header keyword: '$_'\n";
  346. $errors++;
  347. }
  348. }
  349. if(!$start) {
  350. print STDERR "$f:$line:1:ERROR: no header present\n";
  351. return 2;
  352. }
  353. my @desc;
  354. my $quote = 0;
  355. my $blankline = 0;
  356. my $header = 0;
  357. # cut off the leading path from the file name, if any
  358. $f =~ s/^(.*[\\\/])//;
  359. push @desc, ".\\\" generated by cd2nroff $cd2nroff from $f\n";
  360. push @desc, ".TH $title $section \"$date\" $source\n";
  361. while(<$fh>) {
  362. $line++;
  363. $d = $_;
  364. if($quote) {
  365. if($quote == 4) {
  366. # remove the indentation
  367. if($d =~ /^ (.*)/) {
  368. push @desc, "$1\n";
  369. next;
  370. }
  371. else {
  372. # end of quote
  373. $quote = 0;
  374. push @desc, ".fi\n";
  375. next;
  376. }
  377. }
  378. if(/^~~~/) {
  379. # end of quote
  380. $quote = 0;
  381. push @desc, ".fi\n";
  382. next;
  383. }
  384. # convert single backslahes to doubles
  385. $d =~ s/\\/\\\\/g;
  386. # lines starting with a period needs it escaped
  387. $d =~ s/^\./\\&./;
  388. push @desc, $d;
  389. next;
  390. }
  391. # remove single line HTML comments
  392. $d =~ s/<!--.*?-->//g;
  393. # **bold**
  394. $d =~ s/\*\*(\S.*?)\*\*/\\fB$1\\fP/g;
  395. # *italics*
  396. $d =~ s/\*(\S.*?)\*/\\fI$1\\fP/g;
  397. if($d =~ /[^\\][\<\>]/) {
  398. print STDERR "$f:$line:1:ERROR: un-escaped < or > used\n";
  399. $errors++;
  400. }
  401. # convert backslash-'<' or '> to just the second character
  402. $d =~ s/\\([<>])/$1/g;
  403. # mentions of curl symbols with manpages use italics by default
  404. $d =~ s/((lib|)curl([^ ]*\(3\)))/\\fI$1\\fP/gi;
  405. # backticked becomes italics
  406. $d =~ s/\`(.*?)\`/\\fI$1\\fP/g;
  407. if(/^## (.*)/) {
  408. my $word = $1;
  409. # if there are enclosing quotes, remove them first
  410. $word =~ s/[\"\'\`](.*)[\"\'\`]\z/$1/;
  411. # enclose in double quotes if there is a space present
  412. if($word =~ / /) {
  413. push @desc, ".IP \"$word\"\n";
  414. }
  415. else {
  416. push @desc, ".IP $word\n";
  417. }
  418. $header = 1;
  419. }
  420. elsif(/^##/) {
  421. # end of IP sequence
  422. push @desc, ".PP\n";
  423. $header = 1;
  424. }
  425. elsif(/^# (.*)/) {
  426. my $word = $1;
  427. # if there are enclosing quotes, remove them first
  428. $word =~ s/[\"\'](.*)[\"\']\z/$1/;
  429. if($word eq "PROTOCOLS") {
  430. print STDERR "$f:$line:1:WARN: PROTOCOLS section in source file\n";
  431. }
  432. elsif($word eq "AVAILABILITY") {
  433. print STDERR "$f:$line:1:WARN: AVAILABILITY section in source file\n";
  434. }
  435. elsif($word eq "%PROTOCOLS%") {
  436. # insert the generated PROTOCOLS section
  437. push @desc, outprotocols(@proto);
  438. if($proto[0] eq "TLS") {
  439. push @desc, outtls(@tls);
  440. }
  441. $header = 1;
  442. next;
  443. }
  444. elsif($word eq "%AVAILABILITY%") {
  445. if($addedin ne "n/a") {
  446. # insert the generated AVAILABILITY section
  447. push @desc, ".SH AVAILABILITY\n";
  448. push @desc, "Added in curl $addedin\n";
  449. }
  450. $header = 1;
  451. next;
  452. }
  453. push @desc, ".SH $word\n";
  454. $header = 1;
  455. }
  456. elsif(/^~~~c/) {
  457. # start of a code section, not indented
  458. $quote = 1;
  459. push @desc, "\n" if($blankline && !$header);
  460. $header = 0;
  461. push @desc, ".nf\n";
  462. }
  463. elsif(/^~~~/) {
  464. # start of a quote section; not code, not indented
  465. $quote = 1;
  466. push @desc, "\n" if($blankline && !$header);
  467. $header = 0;
  468. push @desc, ".nf\n";
  469. }
  470. elsif(/^ (.*)/) {
  471. # quoted, indented by 4 space
  472. $quote = 4;
  473. push @desc, "\n" if($blankline && !$header);
  474. $header = 0;
  475. push @desc, ".nf\n$1\n";
  476. }
  477. elsif(/^[ \t]*\n/) {
  478. # count and ignore blank lines
  479. $blankline++;
  480. }
  481. else {
  482. # don't output newlines if this is the first content after a
  483. # header
  484. push @desc, "\n" if($blankline && !$header);
  485. $blankline = 0;
  486. $header = 0;
  487. # quote minuses in the output
  488. $d =~ s/([^\\])-/$1\\-/g;
  489. # replace single quotes
  490. $d =~ s/\'/\\(aq/g;
  491. # handle double quotes first on the line
  492. $d =~ s/^(\s*)\"/$1\\&\"/;
  493. # lines starting with a period needs it escaped
  494. $d =~ s/^\./\\&./;
  495. if($d =~ /^(.*) /) {
  496. printf STDERR "$f:$line:%d:ERROR: 2 spaces detected\n",
  497. length($1);
  498. $errors++;
  499. }
  500. if($d =~ /^[ \t]*\n/) {
  501. # replaced away all contents
  502. $blankline= 1;
  503. }
  504. else {
  505. push @desc, $d;
  506. }
  507. }
  508. }
  509. if($fh != \*STDIN) {
  510. close($fh);
  511. }
  512. push @desc, outseealso(@seealso);
  513. if($dir) {
  514. if($keepfilename) {
  515. $title = $f;
  516. $title =~ s/\.[^.]*$//;
  517. }
  518. my $outfile = "$dir/$title.$section";
  519. if(defined($extension)) {
  520. $outfile .= $extension;
  521. }
  522. if(!open(O, ">", $outfile)) {
  523. print STDERR "Failed to open $outfile : $!\n";
  524. return 1;
  525. }
  526. print O @desc;
  527. close(O);
  528. }
  529. else {
  530. print @desc;
  531. }
  532. return $errors;
  533. }
  534. if(@ARGV) {
  535. for my $f (@ARGV) {
  536. my $r = single($f);
  537. if($r) {
  538. exit $r;
  539. }
  540. }
  541. }
  542. else {
  543. exit single();
  544. }