2
0

cd2nroff 15 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575
  1. #!/usr/bin/env perl
  2. #***************************************************************************
  3. # _ _ ____ _
  4. # Project ___| | | | _ \| |
  5. # / __| | | | |_) | |
  6. # | (__| |_| | _ <| |___
  7. # \___|\___/|_| \_\_____|
  8. #
  9. # Copyright (C) Daniel Stenberg, <daniel@haxx.se>, et al.
  10. #
  11. # This software is licensed as described in the file COPYING, which
  12. # you should have received as part of this distribution. The terms
  13. # are also available at https://curl.se/docs/copyright.html.
  14. #
  15. # You may opt to use, copy, modify, merge, publish, distribute and/or sell
  16. # copies of the Software, and permit persons to whom the Software is
  17. # furnished to do so, under the terms of the COPYING file.
  18. #
  19. # This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY
  20. # KIND, either express or implied.
  21. #
  22. # SPDX-License-Identifier: curl
  23. #
  24. ###########################################################################
  25. =begin comment
  26. Converts a curldown file to nroff (manpage).
  27. =end comment
  28. =cut
  29. use strict;
  30. use warnings;
  # Version of this tool; printed by -v and embedded in the generated output.
  my $cd2nroff = "0.1";

  # Command line state, filled in by the option loop below.
  my $dir;          # -d: directory to write the output file into
  my $extension;    # -e: text appended to the output file name
  my $keepfilename; # -k: name the output after the input file, not the Title

  # Consume leading options from @ARGV. The loop stops at the first argument
  # that is not a known option; whatever remains are input file names.
  while(@ARGV) {
      my $opt = $ARGV[0];
      if(($opt eq "-d") || ($opt eq "-e")) {
          shift @ARGV;
          # both options take a value; refuse to silently store undef
          if(!@ARGV) {
              print STDERR "cd2nroff: option $opt requires an argument\n";
              exit 1;
          }
          if($opt eq "-d") {
              $dir = shift @ARGV;
          }
          else {
              $extension = shift @ARGV;
          }
      }
      elsif($opt eq "-k") {
          shift @ARGV;
          $keepfilename = 1;
      }
      elsif($opt eq "-h") {
          print join("\n",
              'Usage: cd2nroff [options] [file.md]',
              '-d <dir> Write the output to the file name from the meta-data in the',
              '         specified directory, instead of writing to stdout',
              '-e <ext> If -d is used, this option can provide an added "extension", arbitrary',
              '         text really, to append to the file name.',
              '-h       This help text',
              '-k       If -d is used, name the output file after the input file name',
              '         instead of the Title from the meta-data',
              '-v       Show version then exit'
          ), "\n";
          exit 0;
      }
      elsif($opt eq "-v") {
          print "cd2nroff version $cd2nroff\n";
          exit 0;
      }
      else {
          last;
      }
  }
  69. use POSIX qw(strftime);
  # Date used in the .TH line of the generated page. When SOURCE_DATE_EPOCH is
  # set in the environment it is converted with gmtime; otherwise the current
  # local time is used.
  my @ts = defined($ENV{SOURCE_DATE_EPOCH}) ?
      gmtime($ENV{SOURCE_DATE_EPOCH}) :
      localtime;
  my $date = strftime("%Y-%m-%d", @ts);
  # Build the SEE ALSO section.
  #
  # Takes the list of see-also entries and returns a list of nroff fragments:
  # the ".SH SEE ALSO" header, one ".BR <entry>" per entry in sorted order
  # (entries after the first are preceded by ",\n"), and a final newline.
  sub outseealso {
      my (@sa) = @_;
      my @o = (".SH SEE ALSO\n");
      my $sep = "";
      for my $s (sort @sa) {
          # plain concatenation: the entry must never be treated as a sprintf
          # format, or a '%' in it would corrupt the output
          push @o, $sep . ".BR $s";
          $sep = ",\n";
      }
      push @o, "\n";
      return @o;
  }
  # Build the PROTOCOLS section.
  #
  # Takes the list of protocol names and returns a list of nroff fragments.
  # If the first name is "TLS" a fixed sentence about TLS based protocols is
  # used. Otherwise the names are sorted, lower-cased and joined as
  # "a, b and c"; a single name gets " only" appended, except "All" which
  # gets " supported protocols".
  sub outprotocols {
      my (@p) = @_;
      my @o = (".SH PROTOCOLS\n");
      # check for an empty list first so $p[0] is never read when undefined
      if(@p && ($p[0] eq "TLS")) {
          push @o, "This functionality affects all TLS based protocols: HTTPS, FTPS, IMAPS, POP3S, SMTPS etc.";
      }
      else {
          my @s = sort @p;
          push @o, "This functionality affects ";
          for my $i (0 .. $#s) {
              # choose the separator by position, so that a name equal to
              # the last one does not get " and " too early
              my $sep = "";
              if($i == $#s) {
                  $sep = " and " if($i > 0);
              }
              elsif($i > 0) {
                  $sep = ", ";
              }
              push @o, $sep . lc($s[$i]);
          }
          if(@s == 1) {
              if($s[0] eq "All") {
                  push @o, " supported protocols";
              }
              else {
                  push @o, " only";
              }
          }
      }
      push @o, "\n";
      return @o;
  }
  # Build the paragraph describing TLS backend support.
  #
  # Takes the list of TLS backend names and returns a list of text fragments.
  # If the first name is "All" a fixed sentence is used. Otherwise the names
  # are sorted and joined as "a, b and c" after an introductory line.
  sub outtls {
      my (@t) = @_;
      my @o;
      # check for an empty list first so $t[0] is never read when undefined
      if(@t && ($t[0] eq "All")) {
          push @o, "\nAll TLS backends support this option.";
      }
      else {
          push @o, "\nThis option works only with the following TLS backends:\n";
          my @s = sort @t;
          for my $i (0 .. $#s) {
              # separator chosen by position; the name itself is appended
              # verbatim and never used as a sprintf format
              my $sep = "";
              if($i == $#s) {
                  $sep = " and " if($i > 0);
              }
              elsif($i > 0) {
                  $sep = ", ";
              }
              push @o, $sep . $s[$i];
          }
      }
      push @o, "\n";
      return @o;
  }
  # Names accepted in a "Protocol:" header list; single() rejects any other
  # name for section 3 pages. Every listed name maps to 1.
  my %knownprotos = map { $_ => 1 } qw(
      DICT FILE FTP FTPS GOPHER GOPHERS HTTP HTTPS
      IMAP IMAPS LDAP LDAPS MQTT POP3 POP3S
      RTMP RTMPS RTSP SCP SFTP SMB SMBS SMTP SMTPS
      TELNET TFTP WS WSS
      TLS TCP QUIC
      All
  );
  # Names accepted in a "TLS-backend:" header list; single() rejects any other
  # name when a section 3 page lists the TLS protocol. Every listed name maps
  # to 1.
  my %knowntls = map { $_ => 1 } (
      'BearSSL', 'GnuTLS', 'mbedTLS', 'OpenSSL', 'rustls', 'Schannel',
      'Secure Transport', 'wolfSSL',
      'All',
  );
  # Convert one curldown document to nroff.
  #
  # Argument:
  #   $f - name of the file to read; when undefined, STDIN is read instead.
  #
  # The input must start with a header delimited by two lines beginning with
  # "---". The header is parsed and validated first, then the remaining lines
  # are converted. The result goes to stdout, or, when the file-level $dir is
  # set, to "$dir/<title>.<section>" plus the optional $extension (with -k the
  # input file name without its extension is used instead of the title).
  #
  # Uses the file-level $cd2nroff, $date, $dir, $extension, $keepfilename,
  # %knownprotos and %knowntls, and the outprotocols(), outtls() and
  # outseealso() helpers.
  #
  # Returns 0 on success. Returns 1 when a file cannot be opened or written,
  # or the Title is missing; 2 for other fatal header problems; otherwise the
  # number of non-fatal errors reported on stderr while converting.
  sub single {
      my ($f) = @_;
      my @seealso;
      my @proto;
      my @tls;
      my $copyright;
      my $errors = 0;
      my $fh;
      my $line = 0;       # current input line, used in diagnostics
      my $list = 0;       # which header list "  - item" lines belong to
      my $section;
      my $source;
      my $addedin;
      my $spdx;
      my $start = 0;      # set once the opening "---" has been seen
      my $headerdone = 0; # set once the closing "---" has been seen
      my $title;

      if(defined($f)) {
          if(!open($fh, "<:crlf", "$f")) {
              print STDERR "cd2nroff failed to open '$f' for reading: $!\n";
              return 1;
          }
      }
      else {
          $f = "STDIN";
          $fh = \*STDIN;
          binmode($fh, ":crlf");
      }

      # --- pass 1: the header -------------------------------------------
      while(<$fh>) {
          $line++;
          if(!$start) {
              if(/^---/) {
                  # header starts here
                  $start = 1;
              }
              next;
          }
          if(/^Title: *(.*)/i) {
              $title = $1;
          }
          elsif(/^Section: *(.*)/i) {
              $section = $1;
          }
          elsif(/^Source: *(.*)/i) {
              $source = $1;
          }
          elsif(/^See-also: +(.*)/i) {
              $list = 1; # 1 for see-also
              push @seealso, $1;
          }
          elsif(/^See-also: */i) {
              if($seealso[0]) {
                  print STDERR "$f:$line:1:ERROR: bad See-Also, needs list\n";
                  return 2;
              }
              $list = 1; # 1 for see-also
          }
          elsif(/^Protocol:/i) {
              $list = 2; # 2 for protocol
          }
          elsif(/^TLS-backend:/i) {
              $list = 3; # 3 for TLS backend
          }
          elsif(/^Added-in: *(.*)/i) {
              $addedin = $1;
              if(($addedin !~ /^[0-9.]+[0-9]\z/) &&
                 ($addedin ne "n/a")) {
                  print STDERR "$f:$line:1:ERROR: invalid version number in Added-in line: $addedin\n";
                  return 2;
              }
          }
          elsif(/^ +- (.*)/i) {
              # the only lists we support are see-also, protocol and
              # TLS backend
              if($list == 1) {
                  push @seealso, $1;
              }
              elsif($list == 2) {
                  push @proto, $1;
              }
              elsif($list == 3) {
                  push @tls, $1;
              }
              else {
                  print STDERR "$f:$line:1:ERROR: list item without owner?\n";
                  return 2;
              }
          }
          # REUSE-IgnoreStart
          elsif(/^C: (.*)/i) {
              $copyright = $1;
          }
          elsif(/^SPDX-License-Identifier: (.*)/i) {
              $spdx = $1;
          }
          # REUSE-IgnoreEnd
          elsif(/^---/) {
              # end of the header section: verify that all is there
              if(!$title) {
                  print STDERR "$f:$line:1:ERROR: no 'Title:' in $f\n";
                  return 1;
              }
              if(!$section) {
                  print STDERR "$f:$line:1:ERROR: no 'Section:' in $f\n";
                  return 2;
              }
              if(!$source) {
                  print STDERR "$f:$line:1:ERROR: no 'Source:' in $f\n";
                  return 2;
              }
              if(($source eq "libcurl") && !$addedin) {
                  print STDERR "$f:$line:1:ERROR: no 'Added-in:' in $f\n";
                  return 2;
              }
              if(!$seealso[0]) {
                  print STDERR "$f:$line:1:ERROR: no 'See-also:' present\n";
                  return 2;
              }
              if(!$copyright) {
                  print STDERR "$f:$line:1:ERROR: no 'C:' field present\n";
                  return 2;
              }
              if(!$spdx) {
                  print STDERR "$f:$line:1:ERROR: no 'SPDX-License-Identifier:' field present\n";
                  return 2;
              }
              if($section == 3) {
                  # section 3 pages must list known protocols. These
                  # failures return (instead of exit) like every other
                  # header error; the caller exits with the returned code.
                  if(!$proto[0]) {
                      print STDERR "$f:$line:1:ERROR: missing Protocol:\n";
                      return 2;
                  }
                  my $tls = 0;
                  for my $p (@proto) {
                      if($p eq "TLS") {
                          $tls = 1;
                      }
                      if(!$knownprotos{$p}) {
                          print STDERR "$f:$line:1:ERROR: invalid protocol used: $p:\n";
                          return 2;
                      }
                  }
                  # This is for TLS, require TLS-backend:
                  if($tls) {
                      if(!$tls[0]) {
                          print STDERR "$f:$line:1:ERROR: missing TLS-backend:\n";
                          return 2;
                      }
                      for my $t (@tls) {
                          if(!$knowntls{$t}) {
                              print STDERR "$f:$line:1:ERROR: invalid TLS backend: $t:\n";
                              return 2;
                          }
                      }
                  }
              }
              $headerdone = 1;
              last;
          }
          else {
              chomp;
              print STDERR "$f:$line:1:ERROR: unrecognized header keyword: '$_'\n";
              $errors++;
          }
      }

      if(!$start) {
          print STDERR "$f:$line:1:ERROR: no header present\n";
          return 2;
      }
      if(!$headerdone) {
          # input ended inside the header, so nothing was validated and the
          # fields needed for the .TH line may be missing
          print STDERR "$f:$line:1:ERROR: header not terminated\n";
          return 2;
      }

      # --- pass 2: the body ---------------------------------------------
      my @desc;
      my $quote = 0;     # 0: normal text, 1: inside ~~~ block, 4: indented
      my $blankline = 0; # blank lines seen since the last text line
      my $header = 0;    # set when the previous output was a header

      # cut off the leading path from the file name, if any
      $f =~ s/^(.*[\\\/])//;

      push @desc, ".\\\" generated by cd2nroff $cd2nroff from $f\n";
      push @desc, ".TH $title $section \"$date\" $source\n";
      while(<$fh>) {
          $line++;
          my $d = $_;

          if($quote) {
              if($quote == 4) {
                  # remove the four space indentation
                  if($d =~ /^ {4}(.*)/) {
                      push @desc, "$1\n";
                      next;
                  }
                  else {
                      # end of quote
                      $quote = 0;
                      push @desc, ".fi\n";
                      next;
                  }
              }
              if(/^~~~/) {
                  # end of quote
                  $quote = 0;
                  push @desc, ".fi\n";
                  next;
              }
              # convert single backslashes to doubles
              $d =~ s/\\/\\\\/g;
              # lines starting with a period needs it escaped
              $d =~ s/^\./\\&./;
              push @desc, $d;
              next;
          }

          # remove single line HTML comments
          $d =~ s/<!--.*?-->//g;

          # **bold**
          $d =~ s/\*\*(\S.*?)\*\*/\\fB$1\\fP/g;
          # *italics*
          $d =~ s/\*(\S.*?)\*/\\fI$1\\fP/g;

          if($d =~ /[^\\][\<\>]/) {
              print STDERR "$f:$line:1:ERROR: un-escaped < or > used\n";
              $errors++;
          }
          # convert backslash-'<' or '> to just the second character
          $d =~ s/\\([<>])/$1/g;

          # mentions of curl symbols with manpages use italics by default
          $d =~ s/((lib|)curl([^ ]*\(3\)))/\\fI$1\\fP/gi;

          # backticked becomes italics
          $d =~ s/\`(.*?)\`/\\fI$1\\fP/g;

          # the header matches below are done on the unmodified line in $_
          if(/^## (.*)/) {
              my $word = $1;
              # if there are enclosing quotes, remove them first
              $word =~ s/[\"\'\`](.*)[\"\'\`]\z/$1/;

              # enclose in double quotes if there is a space present
              if($word =~ / /) {
                  push @desc, ".IP \"$word\"\n";
              }
              else {
                  push @desc, ".IP $word\n";
              }
              $header = 1;
          }
          elsif(/^##/) {
              # end of IP sequence
              push @desc, ".PP\n";
              $header = 1;
          }
          elsif(/^# (.*)/) {
              my $word = $1;
              # if there are enclosing quotes, remove them first
              $word =~ s/[\"\'](.*)[\"\']\z/$1/;

              if($word eq "PROTOCOLS") {
                  print STDERR "$f:$line:1:WARN: PROTOCOLS section in source file\n";
              }
              elsif($word eq "AVAILABILITY") {
                  print STDERR "$f:$line:1:WARN: AVAILABILITY section in source file\n";
              }
              elsif($word eq "%PROTOCOLS%") {
                  # insert the generated PROTOCOLS section
                  push @desc, outprotocols(@proto);

                  # @proto can be empty for pages outside section 3
                  if(@proto && ($proto[0] eq "TLS")) {
                      push @desc, outtls(@tls);
                  }
                  $header = 1;
                  next;
              }
              elsif($word eq "%AVAILABILITY%") {
                  # Added-in is only mandatory for libcurl pages, so it may
                  # be absent here; without a version there is nothing to say
                  if(defined($addedin) && ($addedin ne "n/a")) {
                      # insert the generated AVAILABILITY section
                      push @desc, ".SH AVAILABILITY\n";
                      push @desc, "Added in curl $addedin\n";
                  }
                  $header = 1;
                  next;
              }
              push @desc, ".SH $word\n";
              $header = 1;
          }
          elsif(/^~~~c/) {
              # start of a code section, not indented
              $quote = 1;
              push @desc, "\n" if($blankline && !$header);
              $header = 0;
              push @desc, ".nf\n";
          }
          elsif(/^~~~/) {
              # start of a quote section; not code, not indented
              $quote = 1;
              push @desc, "\n" if($blankline && !$header);
              $header = 0;
              push @desc, ".nf\n";
          }
          elsif(/^ {4}(.*)/) {
              # quoted, indented by 4 space
              $quote = 4;
              push @desc, "\n" if($blankline && !$header);
              $header = 0;
              push @desc, ".nf\n$1\n";
          }
          elsif(/^[ \t]*\n/) {
              # count and ignore blank lines
              $blankline++;
          }
          else {
              # don't output newlines if this is the first content after a
              # header
              push @desc, "\n" if($blankline && !$header);
              $blankline = 0;
              $header = 0;

              # quote minuses in the output
              $d =~ s/([^\\])-/$1\\-/g;
              # replace single quotes
              $d =~ s/\'/\\(aq/g;
              # handle double quotes first on the line
              $d =~ s/^(\s*)\"/$1\\&\"/;

              # lines starting with a period needs it escaped
              $d =~ s/^\./\\&./;

              # two consecutive spaces; the column reported is the length
              # of the text in front of the last such pair
              if($d =~ /^(.*) {2}/) {
                  printf STDERR "%s:%d:%d:ERROR: 2 spaces detected\n",
                      $f, $line, length($1);
                  $errors++;
              }
              if($d =~ /^[ \t]*\n/) {
                  # replaced away all contents
                  $blankline = 1;
              }
              else {
                  push @desc, $d;
              }
          }
      }
      if($fh != \*STDIN) {
          close($fh);
      }
      push @desc, outseealso(@seealso);

      # --- output -------------------------------------------------------
      if($dir) {
          if($keepfilename) {
              # name the output after the input file, minus its extension
              $title = $f;
              $title =~ s/\.[^.]*$//;
          }
          my $outfile = "$dir/$title.$section";
          if(defined($extension)) {
              $outfile .= $extension;
          }
          my $out;
          if(!open($out, ">", $outfile)) {
              print STDERR "Failed to open $outfile : $!\n";
              return 1;
          }
          print $out @desc;
          # buffered write errors only show up when the handle is closed
          if(!close($out)) {
              print STDERR "Failed to write $outfile : $!\n";
              return 1;
          }
      }
      else {
          print @desc;
      }
      return $errors;
  }
  # Entry point. Without file arguments a single document is read from stdin
  # and the result of single() becomes the exit code. Otherwise each named
  # file is converted in order, stopping with the exit code of the first one
  # for which single() returns non-zero.
  if(!@ARGV) {
      exit single();
  }
  foreach my $file (@ARGV) {
      my $rc = single($file);
      exit $rc if($rc);
  }