cd2nroff 14 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531
  1. #!/usr/bin/env perl
  2. #***************************************************************************
  3. # _ _ ____ _
  4. # Project ___| | | | _ \| |
  5. # / __| | | | |_) | |
  6. # | (__| |_| | _ <| |___
  7. # \___|\___/|_| \_\_____|
  8. #
  9. # Copyright (C) Daniel Stenberg, <daniel@haxx.se>, et al.
  10. #
  11. # This software is licensed as described in the file COPYING, which
  12. # you should have received as part of this distribution. The terms
  13. # are also available at https://curl.se/docs/copyright.html.
  14. #
  15. # You may opt to use, copy, modify, merge, publish, distribute and/or sell
  16. # copies of the Software, and permit persons to whom the Software is
  17. # furnished to do so, under the terms of the COPYING file.
  18. #
  19. # This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY
  20. # KIND, either express or implied.
  21. #
  22. # SPDX-License-Identifier: curl
  23. #
  24. ###########################################################################
  25. =begin comment
  26. Converts a curldown file to nroff (man page).
  27. =end comment
  28. =cut
  29. use strict;
  30. use warnings;
  31. my $cd2nroff = "0.1"; # to keep check
  32. my $dir;
  33. my $extension;
  34. my $keepfilename;
  35. while(@ARGV) {
  36. if($ARGV[0] eq "-d") {
  37. shift @ARGV;
  38. $dir = shift @ARGV;
  39. }
  40. elsif($ARGV[0] eq "-e") {
  41. shift @ARGV;
  42. $extension = shift @ARGV;
  43. }
  44. elsif($ARGV[0] eq "-k") {
  45. shift @ARGV;
  46. $keepfilename = 1;
  47. }
  48. elsif($ARGV[0] eq "-h") {
  49. print <<HELP
  50. Usage: cd2nroff [options] [file.md]
  51. -d <dir> Write the output to the file name from the meta-data in the
  52. specified directory, instead of writing to stdout
  53. -e <ext> If -d is used, this option can provide an added "extension", arbitrary
  54. text really, to append to the file name.
  55. -h This help text,
  56. -v Show version then exit
  57. HELP
  58. ;
  59. exit 0;
  60. }
  61. elsif($ARGV[0] eq "-v") {
  62. print "cd2nroff version $cd2nroff\n";
  63. exit 0;
  64. }
  65. else {
  66. last;
  67. }
  68. }
  69. use POSIX qw(strftime);
  70. my @ts;
  71. if (defined($ENV{SOURCE_DATE_EPOCH})) {
  72. @ts = gmtime($ENV{SOURCE_DATE_EPOCH});
  73. } else {
  74. @ts = localtime;
  75. }
  76. my $date = strftime "%Y-%m-%d", @ts;
  77. sub outseealso {
  78. my (@sa) = @_;
  79. my $comma = 0;
  80. my @o;
  81. push @o, ".SH SEE ALSO\n";
  82. for my $s (sort @sa) {
  83. push @o, sprintf "%s.BR $s", $comma ? ",\n": "";
  84. $comma = 1;
  85. }
  86. push @o, "\n";
  87. return @o;
  88. }
  89. sub outprotocols {
  90. my (@p) = @_;
  91. my $comma = 0;
  92. my @o;
  93. push @o, ".SH PROTOCOLS\n";
  94. if($p[0] eq "TLS") {
  95. push @o, "All TLS based protocols: HTTPS, FTPS, IMAPS, POP3S, SMTPS etc.";
  96. }
  97. else {
  98. my @s = sort @p;
  99. for my $e (sort @s) {
  100. push @o, sprintf "%s$e",
  101. $comma ? (($e eq $s[-1]) ? " and " : ", "): "";
  102. $comma = 1;
  103. }
  104. }
  105. push @o, "\n";
  106. return @o;
  107. }
  108. sub outtls {
  109. my (@t) = @_;
  110. my $comma = 0;
  111. my @o;
  112. if($t[0] eq "All") {
  113. push @o, "\nAll TLS backends support this option.";
  114. }
  115. else {
  116. push @o, "\nThis option works only with the following TLS backends:\n";
  117. my @s = sort @t;
  118. for my $e (@s) {
  119. push @o, sprintf "%s$e",
  120. $comma ? (($e eq $s[-1]) ? " and " : ", "): "";
  121. $comma = 1;
  122. }
  123. }
  124. push @o, "\n";
  125. return @o;
  126. }
  127. my %knownprotos = (
  128. 'DICT' => 1,
  129. 'FILE' => 1,
  130. 'FTP' => 1,
  131. 'FTPS' => 1,
  132. 'GOPHER' => 1,
  133. 'GOPHERS' => 1,
  134. 'HTTP' => 1,
  135. 'HTTPS' => 1,
  136. 'IMAP' => 1,
  137. 'IMAPS' => 1,
  138. 'LDAP' => 1,
  139. 'LDAPS' => 1,
  140. 'MQTT' => 1,
  141. 'POP3' => 1,
  142. 'POP3S' => 1,
  143. 'RTMP' => 1,
  144. 'RTMPS' => 1,
  145. 'RTSP' => 1,
  146. 'SCP' => 1,
  147. 'SFTP' => 1,
  148. 'SMB' => 1,
  149. 'SMBS' => 1,
  150. 'SMTP' => 1,
  151. 'SMTPS' => 1,
  152. 'TELNET' => 1,
  153. 'TFTP' => 1,
  154. 'WS' => 1,
  155. 'WSS' => 1,
  156. 'TLS' => 1,
  157. 'TCP' => 1,
  158. 'All' => 1
  159. );
  160. my %knowntls = (
  161. 'BearSSL' => 1,
  162. 'GnuTLS' => 1,
  163. 'mbedTLS' => 1,
  164. 'OpenSSL' => 1,
  165. 'rustls' => 1,
  166. 'Schannel' => 1,
  167. 'Secure Transport' => 1,
  168. 'wolfSSL' => 1,
  169. 'All' => 1,
  170. );
  171. sub single {
  172. my @seealso;
  173. my @proto;
  174. my @tls;
  175. my $d;
  176. my ($f)=@_;
  177. my $copyright;
  178. my $errors = 0;
  179. my $fh;
  180. my $line;
  181. my $list;
  182. my $tlslist;
  183. my $section;
  184. my $source;
  185. my $spdx;
  186. my $start = 0;
  187. my $title;
  188. if(defined($f)) {
  189. if(!open($fh, "<:crlf", "$f")) {
  190. print STDERR "cd2nroff failed to open '$f' for reading: $!\n";
  191. return 1;
  192. }
  193. }
  194. else {
  195. $f = "STDIN";
  196. $fh = \*STDIN;
  197. binmode($fh, ":crlf");
  198. }
  199. while(<$fh>) {
  200. $line++;
  201. if(!$start) {
  202. if(/^---/) {
  203. # header starts here
  204. $start = 1;
  205. }
  206. next;
  207. }
  208. if(/^Title: *(.*)/i) {
  209. $title=$1;
  210. }
  211. elsif(/^Section: *(.*)/i) {
  212. $section=$1;
  213. }
  214. elsif(/^Source: *(.*)/i) {
  215. $source=$1;
  216. }
  217. elsif(/^See-also: +(.*)/i) {
  218. $list = 1; # 1 for see-also
  219. push @seealso, $1;
  220. }
  221. elsif(/^See-also: */i) {
  222. if($seealso[0]) {
  223. print STDERR "$f:$line:1:ERROR: bad See-Also, needs list\n";
  224. return 2;
  225. }
  226. $list = 1; # 1 for see-also
  227. }
  228. elsif(/^Protocol:/i) {
  229. $list = 2; # 2 for protocol
  230. }
  231. elsif(/^TLS-backend:/i) {
  232. $list = 3; # 3 for TLS backend
  233. }
  234. elsif(/^ +- (.*)/i) {
  235. # the only lists we support are see-also and protocol
  236. if($list == 1) {
  237. push @seealso, $1;
  238. }
  239. elsif($list == 2) {
  240. push @proto, $1;
  241. }
  242. elsif($list == 3) {
  243. push @tls, $1;
  244. }
  245. else {
  246. print STDERR "$f:$line:1:ERROR: list item without owner?\n";
  247. return 2;
  248. }
  249. }
  250. # REUSE-IgnoreStart
  251. elsif(/^C: (.*)/i) {
  252. $copyright=$1;
  253. }
  254. elsif(/^SPDX-License-Identifier: (.*)/i) {
  255. $spdx=$1;
  256. }
  257. # REUSE-IgnoreEnd
  258. elsif(/^---/) {
  259. # end of the header section
  260. if(!$title) {
  261. print STDERR "ERROR: no 'Title:' in $f\n";
  262. return 1;
  263. }
  264. if(!$section) {
  265. print STDERR "ERROR: no 'Section:' in $f\n";
  266. return 2;
  267. }
  268. if(!$seealso[0]) {
  269. print STDERR "$f:$line:1:ERROR: no 'See-also:' present\n";
  270. return 2;
  271. }
  272. if(!$copyright) {
  273. print STDERR "$f:$line:1:ERROR: no 'C:' field present\n";
  274. return 2;
  275. }
  276. if(!$spdx) {
  277. print STDERR "$f:$line:1:ERROR: no 'SPDX-License-Identifier:' field present\n";
  278. return 2;
  279. }
  280. if($section == 3) {
  281. if(!$proto[0]) {
  282. printf STDERR "$f:$line:1:ERROR: missing Protocol:\n";
  283. exit 2;
  284. }
  285. my $tls = 0;
  286. for my $p (@proto) {
  287. if($p eq "TLS") {
  288. $tls = 1;
  289. }
  290. if(!$knownprotos{$p}) {
  291. printf STDERR "$f:$line:1:ERROR: invalid protocol used: $p:\n";
  292. exit 2;
  293. }
  294. }
  295. # This is for TLS, require TLS-backend:
  296. if($tls) {
  297. if(!$tls[0]) {
  298. printf STDERR "$f:$line:1:ERROR: missing TLS-backend:\n";
  299. exit 2;
  300. }
  301. for my $t (@tls) {
  302. if(!$knowntls{$t}) {
  303. printf STDERR "$f:$line:1:ERROR: invalid TLS backend: $t:\n";
  304. exit 2;
  305. }
  306. }
  307. }
  308. }
  309. last;
  310. }
  311. else {
  312. chomp;
  313. print STDERR "WARN: unrecognized line in $f, ignoring:\n:'$_';"
  314. }
  315. }
  316. if(!$start) {
  317. print STDERR "$f:$line:1:ERROR: no header present\n";
  318. return 2;
  319. }
  320. my @desc;
  321. my $quote = 0;
  322. my $blankline = 0;
  323. my $header = 0;
  324. # cut off the leading path from the file name, if any
  325. $f =~ s/^(.*[\\\/])//;
  326. push @desc, ".\\\" generated by cd2nroff $cd2nroff from $f\n";
  327. push @desc, ".TH $title $section \"$date\" $source\n";
  328. while(<$fh>) {
  329. $line++;
  330. $d = $_;
  331. if($quote) {
  332. if($quote == 4) {
  333. # remove the indentation
  334. if($d =~ /^ (.*)/) {
  335. push @desc, "$1\n";
  336. next;
  337. }
  338. else {
  339. # end of quote
  340. $quote = 0;
  341. push @desc, ".fi\n";
  342. next;
  343. }
  344. }
  345. if(/^~~~/) {
  346. # end of quote
  347. $quote = 0;
  348. push @desc, ".fi\n";
  349. next;
  350. }
  351. # convert single backslahes to doubles
  352. $d =~ s/\\/\\\\/g;
  353. # lines starting with a period needs it escaped
  354. $d =~ s/^\./\\&./;
  355. push @desc, $d;
  356. next;
  357. }
  358. # remove single line HTML comments
  359. $d =~ s/<!--.*?-->//g;
  360. # **bold**
  361. $d =~ s/\*\*(\S.*?)\*\*/\\fB$1\\fP/g;
  362. # *italics*
  363. $d =~ s/\*(\S.*?)\*/\\fI$1\\fP/g;
  364. if($d =~ /[^\\][\<\>]/) {
  365. print STDERR "$f:$line:1:WARN: un-escaped < or > used\n";
  366. }
  367. # convert backslash-'<' or '> to just the second character
  368. $d =~ s/\\([<>])/$1/g;
  369. # mentions of curl symbols with man pages use italics by default
  370. $d =~ s/((lib|)curl([^ ]*\(3\)))/\\fI$1\\fP/gi;
  371. # backticked becomes italics
  372. $d =~ s/\`(.*?)\`/\\fI$1\\fP/g;
  373. if(/^## (.*)/) {
  374. my $word = $1;
  375. # if there are enclosing quotes, remove them first
  376. $word =~ s/[\"\'\`](.*)[\"\'\`]\z/$1/;
  377. # enclose in double quotes if there is a space present
  378. if($word =~ / /) {
  379. push @desc, ".IP \"$word\"\n";
  380. }
  381. else {
  382. push @desc, ".IP $word\n";
  383. }
  384. $header = 1;
  385. }
  386. elsif(/^##/) {
  387. # end of IP sequence
  388. push @desc, ".PP\n";
  389. $header = 1;
  390. }
  391. elsif(/^# (.*)/) {
  392. my $word = $1;
  393. # if there are enclosing quotes, remove them first
  394. $word =~ s/[\"\'](.*)[\"\']\z/$1/;
  395. if($word eq "PROTOCOLS") {
  396. print STDERR "$f:$line:1:WARN: PROTOCOLS section in source file\n";
  397. }
  398. elsif($word eq "EXAMPLE") {
  399. # insert the generated PROTOCOLS section before EXAMPLE
  400. push @desc, outprotocols(@proto);
  401. if($proto[0] eq "TLS") {
  402. push @desc, outtls(@tls);
  403. }
  404. }
  405. push @desc, ".SH $word\n";
  406. $header = 1;
  407. }
  408. elsif(/^~~~c/) {
  409. # start of a code section, not indented
  410. $quote = 1;
  411. push @desc, "\n" if($blankline && !$header);
  412. $header = 0;
  413. push @desc, ".nf\n";
  414. }
  415. elsif(/^~~~/) {
  416. # start of a quote section; not code, not indented
  417. $quote = 1;
  418. push @desc, "\n" if($blankline && !$header);
  419. $header = 0;
  420. push @desc, ".nf\n";
  421. }
  422. elsif(/^ (.*)/) {
  423. # quoted, indented by 4 space
  424. $quote = 4;
  425. push @desc, "\n" if($blankline && !$header);
  426. $header = 0;
  427. push @desc, ".nf\n$1\n";
  428. }
  429. elsif(/^[ \t]*\n/) {
  430. # count and ignore blank lines
  431. $blankline++;
  432. }
  433. else {
  434. # don't output newlines if this is the first content after a
  435. # header
  436. push @desc, "\n" if($blankline && !$header);
  437. $blankline = 0;
  438. $header = 0;
  439. # quote minuses in the output
  440. $d =~ s/([^\\])-/$1\\-/g;
  441. # replace single quotes
  442. $d =~ s/\'/\\(aq/g;
  443. # handle double quotes first on the line
  444. $d =~ s/^(\s*)\"/$1\\&\"/;
  445. # lines starting with a period needs it escaped
  446. $d =~ s/^\./\\&./;
  447. if($d =~ /^(.*) /) {
  448. printf STDERR "$f:$line:%d:ERROR: 2 spaces detected\n",
  449. length($1);
  450. $errors++;
  451. }
  452. if($d =~ /^[ \t]*\n/) {
  453. # replaced away all contents
  454. $blankline= 1;
  455. }
  456. else {
  457. push @desc, $d;
  458. }
  459. }
  460. }
  461. if($fh != \*STDIN) {
  462. close($fh);
  463. }
  464. push @desc, outseealso(@seealso);
  465. if($dir) {
  466. if($keepfilename) {
  467. $title = $f;
  468. $title =~ s/\.[^.]*$//;
  469. }
  470. my $outfile = "$dir/$title.$section";
  471. if(defined($extension)) {
  472. $outfile .= $extension;
  473. }
  474. if(!open(O, ">", $outfile)) {
  475. print STDERR "Failed to open $outfile : $!\n";
  476. return 1;
  477. }
  478. print O @desc;
  479. close(O);
  480. }
  481. else {
  482. print @desc;
  483. }
  484. return $errors;
  485. }
  486. if(@ARGV) {
  487. for my $f (@ARGV) {
  488. my $r = single($f);
  489. if($r) {
  490. exit $r;
  491. }
  492. }
  493. }
  494. else {
  495. exit single();
  496. }