file.c 30 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
712781279128012811282128312841285128612871288128912901291129212931294129512961297129812991300130113021303130413051306130713081309131013111312131313141315131613171318131913201321132213231324132513261327132813291330133113321333133413351336133713381339134013411342134313441345134613471348134913501351135213531354135513561357135813591360136113621363136413651366136713681369137013711372137313741375137613771378137913801381138213831384138513861387138813891390139113921393139413951396139713981399140014011402140314041405140614071408140914101411141214131414141514161417141814191420142114221423142414251426142714281429143014311432143314341435143614371438143914401441144214431444144514461447144814491450145114521453145414551456145714581459146014611462146314641465146614671468146914701471147214731474147514761477147814791480148114821483148414851486148714881489149014911492149314941495149614971498149915001501150215031504150515061507150815091510151115121513151415151516
  1. /*
  2. * This file is part of the UCB release of Plan 9. It is subject to the license
  3. * terms in the LICENSE file found in the top-level directory of this
  4. * distribution and at http://akaros.cs.berkeley.edu/files/Plan9License. No
  5. * part of the UCB release of Plan 9, including this file, may be copied,
  6. * modified, propagated, or distributed except according to the terms contained
  7. * in the LICENSE file.
  8. */
  9. #include <u.h>
  10. #include <libc.h>
  11. #include <bio.h>
  12. #include <ctype.h>
  13. #include <mach.h>
  14. /*
  15. * file - determine type of file
  16. */
  /*
   * LENDIAN(p): combine the four bytes p[0..3] into one 32-bit value,
   * with p[0] as the least significant byte.  Each byte is widened to
   * uint32_t before it is shifted, so a byte >= 0x80 in p[3] is never
   * shifted into the sign bit of a promoted int.
   */
  #define LENDIAN(p) ((uint32_t)(p)[0] | ((uint32_t)(p)[1]<<8) | ((uint32_t)(p)[2]<<16) | ((uint32_t)(p)[3]<<24))
  18. uint8_t buf[6001];
  19. int16_t cfreq[140];
  20. int16_t wfreq[50];
  21. int nbuf;
  22. Dir* mbuf;
  23. int fd;
  24. char *fname;
  25. char *slash;
  /*
   * Word classes (indices into wfreq[]) and character classes
   * (indices into cfreq[]; values below 128 are the ASCII character itself).
   */
  enum
  {
      Cword,          /* C keyword */
      Fword,          /* presumably Fortran keyword -- no classifier in view uses it */
      Aword,          /* assembler keyword, counted by isas() */
      Alword,         /* alef keyword, consulted by isc() */
      Lword,          /* limbo keyword, counted by islimbo() */
      I1,             /* the word "include" */
      I2,             /* a header base name such as "libc" */
      I3,             /* the header suffix "h" */

      Clatin = 128,   /* Latin-1 character */
      Cbinary,        /* invalid rune */
      Cnull,          /* NUL */
      Ceascii,        /* ASCII control character */
      Cutf,           /* rune outside Latin-1 */
  };

  /*
   * Keyword dictionary.  wordfreq() looks words up with a binary search
   * using strcmp(), so the entries MUST stay in strcmp() order
   * (upper case sorts before lower case).  Words are maximal runs of
   * isalpha() characters, so an entry containing a digit or '_' can
   * never match.
   */
  struct
  {
      char* word;
      int class;
  } dict[] =
  {
      {"PATH", Lword},
      {"TEXT", Aword},
      {"adt", Alword},
      {"aggr", Alword},
      {"alef", Alword},
      {"array", Lword},
      {"bio", I2},
      {"block", Fword},
      {"char", Cword},
      {"common", Fword},
      {"con", Lword},
      {"data", Fword},
      {"dimension", Fword},
      {"double", Cword},
      {"extern", Cword},
      {"float", Cword},
      {"fn", Lword},
      {"function", Fword},
      {"h", I3},
      {"implement", Lword},
      {"import", Lword},
      {"include", I1},
      {"int", Cword},
      {"integer", Fword},
      {"iota", Lword},
      {"libc", I2},
      {"long", Cword},
      {"module", Lword},
      {"real", Fword},
      {"ref", Lword},
      {"register", Cword},
      {"self", Lword},
      {"short", Cword},
      {"static", Cword},
      {"stdio", I2},
      {"struct", Cword},
      {"subroutine", Fword},
      {"u", I2},
      {"void", Cword},
  };
  /*
   * Values of the 'mode' field in the language table below.
   */
  enum {
      Normal = 0,     /* language described by a single range */
      First,          /* first entry for language spanning several ranges */
      Multi,          /* later entries for the same language */
      Shared,         /* codes used in several languages */
  };

  /*
   * Rune ranges used to name the scripts found in a UTF file.
   * bump_utf_count() binary-searches this table on [low, high], so
   * the entries are kept in ascending, non-overlapping order; the
   * final all-zero entry terminates linear scans on 'name'.
   */
  struct
  {
      int mode;       /* one of the mode codes above */
      int count;      /* runes seen in this range; reset by filetype() */
      int low;        /* first rune of the range */
      int high;       /* last rune of the range */
      char *name;     /* name printed by print_utf() */
  } language[] =
  {
      {Normal, 0, 0x0100, 0x01FF, "Extended Latin"},
      {Normal, 0, 0x0370, 0x03FF, "Greek"},
      {Normal, 0, 0x0400, 0x04FF, "Cyrillic"},
      {Normal, 0, 0x0530, 0x058F, "Armenian"},
      {Normal, 0, 0x0590, 0x05FF, "Hebrew"},
      {Normal, 0, 0x0600, 0x06FF, "Arabic"},
      {Normal, 0, 0x0900, 0x097F, "Devanagari"},
      {Normal, 0, 0x0980, 0x09FF, "Bengali"},
      {Normal, 0, 0x0A00, 0x0A7F, "Gurmukhi"},
      {Normal, 0, 0x0A80, 0x0AFF, "Gujarati"},
      {Normal, 0, 0x0B00, 0x0B7F, "Oriya"},
      {Normal, 0, 0x0B80, 0x0BFF, "Tamil"},
      {Normal, 0, 0x0C00, 0x0C7F, "Telugu"},
      {Normal, 0, 0x0C80, 0x0CFF, "Kannada"},
      {Normal, 0, 0x0D00, 0x0D7F, "Malayalam"},
      {Normal, 0, 0x0E00, 0x0E7F, "Thai"},
      {Normal, 0, 0x0E80, 0x0EFF, "Lao"},
      {Normal, 0, 0x1000, 0x105F, "Tibetan"},
      {Normal, 0, 0x10A0, 0x10FF, "Georgian"},
      {Normal, 0, 0x3040, 0x30FF, "Japanese"},
      {Normal, 0, 0x3100, 0x312F, "Chinese"},
      {First,  0, 0x3130, 0x318F, "Korean"},
      {Multi,  0, 0x3400, 0x3D2F, "Korean"},
      {Shared, 0, 0x4e00, 0x9fff, "CJK"},
      {Normal, 0, 0, 0, 0},   /* terminal entry */
  };
  /*
   * Gross classification of the file contents, chosen by filetype()
   * from the character-class histogram.
   */
  enum
  {
      Fascii,         /* printable ascii */
      Flatin,         /* latin 1 */
      Futf,           /* UTF character set */
      Fbinary,        /* binary */
      Feascii,        /* ASCII with control chars */
      Fnull,          /* NULL in file */
  } guess;
  /* drivers */
  void filetype(int);
  void type(char*, int);

  /* classifiers, in the order they appear in call[] */
  int long0(void);
  int istring(void);
  int iself(void);
  int isexec(void);
  int iff(void);
  int longoff(void);
  int isoffstr(void);
  int isrfc822(void);
  int ismbox(void);
  int istar(void);
  int ishtml(void);
  int iscint(void);
  int islimbo(void);
  int isc(void);
  int isas(void);
  int isp9font(void);
  int isp9bit(void);
  int isrtf(void);
  int ismsdos(void);
  int isface(void);
  int ismung(void);
  int isenglish(void);

  /* declared, but not entered in call[] */
  int ishp(void);

  /* helpers */
  void bump_utf_count(Rune);
  int cistrncmp(char*, char*, int);
  int getfontnum(uint8_t*, uint8_t**);
  int p9bitnum(uint8_t*);
  int p9subfont(uint8_t*);
  void print_utf(void);
  int utf_count(void);
  void wordfreq(void);
  172. int (*call[])(void) =
  173. {
  174. long0, /* recognizable by first 4 bytes */
  175. istring, /* recognizable by first string */
  176. iself, /* ELF (foreign) executable */
  177. isexec, /* native executables */
  178. iff, /* interchange file format (strings) */
  179. longoff, /* recognizable by 4 bytes at some offset */
  180. isoffstr, /* recognizable by string at some offset */
  181. isrfc822, /* email file */
  182. ismbox, /* mail box */
  183. istar, /* recognizable by tar checksum */
  184. ishtml, /* html keywords */
  185. iscint, /* compiler/assembler intermediate */
  186. islimbo, /* limbo source */
  187. isc, /* c & alef compiler key words */
  188. isas, /* assembler key words */
  189. isp9font, /* plan 9 font */
  190. isp9bit, /* plan 9 image (as from /dev/window) */
  191. isrtf, /* rich text format */
  192. ismsdos, /* msdos exe (virus file attachement) */
  193. isface, /* ascii face file */
  194. /* last resorts */
  195. ismung, /* entropy compressed/encrypted */
  196. isenglish, /* char frequency English */
  197. 0
  198. };
  /* non-zero when -m was given: print MIME types instead of descriptions */
  int mime;

  /* MIME answers shared by several classifiers; both end in a newline */
  char OCTET[] = "application/octet-stream\n";
  char PLAIN[] = "text/plain\n";
  202. void
  203. main(int argc, char *argv[])
  204. {
  205. int i, j, maxlen;
  206. char *cp;
  207. Rune r;
  208. ARGBEGIN{
  209. case 'm':
  210. mime = 1;
  211. break;
  212. default:
  213. fprint(2, "usage: file [-m] [file...]\n");
  214. exits("usage");
  215. }ARGEND;
  216. maxlen = 0;
  217. if(mime == 0 || argc > 1){
  218. for(i = 0; i < argc; i++) {
  219. for (j = 0, cp = argv[i]; *cp; j++, cp += chartorune(&r, cp))
  220. ;
  221. if(j > maxlen)
  222. maxlen = j;
  223. }
  224. }
  225. if (argc <= 0) {
  226. if(!mime)
  227. print ("stdin: ");
  228. filetype(0);
  229. }
  230. else {
  231. for(i = 0; i < argc; i++)
  232. type(argv[i], maxlen);
  233. }
  234. exits(0);
  235. }
  236. void
  237. type(char *file, int nlen)
  238. {
  239. Rune r;
  240. int i;
  241. char *p;
  242. if(nlen > 0){
  243. slash = 0;
  244. for (i = 0, p = file; *p; i++) {
  245. if (*p == '/') /* find rightmost slash */
  246. slash = p;
  247. p += chartorune(&r, p); /* count runes */
  248. }
  249. print("%s:%*s",file, nlen-i+1, "");
  250. }
  251. fname = file;
  252. if ((fd = open(file, OREAD)) < 0) {
  253. print("cannot open: %r\n");
  254. return;
  255. }
  256. filetype(fd);
  257. close(fd);
  258. }
  259. void
  260. filetype(int fd)
  261. {
  262. Rune r;
  263. int i, f, n;
  264. char *p, *eob;
  265. free(mbuf);
  266. mbuf = dirfstat(fd);
  267. if(mbuf == nil){
  268. print("cannot stat: %r\n");
  269. return;
  270. }
  271. if(mbuf->mode & DMDIR) {
  272. print(mime ? OCTET : "directory\n");
  273. return;
  274. }
  275. if(mbuf->type != 'M' && mbuf->type != '|') {
  276. print(mime ? OCTET : "special file #%C/%s\n",
  277. mbuf->type, mbuf->name);
  278. return;
  279. }
  280. /* may be reading a pipe on standard input */
  281. nbuf = readn(fd, buf, sizeof(buf)-1);
  282. if(nbuf < 0) {
  283. print("cannot read: %r\n");
  284. return;
  285. }
  286. if(nbuf == 0) {
  287. print(mime ? PLAIN : "empty file\n");
  288. return;
  289. }
  290. buf[nbuf] = 0;
  291. /*
  292. * build histogram table
  293. */
  294. memset(cfreq, 0, sizeof(cfreq));
  295. for (i = 0; language[i].name; i++)
  296. language[i].count = 0;
  297. eob = (char *)buf+nbuf;
  298. for(n = 0, p = (char *)buf; p < eob; n++) {
  299. if (!fullrune(p, eob-p) && eob-p < UTFmax)
  300. break;
  301. p += chartorune(&r, p);
  302. if (r == 0)
  303. f = Cnull;
  304. else if (r <= 0x7f) {
  305. if (!isprint(r) && !isspace(r))
  306. f = Ceascii; /* ASCII control char */
  307. else f = r;
  308. } else if (r == 0x80) {
  309. bump_utf_count(r);
  310. f = Cutf;
  311. } else if (r < 0xA0)
  312. f = Cbinary; /* Invalid Runes */
  313. else if (r <= 0xff)
  314. f = Clatin; /* Latin 1 */
  315. else {
  316. bump_utf_count(r);
  317. f = Cutf; /* UTF extension */
  318. }
  319. cfreq[f]++; /* ASCII chars peg directly */
  320. }
  321. /*
  322. * gross classify
  323. */
  324. if (cfreq[Cbinary])
  325. guess = Fbinary;
  326. else if (cfreq[Cutf])
  327. guess = Futf;
  328. else if (cfreq[Clatin])
  329. guess = Flatin;
  330. else if (cfreq[Ceascii])
  331. guess = Feascii;
  332. else if (cfreq[Cnull])
  333. guess = Fbinary;
  334. else
  335. guess = Fascii;
  336. /*
  337. * lookup dictionary words
  338. */
  339. memset(wfreq, 0, sizeof(wfreq));
  340. if(guess == Fascii || guess == Flatin || guess == Futf)
  341. wordfreq();
  342. /*
  343. * call individual classify routines
  344. */
  345. for(i=0; call[i]; i++)
  346. if((*call[i])())
  347. return;
  348. /*
  349. * if all else fails,
  350. * print out gross classification
  351. */
  352. if (nbuf < 100 && !mime)
  353. print(mime ? PLAIN : "short ");
  354. if (guess == Fascii)
  355. print(mime ? PLAIN : "Ascii\n");
  356. else if (guess == Feascii)
  357. print(mime ? PLAIN : "extended ascii\n");
  358. else if (guess == Flatin)
  359. print(mime ? PLAIN : "latin ascii\n");
  360. else if (guess == Futf && utf_count() < 4)
  361. print_utf();
  362. else print(mime ? OCTET : "binary\n");
  363. }
  364. void
  365. bump_utf_count(Rune r)
  366. {
  367. int low, high, mid;
  368. high = sizeof(language)/sizeof(language[0])-1;
  369. for (low = 0; low < high;) {
  370. mid = (low+high)/2;
  371. if (r >= language[mid].low) {
  372. if (r <= language[mid].high) {
  373. language[mid].count++;
  374. break;
  375. } else low = mid+1;
  376. } else high = mid;
  377. }
  378. }
  379. int
  380. utf_count(void)
  381. {
  382. int i, count;
  383. count = 0;
  384. for (i = 0; language[i].name; i++)
  385. if (language[i].count > 0)
  386. switch (language[i].mode) {
  387. case Normal:
  388. case First:
  389. count++;
  390. break;
  391. default:
  392. break;
  393. }
  394. return count;
  395. }
  396. int
  397. chkascii(void)
  398. {
  399. int i;
  400. for (i = 'a'; i < 'z'; i++)
  401. if (cfreq[i])
  402. return 1;
  403. for (i = 'A'; i < 'Z'; i++)
  404. if (cfreq[i])
  405. return 1;
  406. return 0;
  407. }
  408. int
  409. find_first(char *name)
  410. {
  411. int i;
  412. for (i = 0; language[i].name != 0; i++)
  413. if (language[i].mode == First
  414. && strcmp(language[i].name, name) == 0)
  415. return i;
  416. return -1;
  417. }
  418. void
  419. print_utf(void)
  420. {
  421. int i, printed, j;
  422. if(mime){
  423. print(PLAIN);
  424. return;
  425. }
  426. if (chkascii()) {
  427. printed = 1;
  428. print("Ascii");
  429. } else
  430. printed = 0;
  431. for (i = 0; language[i].name; i++)
  432. if (language[i].count) {
  433. switch(language[i].mode) {
  434. case Multi:
  435. j = find_first(language[i].name);
  436. if (j < 0)
  437. break;
  438. if (language[j].count > 0)
  439. break;
  440. /* Fall through */
  441. case Normal:
  442. case First:
  443. if (printed)
  444. print(" & ");
  445. else printed = 1;
  446. print("%s", language[i].name);
  447. break;
  448. case Shared:
  449. default:
  450. break;
  451. }
  452. }
  453. if(!printed)
  454. print("UTF");
  455. print(" text\n");
  456. }
  457. void
  458. wordfreq(void)
  459. {
  460. int low, high, mid, r;
  461. uint8_t *p, *p2, c;
  462. p = buf;
  463. for(;;) {
  464. while (p < buf+nbuf && !isalpha(*p))
  465. p++;
  466. if (p >= buf+nbuf)
  467. return;
  468. p2 = p;
  469. while(p < buf+nbuf && isalpha(*p))
  470. p++;
  471. c = *p;
  472. *p = 0;
  473. high = sizeof(dict)/sizeof(dict[0]);
  474. for(low = 0;low < high;) {
  475. mid = (low+high)/2;
  476. r = strcmp(dict[mid].word, (char*)p2);
  477. if(r == 0) {
  478. wfreq[dict[mid].class]++;
  479. break;
  480. }
  481. if(r < 0)
  482. low = mid+1;
  483. else
  484. high = mid;
  485. }
  486. *p++ = c;
  487. }
  488. }
  /*
   * One magic-number rule: a file matches when
   * (value & mask) == x.
   */
  typedef struct Filemagic Filemagic;
  struct Filemagic {
      uint32_t x;     /* expected value after masking */
      uint32_t mask;  /* bits of the value that take part in the comparison */
      char *desc;     /* text printed in normal mode */
      char *mime;     /* text printed in mime mode */
  };
  496. /*
  497. * integers in this table must be as seen on a little-endian machine
  498. * when read from a file.
  499. */
  500. Filemagic long0tab[] = {
  501. {0xF16DF16D, 0xFFFFFFFF, "pac1 audio file\n", OCTET},
  502. /* "pac1" */
  503. {0x31636170, 0xFFFFFFFF, "pac3 audio file\n", OCTET},
  504. /* "pXc2 */
  505. {0x32630070, 0xFFFF00FF, "pac4 audio file\n", OCTET},
  506. {0xBA010000, 0xFFFFFFFF, "mpeg system stream\n", OCTET},
  507. {0x43614c66, 0xFFFFFFFF, "FLAC audio file\n", OCTET},
  508. {0x30800CC0, 0xFFFFFFFF, "inferno .dis executable\n", OCTET},
  509. {0x04034B50, 0xFFFFFFFF, "zip archive\n", "application/zip"},
  510. {070707, 0xFFFF, "cpio archive\n", OCTET},
  511. {0x2F7, 0xFFFF, "tex dvi\n", "application/dvi"},
  512. {0xfaff, 0xfeff, "mp3 audio\n", "audio/mpeg"},
  513. {0xf0ff, 0xf6ff, "aac audio\n", "audio/mpeg"},
  514. {0xfeff0000, 0xffffffff, "utf-32be\n", "text/plain charset=utf-32be"},
  515. {0xfffe, 0xffffffff, "utf-32le\n", "text/plain charset=utf-32le"},
  516. {0xfeff, 0xffff, "utf-16be\n", "text/plain charset=utf-16be"},
  517. {0xfffe, 0xffff, "utf-16le\n", "text/plain charset=utf-16le"},
  518. /* 0xfeedface: this could alternately be a Next Plan 9 boot image */
  519. {0xcefaedfe, 0xFFFFFFFF, "32-bit power Mach-O executable\n", OCTET},
  520. /* 0xfeedfacf */
  521. {0xcffaedfe, 0xFFFFFFFF, "64-bit power Mach-O executable\n", OCTET},
  522. /* 0xcefaedfe */
  523. {0xfeedface, 0xFFFFFFFF, "386 Mach-O executable\n", OCTET},
  524. /* 0xcffaedfe */
  525. {0xfeedfacf, 0xFFFFFFFF, "amd64 Mach-O executable\n", OCTET},
  526. /* 0xcafebabe */
  527. {0xbebafeca, 0xFFFFFFFF, "Mach-O universal executable\n", OCTET},
  528. /*
  529. * these magic numbers are stored big-endian on disk,
  530. * thus the numbers appear reversed in this table.
  531. */
  532. {0xad4e5cd1, 0xFFFFFFFF, "venti arena\n", OCTET},
  533. {0x2bb19a52, 0xFFFFFFFF, "paq archive\n", OCTET},
  534. };
  535. int
  536. filemagic(Filemagic *tab, int ntab, uint32_t x)
  537. {
  538. int i;
  539. for(i=0; i<ntab; i++)
  540. if((x&tab[i].mask) == tab[i].x){
  541. print(mime ? tab[i].mime : tab[i].desc);
  542. return 1;
  543. }
  544. return 0;
  545. }
  546. int
  547. long0(void)
  548. {
  549. return filemagic(long0tab, nelem(long0tab), LENDIAN(buf));
  550. }
  551. typedef struct Fileoffmag Fileoffmag;
  552. struct Fileoffmag {
  553. uint32_t off;
  554. Filemagic Filemagic;
  555. };
  556. /*
  557. * integers in this table must be as seen on a little-endian machine
  558. * when read from a file.
  559. */
  560. Fileoffmag longofftab[] = {
  561. /*
  562. * these magic numbers are stored big-endian on disk,
  563. * thus the numbers appear reversed in this table.
  564. */
  565. {256*1024, { 0xe7a5e4a9, 0xFFFFFFFF, "venti arenas partition\n", OCTET }},
  566. {256*1024, { 0xc75e5cd1, 0xFFFFFFFF, "venti index section\n", OCTET }},
  567. {128*1024, { 0x89ae7637, 0xFFFFFFFF, "fossil write buffer\n", OCTET }},
  568. {4, { 0x31647542, 0xFFFFFFFF, "OS X finder properties\n", OCTET }},
  569. };
  570. int
  571. fileoffmagic(Fileoffmag *tab, int ntab)
  572. {
  573. int i;
  574. uint32_t x;
  575. Fileoffmag *tp;
  576. uint8_t buf[sizeof(int32_t)];
  577. for(i=0; i<ntab; i++) {
  578. tp = tab + i;
  579. seek(fd, tp->off, 0);
  580. if (readn(fd, buf, sizeof buf) != sizeof buf)
  581. continue;
  582. x = LENDIAN(buf);
  583. if((x&tp->Filemagic.mask) == tp->Filemagic.x){
  584. print(mime? tp->Filemagic.mime: tp->Filemagic.desc);
  585. return 1;
  586. }
  587. }
  588. return 0;
  589. }
  590. int
  591. longoff(void)
  592. {
  593. return fileoffmagic(longofftab, nelem(longofftab));
  594. }
  595. int
  596. isexec(void)
  597. {
  598. Fhdr f;
  599. seek(fd, 0, 0); /* reposition to start of file */
  600. if(crackhdr(fd, &f)) {
  601. print(mime ? OCTET : "%s\n", f.name);
  602. return 1;
  603. }
  604. return 0;
  605. }
  /* from tar.c */
  enum {
      NAMSIZ = 100,   /* size of the name and linkname fields */
      TBLOCK = 512,   /* size of one tar block */
  };

  /*
   * A tar header block, viewed either as raw bytes (dummy) or as the
   * header fields (dbuf).
   */
  union hblock
  {
      char dummy[TBLOCK];
      struct header
      {
          char name[NAMSIZ];
          char mode[8];
          char uid[8];
          char gid[8];
          char size[12];
          char mtime[12];
          char chksum[8];
          char linkflag;
          char linkname[NAMSIZ];

          /* rest are defined by POSIX's ustar format; see p1003.2b */
          char magic[6];      /* "ustar" */
          char version[2];
          char uname[32];
          char gname[32];
          char devmajor[8];
          char devminor[8];
          char prefix[155];   /* if non-null, path = prefix "/" name */
      } dbuf;
  };
  632. int
  633. checksum(union hblock *hp)
  634. {
  635. int i;
  636. char *cp;
  637. struct header *hdr = &hp->dbuf;
  638. for (cp = hdr->chksum; cp < &hdr->chksum[sizeof hdr->chksum]; cp++)
  639. *cp = ' ';
  640. i = 0;
  641. for (cp = hp->dummy; cp < &hp->dummy[TBLOCK]; cp++)
  642. i += *cp & 0xff;
  643. return i;
  644. }
  645. int
  646. istar(void)
  647. {
  648. int chksum;
  649. char tblock[TBLOCK];
  650. union hblock *hp = (union hblock *)tblock;
  651. struct header *hdr = &hp->dbuf;
  652. seek(fd, 0, 0); /* reposition to start of file */
  653. if (readn(fd, tblock, sizeof tblock) != sizeof tblock)
  654. return 0;
  655. chksum = strtol(hdr->chksum, 0, 8);
  656. if (hdr->name[0] != '\0' && checksum(hp) == chksum) {
  657. if (strcmp(hdr->magic, "ustar") == 0)
  658. print(mime? "application/x-ustar\n":
  659. "posix tar archive\n");
  660. else
  661. print(mime? "application/x-tar\n": "tar archive\n");
  662. return 1;
  663. }
  664. return 0;
  665. }
  /*
   * initial words to classify file
   *
   * istring() compares the first 'length' bytes of the file with
   * 'key'; a length of -1 means strlen(key).  Entries are tried in
   * order, so longer keys precede their prefixes.  istring() prints
   * filetype or mime followed by a newline, therefore neither string
   * may end in a newline itself (do not use OCTET or PLAIN here).
   * The table ends with an all-zero entry.
   */
  struct FILE_STRING
  {
      char *key;          /* bytes expected at the start of the file */
      char *filetype;     /* answer in normal mode */
      int length;         /* number of bytes of key to compare, or -1 */
      char *mime;         /* answer in mime mode */
  } file_string[] =
  {
      {"!<arch>\n__.SYMDEF", "archive random library", 16, "application/octet-stream"},
      {"!<arch>\n", "archive", 8, "application/octet-stream"},
      {"070707", "cpio archive - ascii header", 6, "application/octet-stream"},
      {"#!/bin/rc", "rc executable file", 9, "text/plain"},
      {"#!/bin/sh", "sh executable file", 9, "text/plain"},
      {"%!", "postscript", 2, "application/postscript"},
      {"\004%!", "postscript", 3, "application/postscript"},
      {"x T post", "troff output for post", 8, "application/troff"},
      {"x T Latin1", "troff output for Latin1", 10, "application/troff"},
      {"x T utf", "troff output for UTF", 7, "application/troff"},
      {"x T 202", "troff output for 202", 7, "application/troff"},
      {"x T aps", "troff output for aps", 7, "application/troff"},
      {"x T ", "troff output", 4, "application/troff"},
      {"GIF", "GIF image", 3, "image/gif"},
      {"\0PC Research, Inc\0","ghostscript fax file", 18, "application/ghostscript"},
      {"%PDF", "PDF", 4, "application/pdf"},
      {"<html>\n", "HTML file", 7, "text/html"},
      {"<HTML>\n", "HTML file", 7, "text/html"},
      {"\111\111\052\000", "tiff", 4, "image/tiff"},
      {"\115\115\000\052", "tiff", 4, "image/tiff"},
      {"\377\330\377\340", "jpeg", 4, "image/jpeg"},
      {"\377\330\377\341", "jpeg", 4, "image/jpeg"},
      {"\377\330\377\333", "jpeg", 4, "image/jpeg"},
      {"BM", "bmp", 2, "image/bmp"},
      {"\xD0\xCF\x11\xE0\xA1\xB1\x1A\xE1", "microsoft office document", 8, "application/octet-stream"},
      {"<MakerFile ", "FrameMaker file", 11, "application/framemaker"},
      {"\033E\033", "HP PCL printer data", 3, "application/octet-stream"},
      {"\033&", "HP PCL printer data", 2, "application/octet-stream"},
      {"\033%-12345X", "HPJCL file", 9, "application/hpjcl"},
      {"\033Lua", "Lua bytecode", 4, "application/octet-stream"},
      {"ID3", "mp3 audio with id3", 3, "audio/mpeg"},
      {"\211PNG", "PNG image", 4, "image/png"},
      {"P3\n", "ppm", 3, "image/ppm"},
      {"P6\n", "ppm", 3, "image/ppm"},
      {"/* XPM */\n", "xbm", 10, "image/xbm"},
      {".HTML ", "troff -ms input", 6, "text/troff"},
      {".LP", "troff -ms input", 3, "text/troff"},
      {".ND", "troff -ms input", 3, "text/troff"},
      {".PP", "troff -ms input", 3, "text/troff"},
      {".TL", "troff -ms input", 3, "text/troff"},
      {".TR", "troff -ms input", 3, "text/troff"},
      {".TH", "manual page", 3, "text/troff"},
      {".\\\"", "troff input", 3, "text/troff"},
      {".de", "troff input", 3, "text/troff"},
      {".if", "troff input", 3, "text/troff"},
      {".nr", "troff input", 3, "text/troff"},
      {".tr", "troff input", 3, "text/troff"},
      {"vac:", "venti score", 4, "text/plain"},
      {"-----BEGIN CERTIFICATE-----\n",
          "pem certificate", -1, "text/plain"},
      {"-----BEGIN TRUSTED CERTIFICATE-----\n",
          "pem trusted certificate", -1, "text/plain"},
      {"-----BEGIN X509 CERTIFICATE-----\n",
          "pem x.509 certificate", -1, "text/plain"},
      {"subject=/C=", "pem certificate with header", -1, "text/plain"},
      {"process snapshot ", "process snapshot", -1, "application/snapfs"},
      {"BEGIN:VCARD\r\n", "vCard", 13, "text/directory;profile=vcard"},
      {"BEGIN:VCARD\n", "vCard", 12, "text/directory;profile=vcard"},
      {0,0,0,0}
  };
  737. int
  738. istring(void)
  739. {
  740. int i, l;
  741. struct FILE_STRING *p;
  742. for(p = file_string; p->key; p++) {
  743. l = p->length;
  744. if(l == -1)
  745. l = strlen(p->key);
  746. if(nbuf >= l && memcmp(buf, p->key, l) == 0) {
  747. if(mime)
  748. print("%s\n", p->mime);
  749. else
  750. print("%s\n", p->filetype);
  751. return 1;
  752. }
  753. }
  754. if(strncmp((char*)buf, "TYPE=", 5) == 0) { /* td */
  755. for(i = 5; i < nbuf; i++)
  756. if(buf[i] == '\n')
  757. break;
  758. if(mime)
  759. print(OCTET);
  760. else
  761. print("%.*s picture\n", utfnlen((char*)buf+5, i-5),
  762. (char*)buf+5);
  763. return 1;
  764. }
  765. return 0;
  766. }
  767. struct offstr
  768. {
  769. uint32_t off;
  770. struct FILE_STRING FILE_STRING;
  771. } offstrs[] = {
  772. {32*1024, { "\001CD001\001", "ISO9660 CD image", 7, OCTET }},
  773. {0, { 0, 0, 0, 0 }}
  774. };
  775. int
  776. isoffstr(void)
  777. {
  778. int n;
  779. char buf[256];
  780. struct offstr *p;
  781. for(p = offstrs; p->FILE_STRING.key; p++) {
  782. seek(fd, p->off, 0);
  783. n = p->FILE_STRING.length;
  784. if (n > sizeof buf)
  785. n = sizeof buf;
  786. if (readn(fd, buf, n) != n)
  787. continue;
  788. if(memcmp(buf, p->FILE_STRING.key, n) == 0) {
  789. if(mime)
  790. print("%s\n", p->FILE_STRING.mime);
  791. else
  792. print("%s\n", p->FILE_STRING.filetype);
  793. return 1;
  794. }
  795. }
  796. return 0;
  797. }
  798. int
  799. iff(void)
  800. {
  801. if (strncmp((char*)buf, "FORM", 4) == 0 &&
  802. strncmp((char*)buf+8, "AIFF", 4) == 0) {
  803. print("%s\n", mime? "audio/x-aiff": "aiff audio");
  804. return 1;
  805. }
  806. if (strncmp((char*)buf, "RIFF", 4) == 0) {
  807. if (strncmp((char*)buf+8, "WAVE", 4) == 0)
  808. print("%s\n", mime? "audio/wave": "wave audio");
  809. else if (strncmp((char*)buf+8, "AVI ", 4) == 0)
  810. print("%s\n", mime? "video/avi": "avi video");
  811. else
  812. print("%s\n", mime? "application/octet-stream":
  813. "riff file");
  814. return 1;
  815. }
  816. return 0;
  817. }
  /*
   * Tag names that ishtml() looks for between '<' and '>'.
   * The list is terminated by a null pointer.
   */
  char* html_string[] =
  {
      "title", "body", "head", "strong",
      "h1", "h2", "h3", "h4", "h5", "h6",
      "ul", "li", "dl", "br", "em",
      0,
  };
  837. int
  838. ishtml(void)
  839. {
  840. uint8_t *p, *q;
  841. int i, count;
  842. /* compare strings between '<' and '>' to html table */
  843. count = 0;
  844. p = buf;
  845. for(;;) {
  846. while (p < buf+nbuf && *p != '<')
  847. p++;
  848. p++;
  849. if (p >= buf+nbuf)
  850. break;
  851. if(*p == '/')
  852. p++;
  853. q = p;
  854. while(p < buf+nbuf && *p != '>')
  855. p++;
  856. if (p >= buf+nbuf)
  857. break;
  858. for(i = 0; html_string[i]; i++) {
  859. if(cistrncmp(html_string[i], (char*)q, p-q) == 0) {
  860. if(count++ > 4) {
  861. print(mime ? "text/html\n" : "HTML file\n");
  862. return 1;
  863. }
  864. break;
  865. }
  866. }
  867. p++;
  868. }
  869. return 0;
  870. }
  /*
   * Header field names that isrfc822() counts.  They are compared
   * with cistrncmp() against the start of each header line,
   * so they are written in lower case and include the ':'.
   * The list is terminated by a null pointer.
   */
  char* rfc822_string[] =
  {
      "from:",
      "date:",
      "to:",
      "subject:",
      "received:",
      "reply-to:",
      "sender:",
      0,
  };
  882. int
  883. isrfc822(void)
  884. {
  885. char *p, *q, *r;
  886. int i, count;
  887. count = 0;
  888. p = (char*)buf;
  889. for(;;) {
  890. q = strchr(p, '\n');
  891. if(q == nil)
  892. break;
  893. *q = 0;
  894. if(p == (char*)buf && strncmp(p, "From ", 5) == 0 && strstr(p, " remote from ")){
  895. count++;
  896. *q = '\n';
  897. p = q+1;
  898. continue;
  899. }
  900. *q = '\n';
  901. if(*p != '\t' && *p != ' '){
  902. r = strchr(p, ':');
  903. if(r == 0 || r > q)
  904. break;
  905. for(i = 0; rfc822_string[i]; i++) {
  906. if(cistrncmp(p, rfc822_string[i], strlen(rfc822_string[i])) == 0){
  907. count++;
  908. break;
  909. }
  910. }
  911. }
  912. p = q+1;
  913. }
  914. if(count >= 3){
  915. print(mime ? "message/rfc822\n" : "email file\n");
  916. return 1;
  917. }
  918. return 0;
  919. }
  920. int
  921. ismbox(void)
  922. {
  923. char *p, *q;
  924. p = (char*)buf;
  925. q = strchr(p, '\n');
  926. if(q == nil)
  927. return 0;
  928. *q = 0;
  929. if(strncmp(p, "From ", 5) == 0 && strstr(p, " remote from ") == nil){
  930. print(mime ? "text/plain\n" : "mail box\n");
  931. return 1;
  932. }
  933. *q = '\n';
  934. return 0;
  935. }
  936. int
  937. iscint(void)
  938. {
  939. int type;
  940. char *name;
  941. Biobuf b;
  942. if(Binit(&b, fd, OREAD) == Beof)
  943. return 0;
  944. seek(fd, 0, 0);
  945. type = objtype(&b, &name);
  946. if(type < 0)
  947. return 0;
  948. if(mime)
  949. print(OCTET);
  950. else
  951. print("%s intermediate\n", name);
  952. return 1;
  953. }
  954. int
  955. isc(void)
  956. {
  957. int n;
  958. n = wfreq[I1];
  959. /*
  960. * includes
  961. */
  962. if(n >= 2 && wfreq[I2] >= n && wfreq[I3] >= n && cfreq['.'] >= n)
  963. goto yes;
  964. if(n >= 1 && wfreq[Alword] >= n && wfreq[I3] >= n && cfreq['.'] >= n)
  965. goto yes;
  966. /*
  967. * declarations
  968. */
  969. if(wfreq[Cword] >= 5 && cfreq[';'] >= 5)
  970. goto yes;
  971. /*
  972. * assignments
  973. */
  974. if(cfreq[';'] >= 10 && cfreq['='] >= 10 && wfreq[Cword] >= 1)
  975. goto yes;
  976. return 0;
  977. yes:
  978. if(mime){
  979. print(PLAIN);
  980. return 1;
  981. }
  982. if(wfreq[Alword] > 0)
  983. print("alef program\n");
  984. else
  985. print("c program\n");
  986. return 1;
  987. }
  988. int
  989. islimbo(void)
  990. {
  991. /*
  992. * includes
  993. */
  994. if(wfreq[Lword] < 4)
  995. return 0;
  996. print(mime ? PLAIN : "limbo program\n");
  997. return 1;
  998. }
  999. int
  1000. isas(void)
  1001. {
  1002. /*
  1003. * includes
  1004. */
  1005. if(wfreq[Aword] < 2)
  1006. return 0;
  1007. print(mime ? PLAIN : "as program\n");
  1008. return 1;
  1009. }
  1010. /*
  1011. * low entropy means encrypted
  1012. */
  1013. int
  1014. ismung(void)
  1015. {
  1016. int i, bucket[8];
  1017. float cs;
  1018. if(nbuf < 64)
  1019. return 0;
  1020. memset(bucket, 0, sizeof(bucket));
  1021. for(i=nbuf-64; i<nbuf; i++)
  1022. bucket[(buf[i]>>5)&07] += 1;
  1023. cs = 0.;
  1024. for(i=0; i<8; i++)
  1025. cs += (bucket[i]-8)*(bucket[i]-8);
  1026. cs /= 8.;
  1027. if(cs <= 24.322) {
  1028. if(buf[0]==0x1f && buf[1]==0x9d)
  1029. print(mime ? OCTET : "compressed\n");
  1030. else
  1031. if(buf[0]==0x1f && buf[1]==0x8b)
  1032. print(mime ? OCTET : "gzip compressed\n");
  1033. else
  1034. if(buf[0]=='B' && buf[1]=='Z' && buf[2]=='h')
  1035. print(mime ? OCTET : "bzip2 compressed\n");
  1036. else
  1037. print(mime ? OCTET : "encrypted\n");
  1038. return 1;
  1039. }
  1040. return 0;
  1041. }
  1042. /*
  1043. * english by punctuation and frequencies
  1044. */
  1045. int
  1046. isenglish(void)
  1047. {
  1048. int vow, comm, rare, badpun, punct;
  1049. char *p;
  1050. if(guess != Fascii && guess != Feascii)
  1051. return 0;
  1052. badpun = 0;
  1053. punct = 0;
  1054. for(p = (char *)buf; p < (char *)buf+nbuf-1; p++)
  1055. switch(*p) {
  1056. case '.':
  1057. case ',':
  1058. case ')':
  1059. case '%':
  1060. case ';':
  1061. case ':':
  1062. case '?':
  1063. punct++;
  1064. if(p[1] != ' ' && p[1] != '\n')
  1065. badpun++;
  1066. }
  1067. if(badpun*5 > punct)
  1068. return 0;
  1069. if(cfreq['>']+cfreq['<']+cfreq['/'] > cfreq['e']) /* shell file test */
  1070. return 0;
  1071. if(2*cfreq[';'] > cfreq['e'])
  1072. return 0;
  1073. vow = 0;
  1074. for(p="AEIOU"; *p; p++) {
  1075. vow += cfreq[(uint8_t)*p];
  1076. vow += cfreq[tolower(*p)];
  1077. }
  1078. comm = 0;
  1079. for(p="ETAION"; *p; p++) {
  1080. comm += cfreq[(uint8_t)*p];
  1081. comm += cfreq[tolower(*p)];
  1082. }
  1083. rare = 0;
  1084. for(p="VJKQXZ"; *p; p++) {
  1085. rare += cfreq[(uint8_t)*p];
  1086. rare += cfreq[tolower(*p)];
  1087. }
  1088. if(vow*5 >= nbuf-cfreq[' '] && comm >= 10*rare) {
  1089. print(mime ? PLAIN : "English text\n");
  1090. return 1;
  1091. }
  1092. return 0;
  1093. }
  /*
   * pick up a number with
   * syntax _*[0-9]+_
   *
   * bp points at a field of P9BITLEN bytes: optional leading blanks,
   * then decimal digits filling the field up to its last byte, which
   * must be a blank.
   *
   * Returns the value, or -1 when the field is malformed or the value
   * does not fit in an int.
   */
  #define P9BITLEN    12

  int
  p9bitnum(uint8_t *bp)
  {
      int n, c, len;
      const int maxint = (int)(~0u >> 1);

      len = P9BITLEN;
      while(*bp == ' ') {
          bp++;
          len--;
          if(len <= 0)
              return -1;        /* nothing but blanks */
      }

      n = 0;
      while(len > 1) {
          c = *bp++;
          if(!isdigit(c))
              return -1;
          c -= '0';
          /* up to 11 digits fit in a field; refuse instead of overflowing */
          if(n > (maxint - c) / 10)
              return -1;
          n = n*10 + c;
          len--;
      }
      if(*bp != ' ')
          return -1;
      return n;
  }
  /*
   * Work out the pixel depth from the first 12-byte field of an image
   * header.
   *
   * After leading blanks the field holds either
   *   - a decimal number n (old style): the depth is 1<<n and *newp
   *     is set to 0, or
   *   - a channel string such as "r8g8b8" (new style): the depth is
   *     the sum of the numbers following each letter and *newp is
   *     set to 1.
   *
   * Returns the depth, or -1 when
   *   - the field is blank,
   *   - an old-style n cannot be used as a shift count,
   *   - a new-style depth is zero or does not fit in an int, or
   *   - a new-style depth is neither a multiple of 8 nor a divisor
   *     of 8.
   * A valid result is therefore always positive, so callers may divide
   * by it.
   */
  int
  depthof(char *s, int *newp)
  {
      char *es;
      long ldepth;
      unsigned long d;

      *newp = 0;
      es = s+12;
      while(s<es && *s==' ')
          s++;
      if(s == es)
          return -1;

      if('0'<=*s && *s<='9'){
          ldepth = strtol(s, 0, 0);
          /* shifting by the width of int or more is undefined */
          if(ldepth < 0 || ldepth >= (long)(sizeof(int)*8) - 1)
              return -1;
          return 1<<ldepth;
      }

      *newp = 1;
      d = 0;
      while(s<es && *s!=' '){
          s++;            /* skip letter */
          /*
           * Only convert when a digit really follows: strtoul skips
           * blanks and would otherwise swallow the next header field.
           */
          if(s<es && '0'<=*s && *s<='9')
              d += strtoul((const char *)s, &s, 10);
      }
      if(d == 0 || d > (~0u >> 1))
          return -1;
      if(d % 8 == 0 || 8 % d == 0)
          return (int)d;
      return -1;
  }
  1146. int
  1147. isp9bit(void)
  1148. {
  1149. int dep, lox, loy, hix, hiy, px, new, cmpr;
  1150. uint32_t t;
  1151. int32_t len;
  1152. char *newlabel;
  1153. uint8_t *cp;
  1154. cp = buf;
  1155. cmpr = 0;
  1156. newlabel = "old ";
  1157. if(memcmp(cp, "compressed\n", 11) == 0) {
  1158. cmpr = 1;
  1159. cp = buf + 11;
  1160. }
  1161. dep = depthof((char*)cp + 0*P9BITLEN, &new);
  1162. if(new)
  1163. newlabel = "";
  1164. lox = p9bitnum(cp + 1*P9BITLEN);
  1165. loy = p9bitnum(cp + 2*P9BITLEN);
  1166. hix = p9bitnum(cp + 3*P9BITLEN);
  1167. hiy = p9bitnum(cp + 4*P9BITLEN);
  1168. if(dep < 0 || lox < 0 || loy < 0 || hix < 0 || hiy < 0)
  1169. return 0;
  1170. if(dep < 8){
  1171. px = 8/dep; /* pixels per byte */
  1172. /* set l to number of bytes of data per scan line */
  1173. if(lox >= 0)
  1174. len = (hix+px-1)/px - lox/px;
  1175. else{ /* make positive before divide */
  1176. t = (-lox)+px-1;
  1177. t = (t/px)*px;
  1178. len = (t+hix+px-1)/px;
  1179. }
  1180. }else
  1181. len = (hix-lox)*dep/8;
  1182. len *= hiy - loy; /* col length */
  1183. len += 5 * P9BITLEN; /* size of initial ascii */
  1184. /*
  1185. * for compressed images, don't look any further. otherwise:
  1186. * for image file, length is non-zero and must match calculation above.
  1187. * for /dev/window and /dev/screen the length is always zero.
  1188. * for subfont, the subfont header should follow immediately.
  1189. */
  1190. if (cmpr) {
  1191. print(mime ? OCTET : "Compressed %splan 9 image or subfont, depth %d\n",
  1192. newlabel, dep);
  1193. return 1;
  1194. }
  1195. /*
  1196. * mbuf->length == 0 probably indicates reading a pipe.
  1197. * Ghostscript sometimes produces a little extra on the end.
  1198. */
  1199. if (len != 0 && (mbuf->length == 0 || mbuf->length == len ||
  1200. (mbuf->length > len && mbuf->length < len+P9BITLEN))) {
  1201. print(mime ? OCTET : "%splan 9 image, depth %d\n", newlabel, dep);
  1202. return 1;
  1203. }
  1204. if (p9subfont(buf+len)) {
  1205. print(mime ? OCTET : "%ssubfont file, depth %d\n", newlabel, dep);
  1206. return 1;
  1207. }
  1208. return 0;
  1209. }
  1210. int
  1211. p9subfont(uint8_t *p)
  1212. {
  1213. int n, h, a;
  1214. /* if image too big, assume it's a subfont */
  1215. if (p+3*P9BITLEN > buf+sizeof(buf))
  1216. return 1;
  1217. n = p9bitnum(p + 0*P9BITLEN); /* char count */
  1218. if (n < 0)
  1219. return 0;
  1220. h = p9bitnum(p + 1*P9BITLEN); /* height */
  1221. if (h < 0)
  1222. return 0;
  1223. a = p9bitnum(p + 2*P9BITLEN); /* ascent */
  1224. if (a < 0)
  1225. return 0;
  1226. return 1;
  1227. }
  1228. #define WHITESPACE(c) ((c) == ' ' || (c) == '\t' || (c) == '\n')
  1229. int
  1230. isp9font(void)
  1231. {
  1232. uint8_t *cp, *p;
  1233. int i, n;
  1234. char pathname[1024];
  1235. cp = buf;
  1236. if (!getfontnum(cp, &cp)) /* height */
  1237. return 0;
  1238. if (!getfontnum(cp, &cp)) /* ascent */
  1239. return 0;
  1240. for (i = 0; (cp=(uint8_t*)strchr((char*)cp, '\n')) != nil; i++) {
  1241. if (!getfontnum(cp, &cp)) /* min */
  1242. break;
  1243. if (!getfontnum(cp, &cp)) /* max */
  1244. return 0;
  1245. getfontnum(cp, &cp); /* optional offset */
  1246. while (WHITESPACE(*cp))
  1247. cp++;
  1248. for (p = cp; *cp && !WHITESPACE(*cp); cp++)
  1249. ;
  1250. /* construct a path name, if needed */
  1251. n = 0;
  1252. if (*p != '/' && slash) {
  1253. n = slash-fname+1;
  1254. if (n < sizeof(pathname))
  1255. memcpy(pathname, fname, n);
  1256. else n = 0;
  1257. }
  1258. if (n+cp-p+4 < sizeof(pathname)) {
  1259. memcpy(pathname+n, p, cp-p);
  1260. n += cp-p;
  1261. pathname[n] = 0;
  1262. if (access(pathname, AEXIST) < 0) {
  1263. strcpy(pathname+n, ".0");
  1264. if (access(pathname, AEXIST) < 0)
  1265. return 0;
  1266. }
  1267. }
  1268. }
  1269. if (i) {
  1270. print(mime ? "text/plain\n" : "font file\n");
  1271. return 1;
  1272. }
  1273. return 0;
  1274. }
  /*
   * Scan one whitespace-delimited unsigned number starting at cp.
   *
   * Leading white space is skipped.  On success *rp points just past
   * the digits (at the delimiting white space) and 1 is returned.
   * When the first non-blank byte is not a digit, 0 is returned and
   * *rp is left untouched.  When the number is not followed by white
   * space, 0 is returned and *rp is set to the start of the number.
   * The value itself is discarded.
   */
  int
  getfontnum(uint8_t *cp, uint8_t **rp)
  {
      uint8_t *start;

      for (start = cp; WHITESPACE(*start); start++)
          ;
      if (!(*start >= '0' && *start <= '9'))
          return 0;

      strtoul((const char *)start, (char **)rp, 0);
      if (WHITESPACE(**rp))
          return 1;
      *rp = start;
      return 0;
  }
  1289. int
  1290. isrtf(void)
  1291. {
  1292. if(strstr((char *)buf, "\\rtf1")){
  1293. print(mime ? "application/rtf\n" : "rich text format\n");
  1294. return 1;
  1295. }
  1296. return 0;
  1297. }
  1298. int
  1299. ismsdos(void)
  1300. {
  1301. if (buf[0] == 0x4d && buf[1] == 0x5a){
  1302. print(mime ? "application/x-msdownload\n" : "MSDOS executable\n");
  1303. return 1;
  1304. }
  1305. return 0;
  1306. }
  1307. int
  1308. iself(void)
  1309. {
  1310. static char *cpu[] = { /* NB: incomplete and arbitary list */
  1311. [1] = "WE32100",
  1312. [2] = "SPARC",
  1313. [3] = "i386",
  1314. [4] = "M68000",
  1315. [5] = "M88000",
  1316. [6] = "i486",
  1317. [7] = "i860",
  1318. [8] = "R3000",
  1319. [9] = "S370",
  1320. [10] = "R4000",
  1321. [15] = "HP-PA",
  1322. [18] = "sparc v8+",
  1323. [19] = "i960",
  1324. [20] = "PPC-32",
  1325. [21] = "PPC-64",
  1326. [40] = "ARM",
  1327. [41] = "Alpha",
  1328. [43] = "sparc v9",
  1329. [50] = "IA-64",
  1330. [62] = "AMD64",
  1331. [75] = "VAX",
  1332. };
  1333. static char *type[] = {
  1334. [1] = "relocatable object",
  1335. [2] = "executable",
  1336. [3] = "shared library",
  1337. [4] = "core dump",
  1338. };
  1339. if (memcmp(buf, "\x7f""ELF", 4) == 0){
  1340. if (!mime){
  1341. int isdifend = 0;
  1342. int n = (buf[19] << 8) | buf[18];
  1343. char *p = "unknown";
  1344. char *t = "unknown";
  1345. if (n > 0 && n < nelem(cpu) && cpu[n])
  1346. p = cpu[n];
  1347. else {
  1348. /* try the other byte order */
  1349. isdifend = 1;
  1350. n = (buf[18] << 8) | buf[19];
  1351. if (n > 0 && n < nelem(cpu) && cpu[n])
  1352. p = cpu[n];
  1353. }
  1354. if(isdifend)
  1355. n = (buf[16]<< 8) | buf[17];
  1356. else
  1357. n = (buf[17]<< 8) | buf[16];
  1358. if(n>0 && n < nelem(type) && type[n])
  1359. t = type[n];
  1360. print("%s ELF%s %s\n", p, (buf[4] == 2? "64": "32"), t);
  1361. }
  1362. else
  1363. print("application/x-elf-executable");
  1364. return 1;
  1365. }
  1366. return 0;
  1367. }
  1368. int
  1369. isface(void)
  1370. {
  1371. int i, j, ldepth, l;
  1372. char *p;
  1373. ldepth = -1;
  1374. for(j = 0; j < 3; j++){
  1375. for(p = (char*)buf, i=0; i<3; i++){
  1376. if(p[0] != '0' || p[1] != 'x')
  1377. return 0;
  1378. if(buf[2+8] == ',')
  1379. l = 2;
  1380. else if(buf[2+4] == ',')
  1381. l = 1;
  1382. else
  1383. return 0;
  1384. if(ldepth == -1)
  1385. ldepth = l;
  1386. if(l != ldepth)
  1387. return 0;
  1388. strtoul((const char *)p, &p, 16);
  1389. if(*p++ != ',')
  1390. return 0;
  1391. while(*p == ' ' || *p == '\t')
  1392. p++;
  1393. }
  1394. if (*p++ != '\n')
  1395. return 0;
  1396. }
  1397. if(mime)
  1398. print("application/x-face\n");
  1399. else
  1400. print("face image depth %d\n", ldepth);
  1401. return 1;
  1402. }