classify.c 9.4 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427
  1. #include <u.h>
  2. #include <libc.h>
  3. #include <bio.h>
  4. #include <ndb.h>
  5. #include "whois.h"
  6. typedef struct Country Country;
  7. struct Country
  8. {
  9. char *code;
  10. char *name;
  11. };
  12. Country badc[] =
  13. {
  14. {"af", "afghanistan"},
  15. {"cu", "cuba"},
  16. {"ir", "iran"},
  17. {"iq", "iraq"},
  18. {"ly", "libya"},
  19. {"kp", "north korea"},
  20. {"sd", "sudan"},
  21. {"sy", "syria"},
  22. { 0, 0 }
  23. };
  24. Country goodc[] =
  25. {
  26. // the original, us and canada
  27. {"us", "united states of america"},
  28. {"ca", "canada"},
  29. {"gov", "gov"},
  30. {"mil", "mil"},
  31. // the european union
  32. { "eu", "european union" },
  33. { "be", "belgium" },
  34. { "de", "germany" },
  35. { "fr", "france" },
  36. { "it", "italy" },
  37. { "lu", "luxembourg" },
  38. { "nl", "netherlands" },
  39. { "dk", "denmark" },
  40. { "ie", "ireland" },
  41. { "gb", "great britain" },
  42. { "uk", "united kingdom" },
  43. { "gr", "greece" },
  44. { "es", "spain" },
  45. { "pt", "portugal" },
  46. { "au", "australia" },
  47. { "fi", "finland" },
  48. { "se", "sweden" },
  49. // the rest
  50. {"au", "australia"},
  51. {"no", "norway"},
  52. {"cz", "czech republic"},
  53. {"hu", "hungary"},
  54. {"pl", "poland"},
  55. {"jp", "japan"},
  56. {"ch", "switzerland"},
  57. {"nz", "new zealand"},
  58. { 0, 0 }
  59. };
  60. char *gov[] =
  61. {
  62. "gov",
  63. "gouv",
  64. "mil",
  65. "government",
  66. 0,
  67. };
  68. Country allc[] =
  69. {
  70. { "ad", "andorra" },
  71. { "ae", "united arab emirates" },
  72. { "af", "afghanistan" },
  73. { "ag", "antigua and barbuda" },
  74. { "ai", "anguilla" },
  75. { "al", "albania" },
  76. { "am", "armenia" },
  77. { "an", "netherlands antilles" },
  78. { "ao", "angola" },
  79. { "aq", "antarctica" },
  80. { "ar", "argentina" },
  81. { "as", "american samoa" },
  82. { "at", "austria" },
  83. { "au", "australia" },
  84. { "aw", "aruba" },
  85. { "az", "azerbaijan" },
  86. { "ba", "bosnia and herzegovina" },
  87. { "bb", "barbados" },
  88. { "bd", "bangladesh" },
  89. { "be", "belgium" },
  90. { "bf", "burkina faso" },
  91. { "bg", "bulgaria" },
  92. { "bh", "bahrain" },
  93. { "bi", "burundi" },
  94. { "bj", "benin" },
  95. { "bm", "bermuda" },
  96. { "bn", "brunei darussalam" },
  97. { "bo", "bolivia" },
  98. { "br", "brazil" },
  99. { "bs", "bahamas" },
  100. { "bt", "bhutan" },
  101. { "bu", "burma" },
  102. { "bv", "bouvet island" },
  103. { "bw", "botswana" },
  104. { "by", "belarus" },
  105. { "bz", "belize" },
  106. { "ca", "canada" },
  107. { "cc", "cocos (keeling) islands" },
  108. { "cf", "central african republic" },
  109. { "cg", "congo" },
  110. { "ch", "switzerland" },
  111. { "ci", "cote d'ivoire (ivory coast)" },
  112. { "ck", "cook islands" },
  113. { "cl", "chile" },
  114. { "cm", "cameroon" },
  115. { "cn", "china" },
  116. { "co", "colombia" },
  117. { "cr", "costa rica" },
  118. { "cs", "czechoslovakia (former)" },
  119. { "ct", "canton and enderbury island" },
  120. { "cu", "cuba" },
  121. { "cv", "cape verde" },
  122. { "cx", "christmas island" },
  123. { "cy", "cyprus" },
  124. { "cz", "czech republic" },
  125. { "dd", "german democratic republic" },
  126. { "de", "germany" },
  127. { "dj", "djibouti" },
  128. { "dk", "denmark" },
  129. { "dm", "dominica" },
  130. { "do", "dominican republic" },
  131. { "dz", "algeria" },
  132. { "ec", "ecuador" },
  133. { "ee", "estonia" },
  134. { "eg", "egypt" },
  135. { "eh", "western sahara" },
  136. { "er", "eritrea" },
  137. { "es", "spain" },
  138. { "et", "ethiopia" },
  139. { "eu", "european union" },
  140. { "fi", "finland" },
  141. { "fj", "fiji" },
  142. { "fk", "falkland islands (malvinas)" },
  143. { "fm", "micronesia" },
  144. { "fo", "faroe islands" },
  145. { "fr", "france" },
  146. { "fx", "france, metropolitan" },
  147. { "ga", "gabon" },
  148. { "gb", "great britain (uk)" },
  149. { "gd", "grenada" },
  150. { "ge", "georgia" },
  151. { "gf", "french guiana" },
  152. { "gh", "ghana" },
  153. { "gi", "gibraltar" },
  154. { "gl", "greenland" },
  155. { "gm", "gambia" },
  156. { "gn", "guinea" },
  157. { "gp", "guadeloupe" },
  158. { "gq", "equatorial guinea" },
  159. { "gr", "greece" },
  160. { "gs", "s. georgia and s. sandwich isls." },
  161. { "gt", "guatemala" },
  162. { "gu", "guam" },
  163. { "gw", "guinea-bissau" },
  164. { "gy", "guyana" },
  165. { "hk", "hong kong" },
  166. { "hm", "heard and mcdonald islands" },
  167. { "hn", "honduras" },
  168. { "hr", "croatia (hrvatska)" },
  169. { "ht", "haiti" },
  170. { "hu", "hungary" },
  171. { "id", "indonesia" },
  172. { "ie", "ireland" },
  173. { "il", "israel" },
  174. { "in", "india" },
  175. { "io", "british indian ocean territory" },
  176. { "iq", "iraq" },
  177. { "ir", "iran" },
  178. { "is", "iceland" },
  179. { "it", "italy" },
  180. { "jm", "jamaica" },
  181. { "jo", "jordan" },
  182. { "jp", "japan" },
  183. { "jt", "johnston island" },
  184. { "ke", "kenya" },
  185. { "kg", "kyrgyzstan" },
  186. { "kh", "cambodia (democratic kampuchea)" },
  187. { "ki", "kiribati" },
  188. { "km", "comoros" },
  189. { "kn", "saint kitts and nevis" },
  190. { "kp", "korea (north)" },
  191. { "kr", "korea (south)" },
  192. { "kw", "kuwait" },
  193. { "ky", "cayman islands" },
  194. { "kz", "kazakhstan" },
  195. { "la", "laos" },
  196. { "lb", "lebanon" },
  197. { "lc", "saint lucia" },
  198. { "li", "liechtenstein" },
  199. { "lk", "sri lanka" },
  200. { "lr", "liberia" },
  201. { "ls", "lesotho" },
  202. { "lt", "lithuania" },
  203. { "lu", "luxembourg" },
  204. { "lv", "latvia" },
  205. { "ly", "libya" },
  206. { "ma", "morocco" },
  207. { "mc", "monaco" },
  208. { "md", "moldova" },
  209. { "mg", "madagascar" },
  210. { "mh", "marshall islands" },
  211. { "mi", "midway islands" },
  212. { "mk", "macedonia" },
  213. { "ml", "mali" },
  214. { "mm", "myanmar" },
  215. { "mn", "mongolia" },
  216. { "mo", "macau" },
  217. { "mp", "northern mariana islands" },
  218. { "mq", "martinique" },
  219. { "mr", "mauritania" },
  220. { "ms", "montserrat" },
  221. { "mt", "malta" },
  222. { "mu", "mauritius" },
  223. { "mv", "maldives" },
  224. { "mw", "malawi" },
  225. { "mx", "mexico" },
  226. { "my", "malaysia" },
  227. { "mz", "mozambique" },
  228. { "na", "namibia" },
  229. { "nc", "new caledonia" },
  230. { "ne", "niger" },
  231. { "nf", "norfolk island" },
  232. { "ng", "nigeria" },
  233. { "ni", "nicaragua" },
  234. { "nl", "netherlands" },
  235. { "no", "norway" },
  236. { "np", "nepal" },
  237. { "nq", "dronning maud land" },
  238. { "nr", "nauru" },
  239. { "nt", "neutral zone" },
  240. { "nu", "niue" },
  241. { "nz", "new zealand (aotearoa)" },
  242. { "om", "oman" },
  243. { "pa", "panama" },
  244. { "pc", "pacific islands" },
  245. { "pe", "peru" },
  246. { "pf", "french polynesia" },
  247. { "pg", "papua new guinea" },
  248. { "ph", "philippines" },
  249. { "pk", "pakistan" },
  250. { "pl", "poland" },
  251. { "pm", "st. pierre and miquelon" },
  252. { "pn", "pitcairn" },
  253. { "pr", "puerto rico" },
  254. { "pu", "united states misc. pacific islands" },
  255. { "pt", "portugal" },
  256. { "pw", "palau" },
  257. { "py", "paraguay" },
  258. { "qa", "qatar" },
  259. { "re", "reunion" },
  260. { "ro", "romania" },
  261. { "ru", "russian federation" },
  262. { "rw", "rwanda" },
  263. { "sa", "saudi arabia" },
  264. { "sb", "solomon islands" },
  265. { "sc", "seychelles" },
  266. { "sd", "sudan" },
  267. { "se", "sweden" },
  268. { "sg", "singapore" },
  269. { "sh", "st. helena" },
  270. { "si", "slovenia" },
  271. { "sj", "svalbard and jan mayen islands" },
  272. { "sk", "slovak republic" },
  273. { "sl", "sierra leone" },
  274. { "sm", "san marino" },
  275. { "sn", "senegal" },
  276. { "so", "somalia" },
  277. { "sr", "suriname" },
  278. { "st", "sao tome and principe" },
  279. { "su", "ussr (former)" },
  280. { "sv", "el salvador" },
  281. { "sy", "syria" },
  282. { "sz", "swaziland" },
  283. { "tc", "turks and caicos islands" },
  284. { "td", "chad" },
  285. { "tf", "french southern territories" },
  286. { "tg", "togo" },
  287. { "th", "thailand" },
  288. { "tj", "tajikistan" },
  289. { "tk", "tokelau" },
  290. { "tm", "turkmenistan" },
  291. { "tn", "tunisia" },
  292. { "to", "tonga" },
  293. { "tp", "east timor" },
  294. { "tr", "turkey" },
  295. { "tt", "trinidad and tobago" },
  296. { "tv", "tuvalu" },
  297. { "tw", "taiwan" },
  298. { "tz", "tanzania" },
  299. { "ua", "ukraine" },
  300. { "ug", "uganda" },
  301. { "uk", "united kingdom" },
  302. { "um", "us minor outlying islands" },
  303. { "us", "united states" },
  304. { "uy", "uruguay" },
  305. { "uz", "uzbekistan" },
  306. { "va", "vatican city state (holy see)" },
  307. { "vc", "saint vincent and the grenadines" },
  308. { "ve", "venezuela" },
  309. { "vg", "virgin islands (british)" },
  310. { "vi", "virgin islands (u.s.)" },
  311. { "vn", "viet nam" },
  312. { "vu", "vanuatu" },
  313. { "wf", "wallis and futuna islands" },
  314. { "wk", "wake island" },
  315. { "ws", "samoa" },
  316. { "yd", "democratic yemen" },
  317. { "ye", "yemen" },
  318. { "yt", "mayotte" },
  319. { "yu", "yugoslavia" },
  320. { "za", "south africa" },
  321. { "zm", "zambia" },
  322. { "zr", "zaire" },
  323. { "zw", "zimbabwe" },
  324. {"gov", "gov"},
  325. {"mil", "mil"},
  326. { 0, 0 }
  327. };
  328. int classdebug;
  329. static int
  330. incountries(char *s, Country *cp)
  331. {
  332. for(; cp->code != 0; cp++)
  333. if(cistrcmp(s, cp->code) == 0
  334. || cistrcmp(s, cp->name) == 0)
  335. return 1;
  336. return 0;
  337. }
  338. static int
  339. indomains(char *s, char **dp)
  340. {
  341. for(; *dp != nil; dp++)
  342. if(cistrcmp(s, *dp) == 0)
  343. return 1;
  344. return 0;
  345. }
  346. int
  347. classify(char *ip, Ndbtuple *t)
  348. {
  349. int isgov, iscountry, isbadc, isgoodc;
  350. char dom[256];
  351. char *df[128];
  352. Ndbtuple *nt, *x;
  353. int n;
  354. isgov = iscountry = isbadc = 0;
  355. isgoodc = 1;
  356. for(nt = t; nt != nil; nt = nt->entry){
  357. if(strcmp(nt->attr, "country") == 0){
  358. iscountry = 1;
  359. if(incountries(nt->val, badc)){
  360. if(classdebug)fprint(2, "isbadc\n");
  361. isbadc = 1;
  362. isgoodc = 0;
  363. } else if(!incountries(nt->val, goodc)){
  364. if(classdebug)fprint(2, "!isgoodc\n");
  365. isgoodc = 0;
  366. }
  367. }
  368. /* domain names can always hurt, even without forward verification */
  369. if(strcmp(nt->attr, "dom") == 0){
  370. strncpy(dom, nt->val, sizeof dom);
  371. dom[sizeof(dom)-1] = 0;
  372. n = getfields(dom, df, nelem(df), 0, ".");
  373. /* a bad country in a domain name is always believed */
  374. if(incountries(df[n-1], badc)){
  375. if(classdebug)fprint(2, "isbadc dom\n");
  376. isbadc = 1;
  377. isgoodc = 0;
  378. }
  379. /* a goverment in a domain name is always believed */
  380. if(n > 1 && indomains(df[n-2], gov))
  381. isgov = 1;
  382. }
  383. }
  384. if(iscountry == 0){
  385. /* did the forward lookup work? */
  386. for(nt = t; nt != nil; nt = nt->entry){
  387. if(strcmp(nt->attr, "ip") == 0 && strcmp(nt->val, ip) == 0)
  388. break;
  389. }
  390. /* see if the domain name ends in a country code */
  391. if(nt != nil && (x = ndbfindattr(t, nt, "dom")) != nil){
  392. strncpy(dom, x->val, sizeof dom);
  393. dom[sizeof(dom)-1] = 0;
  394. n = getfields(dom, df, nelem(df), 0, ".");
  395. if(incountries(df[n-1], allc))
  396. iscountry = 1;
  397. }
  398. }
  399. if(iscountry == 0)
  400. return Cunknown;
  401. if(isbadc)
  402. return Cbadc;
  403. if(!isgoodc && isgov)
  404. return Cbadgov;
  405. return Cok;
  406. }