unicode.c 1.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122
  1. #include <u.h>
  2. #include <libc.h>
  3. #include <bio.h>
  4. char usage[] = "unicode { [-t] hex hex ... | hexmin-hexmax ... | [-n] char ... }";
  5. char hex[] = "0123456789abcdefABCDEF";
  6. int numout = 0;
  7. int text = 0;
  8. char *err;
  9. Biobuf bout;
  10. char *range(char*[]);
  11. char *nums(char*[]);
  12. char *chars(char*[]);
  13. void
  14. main(int argc, char *argv[])
  15. {
  16. ARGBEGIN{
  17. case 'n':
  18. numout = 1;
  19. break;
  20. case 't':
  21. text = 1;
  22. break;
  23. }ARGEND
  24. Binit(&bout, 1, OWRITE);
  25. if(argc == 0){
  26. fprint(2, "usage: %s\n", usage);
  27. exits("usage");
  28. }
  29. if(!numout && utfrune(argv[0], '-'))
  30. exits(range(argv));
  31. if(numout || strchr(hex, argv[0][0])==0)
  32. exits(nums(argv));
  33. exits(chars(argv));
  34. }
  35. char*
  36. range(char *argv[])
  37. {
  38. char *q;
  39. int min, max;
  40. int i;
  41. while(*argv){
  42. q = *argv;
  43. if(strchr(hex, q[0]) == 0){
  44. err:
  45. fprint(2, "unicode: bad range %s\n", *argv);
  46. return "bad range";
  47. }
  48. min = strtoul(q, &q, 16);
  49. if(min<0 || min>0xFFFF || *q!='-')
  50. goto err;
  51. q++;
  52. if(strchr(hex, *q) == 0)
  53. goto err;
  54. max = strtoul(q, &q, 16);
  55. if(max<0 || max>0xFFFF || max<min || *q!=0)
  56. goto err;
  57. i = 0;
  58. do{
  59. Bprint(&bout, "%.4x %C", min, min);
  60. i++;
  61. if(min==max || (i&7)==0)
  62. Bprint(&bout, "\n");
  63. else
  64. Bprint(&bout, "\t");
  65. min++;
  66. }while(min<=max);
  67. argv++;
  68. }
  69. return 0;
  70. }
  71. char*
  72. nums(char *argv[])
  73. {
  74. char *q;
  75. Rune r;
  76. int w;
  77. while(*argv){
  78. q = *argv;
  79. while(*q){
  80. w = chartorune(&r, q);
  81. if(r==0x80 && (q[0]&0xFF)!=0x80){
  82. fprint(2, "unicode: invalid utf string %s\n", *argv);
  83. return "bad utf";
  84. }
  85. Bprint(&bout, "%.4x\n", r);
  86. q += w;
  87. }
  88. argv++;
  89. }
  90. return 0;
  91. }
  92. char*
  93. chars(char *argv[])
  94. {
  95. char *q;
  96. int m;
  97. while(*argv){
  98. q = *argv;
  99. if(strchr(hex, q[0]) == 0){
  100. err:
  101. fprint(2, "unicode: bad unicode value %s\n", *argv);
  102. return "bad char";
  103. }
  104. m = strtoul(q, &q, 16);
  105. if(m<0 || m>0xFFFF || *q!=0)
  106. goto err;
  107. Bprint(&bout, "%C", m);
  108. if(!text)
  109. Bprint(&bout, "\n");
  110. argv++;
  111. }
  112. return 0;
  113. }