# canonind.awk

  # Turn the output of mkindex into the form needed by dict.
  #
  # Usage: awk -F' ' -f canonind.awk rawindex > index
  #
  # Every input record has a number in field 1 followed by zero or more
  # words in fields 2..NF.  Each non-empty word is emitted as
  # "word<TAB>number".  A word containing a parenthesised part, such as
  # "colo(u)r", is emitted twice: once with the parenthesised text dropped
  # ("color") and once with it kept but the parentheses removed ("colour").
  # Only the first "(" and the first ")" of a word are considered.
  #
  # The emitted lines are collected in a scratch file named "junk" in the
  # current directory, then sorted, de-duplicated and written to standard
  # output.  The scratch file is removed afterwards.
  #
  # NOTE(review): the usage string shows a single-space -F argument; the
  # original separator was presumably a tab -- confirm against mkindex.
  BEGIN {
      if (ARGC != 2) {
          print "Usage: awk -F' ' -f canonind.awk rawindex > index"
          exit 1
      }

      file = ARGV[1]
      ARGV[1] = ""    # all work is done here in BEGIN; leave no operand behind
      tmp = "junk"

      # Create the scratch file up front so that an input with no words still
      # gives sort and rm something to operate on.
      printf "" > tmp

      while ((status = (getline < file)) > 0) {
          for (i = 2; i <= NF; i++) {
              word = $i
              if (length(word) == 0)
                  continue
              open = index(word, "(")
              shut = index(word, ")")
              if (open && shut && open < shut) {
                  head = substr(word, 1, open - 1)
                  opt = substr(word, open + 1, shut - open - 1)
                  tail = substr(word, shut + 1)
                  printf "%s%s\t%d\n", head, tail, $1 > tmp
                  printf "%s%s%s\t%d\n", head, opt, tail, $1 > tmp
              } else
                  printf "%s\t%d\n", word, $1 > tmp
          }
      }

      # Flush and release the scratch file before another process reads it.
      close(tmp)

      # getline returns -1 when the file cannot be read; do not pass that
      # off as an empty index.
      if (status < 0) {
          system("rm " tmp)
          print "canonind.awk: cannot read " file > "/dev/stderr"
          exit 1
      }
      close(file)

      # The scratch file is tab-delimited, so sort must split on a tab (awk
      # expands \t to a real tab before the shell sees the command).
      # Keys: word ignoring case, then word exactly, then number numerically.
      rc = system("sort -u -t'\t' -k1,1f -k1,1 -k2,2n < " tmp)
      system("rm " tmp)
      exit (rc != 0)
  }