oebpackage.b 6.3 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276
  1. implement OEBpackage;
  2. include "sys.m";
  3. sys: Sys;
  4. include "bufio.m";
  5. include "url.m";
  6. url: Url;
  7. ParsedUrl: import url;
  8. include "xml.m";
  9. xml: Xml;
  10. Attributes, Locator, Parser: import xml;
  11. include "oebpackage.m";
  # System identifiers that doctype() matches exactly against a DOCTYPE
  # parameter: the first marks an OEB package, the second an OEB document.
  12. OEBpkgtype: con "http://openebook.org/dtds/oeb-1.0.1/oebpkg101.dtd";
  13. OEBdoctype: con "http://openebook.org/dtds/oeb-1.0.1/oebdoc101.dtd";
  # Classification codes returned by doctype(); it returns -1 for anything else.
  14. OEBpkg, OEBdoc: con iota;
  # Passed by open() as the `lax' argument of doctype(); when non-zero,
  # doctype() falls back to substring matching if no exact match is found.
  15. Laxchecking: con 1;
  # init records the Xml module handle supplied by the caller and loads the
  # Sys and Url modules used by the rest of this file.  The Url module is
  # only initialised when it loaded successfully.
  init(xmlm: Xml)
  {
      xml = xmlm;
      sys = load Sys Sys->PATH;
      url = load Url Url->PATH;
      if(url == nil)
          return;
      url->init();
  }
  # open parses the XML file f as an OEB package.
  #
  # f is handed to xml->open together with the warnings channel.
  # The result is (package, nil) on success, or (nil, reason) on failure.
  #
  # The file must start with an <?xml ...?> processing instruction followed
  # (after optional text) by a public DOCTYPE with at least two parameters.
  # If the DOCTYPE names an OEB document rather than a package, a one-item
  # package wrapping f itself is returned.  Otherwise the <package> element
  # is read: dc-metadata entries go to p.meta, <item>s to p.manifest,
  # <itemref>s to p.spine and <reference>s to p.guide.
  open(f: string, warnings: chan of (Xml->Locator, string)): (ref Package, string)
  {
      (x, e) := xml->open(f, warnings, nil);
      if(x == nil)
          return (nil, e);
      xi := x.next();
      if(xi == nil)
          return (nil, "not valid XML");
      pick d := xi {
      Process =>
          if(d.target != "xml")
              return (nil, "not an XML file");
      * =>
          return (nil, "unexpected file structure");
      }

      # Scan the prolog for the DOCTYPE, skipping any text before it, and
      # stop as soon as a package DOCTYPE has been recognised.  A fixed
      # number of iterations is wrong here: with no whitespace between the
      # DOCTYPE and <package> it would read the package tag itself and
      # reject the file as having no DOCTYPE.  The labelled break is needed
      # because a plain `break' would only leave the case statement.
  prolog:
      for(;;){
          xi = x.next();
          if(xi == nil)
              return (nil, "not OEB package");
          pick d := xi {
          Text =>
              ;    # usual XML extraneous punctuation cruft
          Doctype =>
              if(!d.public || len d.params < 2)
                  return (nil, "not an OEB document or package");
              case doctype(hd tl d.params, Laxchecking) {
              OEBpkg =>
                  break prolog;
              OEBdoc =>
                  # it's a document; make it into a simple package
                  p := ref Package;
                  p.file = f;
                  p.uniqueid = d.name;
                  p.manifest = p.spine = ref Item("doc", f, "text/x-oeb1-document", nil, f, nil) :: nil;
                  return (p, nil);
              * =>
                  return (nil, "unexpected DOCTYPE for OEB package: " + hd tl d.params );
              }
          * =>
              return (nil, "not OEB package (no DOCTYPE)");
          }
      }

      p := ref Package;
      p.file = f;

      # package[@unique-identifier[IDREF], Metadata, Manifest, Spine, Tours?, Guide?]
      # next() skips anything that is not the wanted tag, including any
      # text between the DOCTYPE and <package>.
      if((tag := next(x, "package")) == nil)
          return (nil, "can't find OEB package");
      p.uniqueid = tag.attrs.get("unique-identifier");
      spine: list of string;
      fallbacks: list of (ref Item, string);
      x.down();
      while((tag = next(x, nil)) != nil){
          x.down();
          case tag.name {
          "metadata" =>
              while((tag = next(x, nil)) != nil)
                  if(tag.name == "dc-metadata"){
                      x.down();
                      while((tag = next(x, nil)) != nil){
                          # An element without text is skipped; it must not
                          # end the scan and lose the entries that follow it.
                          s := text(x);
                          if(s != nil)
                              p.meta = (tag.name, tag.attrs, s) :: p.meta;
                      }
                      x.up();
                  }
          "manifest" =>
              while((tag = next(x, "item")) != nil){
                  a := tag.attrs;
                  p.manifest = ref Item(a.get("id"), a.get("href"), a.get("media-type"), nil, nil, nil) :: p.manifest;
                  # fallback ids are resolved after the whole manifest is read
                  fallback := a.get("fallback");
                  if (fallback != nil)
                      fallbacks = (hd p.manifest, fallback) :: fallbacks;
              }
          "spine" =>
              while((tag = next(x, "itemref")) != nil)
                  if((id := tag.attrs.get("idref")) != nil)
                      spine = id :: spine;
          "guide" =>
              while((tag = next(x, "reference")) != nil){
                  a := tag.attrs;
                  p.guide = ref Reference(a.get("type"), a.get("title"), a.get("href")) :: p.guide;
              }
          "tours" =>
              ; # ignore for now
          }
          x.up();
      }
      x.up();

      # deal with fallbacks, and make sure they're not circular.
      for (; fallbacks != nil; fallbacks = tl fallbacks) {
          (item, fallbackid) := hd fallbacks;
          fallback := lookitem(p.manifest, fallbackid);
          # walk the existing chain from the target; reaching item means a cycle
          for (fi := fallback; fi != nil; fi = fi.fallback)
              if (fi == item)
                  break;
          if (fi == nil)
              item.fallback = fallback;
          else
              sys->print("warning: circular fallback reference\n");
      }

      # we'll assume it doesn't require a hash table
      # (spine ids were collected in reverse, so consing again restores file order)
      for(; spine != nil; spine = tl spine)
          if((item := lookitem(p.manifest, hd spine)) != nil)
              p.spine = item :: p.spine;
          else
              p.spine = ref Item(hd spine, nil, nil, nil, nil, "item in OEB spine but not listed in manifest") :: p.spine;

      # put the guide references back into file order
      guide := p.guide;
      for(p.guide = nil; guide != nil; guide = tl guide)
          p.guide = hd guide :: p.guide;
      return (p, nil);
  }
  # doctype classifies the DOCTYPE system identifier s.
  #
  # Returns OEBpkg or OEBdoc when s equals the corresponding DTD URL.
  # If there is no exact match and lax is non-zero, s is accepted when it
  # merely contains "oebpkg1" or "oebdoc1".  Returns -1 when s is not
  # recognised.
  doctype(s: string, lax: int): int
  {
      case s {
      OEBpkgtype =>
          return OEBpkg;
      OEBdoctype =>
          return OEBdoc;
      * =>
          if (!lax)
              return -1;
          if (contains(s, "oebpkg1"))
              return OEBpkg;
          # No stray `;' after this condition: with one, the return below
          # runs unconditionally and every unknown DOCTYPE is classed as
          # an OEB document.
          if (contains(s, "oebdoc1"))
              return OEBdoc;
          return -1;
      }
  }
  # contains reports whether s2 occurs somewhere inside s1:
  # 1 if it does, 0 if it does not.  An empty s2 is always found.
  contains(s1, s2: string): int
  {
      sublen := len s2;
      if(sublen == 0)
          return 1;
      # last index at which a match could still start; negative when s2 is longer
      last := len s1 - sublen;
      for(start := 0; start <= last; start++)
          if(s1[start:start+sublen] == s2)
              return 1;
      return 0;
  }
  # lookitem returns the first entry of items whose id equals id,
  # or nil when no entry matches.
  lookitem(items: list of ref Item, id: string): ref Item
  {
      rest := items;
      while(rest != nil){
          candidate := hd rest;
          if(candidate.id == id)
              return candidate;
          rest = tl rest;
      }
      return nil;
  }
  # next reads items from x until it reaches a tag and returns it.
  # When s is non-nil only a tag named s is accepted; other tags are passed
  # over.  Error items are printed and skipped, and every other kind of
  # item is ignored.  Returns nil once x.next() returns nil.
  next(x: ref Parser, s: string): ref Xml->Item.Tag
  {
      item := x.next();
      while(item != nil){
          pick it := item {
          Tag =>
              if(s == nil || it.name == s)
                  return it;
          Error =>
              sys->print("oebpackage: error: %s:%d: %s\n", it.loc.systemid, it.loc.line, it.msg);
          }
          item = x.next();
      }
      return nil;
  }
  # text descends one level in x, returns the contents of the first Text
  # item found there, and moves back up.  Error items met on the way are
  # printed and skipped.  Returns nil when no Text item is found.
  text(x: ref Parser): string
  {
      result: string;
      found := 0;
      x.down();
      while(!found){
          item := x.next();
          if(item == nil)
              break;
          pick it := item {
          Text =>
              result = it.ch;
              found = 1;    # stop at the first piece of text
          Error =>
              sys->print("oebpackage: error: %s:%d: %s\n", it.loc.systemid, it.loc.line, it.msg);
          }
      }
      x.up();
      return result;
  }
  # getmeta collects the (attributes, value) pairs of every metadata entry
  # of p whose name equals n.  p.meta is stored in reverse file order, and
  # consing the matches reverses them again, so the result is in file order.
  Package.getmeta(p: self ref Package, n: string): list of (Xml->Attributes, string)
  {
      matches: list of (Xml->Attributes, string);
      rest := p.meta;
      while(rest != nil){
          (name, attrs, value) := hd rest;
          if(name == n)
              matches = (attrs, value) :: matches;
          rest = tl rest;
      }
      return matches;
  }
  # locate resolves the href of every manifest item of p to a local file.
  #
  # Relative paths are taken relative to the directory of p.file ("./" when
  # p.file has no '/').  For each item that can be stat'ed, item.file is set
  # to the resolved path.  item.missing is set to the reason the item could
  # not be located, or to the empty string on success.
  # Returns the number of items that could not be located.
  Package.locate(p: self ref Package): int
  {
      # directory part of the package file name, up to and including the last '/'
      dir := "./";
      for(slash := len p.file - 1; slash >= 0; slash--)
          if(p.file[slash] == '/'){
              dir = p.file[0:slash+1];
              break;
          }

      nmissing := 0;
      for(items := p.manifest; items != nil; items = tl items){
          item := hd items;
          err := "";
          if(item.href == nil)
              err = "no location specified (missing HREF)";
          else{
              u := url->makeurl(item.href);
              if(u.scheme != Url->FILE && u.scheme != Url->NOSCHEME)
                  err = sys->sprint("URL scheme %s not yet supported", url->schemes[u.scheme]);
              else if(u.host != "localhost" && u.host != nil)
                  err = "non-local URLs not supported";
              else{
                  path := u.path;
                  if(u.pstart != "/")
                      path = dir+path; # TO DO: security
                  (ok, nil) := sys->stat(path);
                  if(ok < 0)
                      err = sys->sprint("%r");
                  else
                      item.file = path;
              }
          }
          item.missing = err;
          if(err != nil)
              nmissing++;
      }
      return nmissing;
  }