123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641 |
- % Copyright (C) 1994, 1996, 1997, 1998, 1999, 2000 Aladdin Enterprises. All rights reserved.
- %
- % This file is part of AFPL Ghostscript.
- %
- % AFPL Ghostscript is distributed with NO WARRANTY OF ANY KIND. No author or
- % distributor accepts any responsibility for the consequences of using it, or
- % for whether it serves any particular purpose or works at all, unless he or
- % she says so in writing. Refer to the Aladdin Free Public License (the
- % "License") for full details.
- %
- % Every copy of AFPL Ghostscript must include a copy of the License, normally
- % in a plain ASCII text file named PUBLIC. The License grants you the right
- % to copy, modify and redistribute AFPL Ghostscript, but only under certain
- % conditions described in the License. Among other things, the License
- % requires that the copyright notice and this notice be preserved on all
- % copies.
- % $Id: pdf_base.ps,v 1.14 2001/03/20 05:04:59 alexcher Exp $
- % pdf_base.ps
- % Basic parser for PDF reader.
- % This handles basic parsing of the file (including the trailer
- % and cross-reference table), as well as objects, object references,
- % streams, and name/number trees; it doesn't include any facilities for
- % making marks on the page.
- % Switch to language level 2 when the interpreter provides .setlanguagelevel.
- /.setlanguagelevel where { pop 2 .setlanguagelevel } if
- % The value pushed by .currentglobal stays on the operand stack for the
- % whole file and is consumed by the closing .setglobal on the last line.
- .currentglobal true .setglobal
- % Reuse pdfdict if it is already defined; otherwise create it.
- /pdfdict where { pop } { /pdfdict 100 dict def } ifelse
- % Everything below is defined inside pdfdict (closed by the matching end).
- pdfdict begin
- % Operator dictionary used while reading PDF values: it maps the
- % executable names that may occur inside a value to what gets run for them.
- % true, false and null are deliberately left out; see .pdfexectoken below.
- /valueopdict mark
-   % Composite delimiters.  The opening ones are procedures that push a mark
-   % when executed: a mark object itself cannot be stored here, because the
-   % table is collected up to the nearest mark.
-   ([) cvn { mark } bind
-   (]) cvn dup load
-   (<<) cvn { mark } bind
-   (>>) cvn /.dicttomark load
-   % Keywords: each value is the executable name itself, looked up when run.
-   /stream dup cvx                % see Streams section below
-   /R dup cvx                     % see Objects section below
-   /F dup cvx                     % see Objects section below
- .dicttomark readonly def
- % ------ Utilities ------ %
- % Define a scratch string. The PDF language definition says that
- % no line in a PDF file can exceed 255 characters.
- % prevline (below) uses this string as its readline buffer.
- /pdfstring 255 string def
- % Read the previous line of a file. If we aren't at a line boundary,
- % read the line containing the current position.
- % Skip any blank lines.
- % The search backs up 257 bytes from the current position of PDFfile
- % (clamped at 0) and reads forward line by line, remembering the last
- % non-empty line, until the file position reaches the starting position.
- % The returned substring is a piece of pdfstring, so it is overwritten by
- % the next use of that buffer.  PDFfile is left where the last readline
- % stopped; the original position is not restored.
- /prevline % - prevline <startpos> <substring>
- { PDFfile fileposition dup () pdfstring % Stack: initpos linepos(=initpos) line(=empty) string
- 2 index 257 sub 0 .max PDFfile exch setfileposition % back up, but never before byte 0
- { % Stack: initpos linepos line string
- PDFfile fileposition % start position of the line about to be read
- PDFfile 2 index readline pop % read into pdfstring; the end-of-file flag is discarded
- dup length 0 gt
- { 3 2 roll 5 -2 roll pop pop 2 index } % non-empty: it replaces the remembered linepos and line
- { pop } % blank line: keep what was remembered
- ifelse
- % Stack: initpos linepos line string startpos
- PDFfile fileposition 5 index ge { exit } if % stop once we are back at (or past) initpos
- pop
- }
- loop pop pop 3 -1 roll pop % drop startpos, string and initpos; leave linepos line
- } bind def
- % Apply the PDF 1.2 #nn escape convention to a name read from a file.
- % Nothing is done when PDFversion is below 1.2, and a name that contains
- % no '#' is returned unchanged.
- % This should eventually be done in C.
- /.pdffixname { % <execname> .pdffixname <execname'>
-   PDFversion 1.2 ge {
-     dup .namestring (#) search not {
-       pop                        % no escape: drop the string, keep the name
-     } {
-       name#escape cvn            % Stack: name name'
-       exch pop
-     } ifelse
-   } if
- } bind def
- % Decode the first #nn escape of a name string and, recursively, any
- % later ones.  The operands are the three results of a successful
- % (#) search; the result is the whole decoded string.
- /name#escape % <post> <(#)> <pre> name#escape <string>
- { exch pop % Stack: post pre
- 1 index 2 () /SubFileDecode filter dup (x) readhexstring % read the 2 hex digits at the start of <post> as 1 char
- % Stack: post pre stream char t/f
- not { /.pdftoken cvx /syntaxerror signalerror } if % the hex pair could not be read
- exch closefile concatstrings % Stack: post pre+char
- exch 2 1 index length 2 sub getinterval % Stack: pre+char rest (post without its first 2 chars)
- (#) search { name#escape } if concatstrings % decode any further escapes, then append
- } bind def
- % Execute a file, interpreting its executable names in a given
- % dictionary. The name procedures may do whatever they want
- % to the operand stack.
- % Report a token that has no entry in the operator dictionary, then
- % discard the operands above the recorded stack depth.  The message is
- % written to %stderr and only when BXlevel is not positive.
- /.pdftokenerror { % <count> <opdict> <errtoken> .pdftokenerror -
-   exch pop                       % opdict is not needed; Stack: count errtoken
-   BXlevel 0 le {
-     (%stderr) (w) file           % Stack: count errtoken errfile
-     dup (****************Unknown operator: ) writestring
-     dup 2 index .writecvs
-     dup (\n) writestring
-     flushfile
-   } if
-   pop                            % Stack: count
-   count exch sub { pop } repeat  % pop all the operands
- } bind def
- /.pdfexectoken { % <count> <opdict> <exectoken> .pdfexectoken ?
-   DEBUG { dup == flush } if
-   2 copy .knownget {
-     % Stack: count opdict token value
-     % Keep only the value found in opdict and run it.
-     4 1 roll pop pop pop exec
-   } {
-     % true, false and null are not stored in the PDF operator
-     % dictionaries.  The PostScript interpreter has a fast path for
-     % names that are defined in, and only in, systemdict and/or
-     % userdict; defining these three names in the PDF dictionaries
-     % would take that property away from them and slow down their
-     % interpretation in all PostScript code.  So they are recognized
-     % explicitly here and fetched from systemdict.
-     dup /true eq 1 index /false eq or 1 index /null eq or {
-       % Stack: count opdict token
-       3 1 roll pop pop //systemdict exch get
-     } {
-       .pdftokenerror
-     } ifelse
-   } ifelse
- } bind def
- % Read tokens from <file> until something exits the loop: executable
- % names are dispatched through <opdict> by .pdfexectoken, every other
- % token is left on the operand stack.  PDFsource is set to <file> while
- % the loop runs and is restored afterwards, also when the loop is stopped.
- /.pdfrun { % <file> <opdict> .pdfrun -
- % Construct a procedure with the stack depth, file and opdict
- % bound into it.
- 1 index cvlit count 2 sub 3 1 roll mark mark 5 2 roll % Stack: file mark mark count opdict file(literal)
- { % Stack: ..operands.. count opdict file
- token {
- dup type /nametype eq {
- dup xcheck {
- .pdfexectoken
- } {
- .pdffixname % literal name: decode #nn escapes
- exch pop exch pop DEBUG { dup ==only ( ) print flush } if % drop count and opdict, keep the name
- } ifelse
- } {
- exch pop exch pop DEBUG { dup ==only ( ) print flush } if % non-name token: keep it as an operand
- } ifelse
- } {
- (%%EOF) cvn cvx .pdfexectoken % end of file: dispatch a synthetic %%EOF name
- } ifelse
- }
- aload pop .packtomark cvx % body = { count opdict file token {...} {...} ifelse }
- /loop cvx 2 packedarray cvx % { body loop }
- { stopped /PDFsource } aload pop
- PDFsource % the value to restore after the loop
- { store { stop } if } aload pop .packtomark cvx % { {body loop} stopped /PDFsource old store {stop} if }
- /PDFsource 3 -1 roll store exec % PDFsource := file, then run the constructed procedure
- } bind def
- % Execute a file, like .pdfrun, for a marking context.
- % This temporarily rebinds LocalResources and DefaultMatrix.
- % NOTE(review): the old values are restored only when .pdfrun returns
- % normally; nothing here catches an error or stop.
- /.pdfruncontext { % <resdict> <file> <opdict> .pdfruncontext -
- /.pdfrun load LocalResources DefaultMatrix % Stack: resdict file opdict proc oldLR oldDM
- /LocalResources 7 -1 roll store % LocalResources := resdict
- /DefaultMatrix matrix currentmatrix store % DefaultMatrix := a copy of the current CTM
- 3 .execn % presumably executes proc, oldLR, oldDM in that order: runs .pdfrun, then re-pushes the two saved values
- /DefaultMatrix exch store
- /LocalResources exch store
- } bind def
- % Get the depth of the PDF operand stack. The main program (pdf_main.ps)
- % sets pdfemptycount before calling .pdfrun.
- % The result is the current operand stack depth minus pdfemptycount.
- /.pdfcount { % - .pdfcount <count>
- count pdfemptycount sub
- } bind def
- % ------ File reading ------ %
- % Read the cross-reference entry for an (unresolved) object.
- % The caller must save and restore the PDFfile position if desired.
- % Objects[object#] must still hold the file position of the entry.
- % Side effect: Generations[object#] is set to 1 + the generation number
- % for an in-use (n) entry and to 0 for a free (f) entry; any other entry
- % type raises syntaxerror.  The value returned is the first field of the
- % entry in both cases.
- /readxrefentry % <object#> readxrefentry <objpos>
- { dup Objects exch lget % Stack: obj# entrypos
- PDFfile exch setfileposition
- PDFfile token pop % object position
- PDFfile token pop % generation #
- PDFfile token pop % n or f
- dup /n eq
- { pop 1 add dup 255 gt % Stack: obj# objpos 1+gen#; above 255 it no longer fits in a string element
- { Generations ltype /stringtype eq
- { % Convert Generations from a string to an array.
- larray Generations llength lgrowto dup
- 0 1 2 index llength 1 sub
- { Generations 1 index lget lput dup % copy element i; Stack: new new
- }
- for pop /Generations exch store
- }
- if
- }
- if
- }
- { /f eq
- { pop 0 } % free entry: record 0 in place of 1+gen#
- { /readxrefentry cvx /syntaxerror signalerror }
- ifelse
- }
- ifelse
- % Stack: obj# objpos 1+gen#
- Generations 4 -1 roll 3 -1 roll lput
- } bind def
- % ================================ Objects ================================ %
- % Since we may have more than 64K objects, we have to use a 2-D array to
- % hold them (and the parallel Generations structure).
- % An index is split into a sub-sequence number (index shifted right by
- % lshift) and a position inside that sub-sequence (index and lsubmask).
- /lshift 9 def                    % log2 of the sub-sequence length
- /lsublen 1 lshift bitshift def   % elements in a full sub-sequence
- /lsubmask lsublen 1 sub def      % mask selecting the position in a sub-sequence
- /lnshift 0 lshift sub def        % negated shift count, for shifting right
- % Create an empty long array: an array holding one empty sub-array.
- /larray { % - larray <larray>
-   mark 0 array ]
- } bind def
- % Create an empty long string: an array holding one empty string.
- /lstring { % - lstring <lstring>
-   mark 0 string ]
- } bind def
- % Return the element container type of a long sequence, i.e. the type of
- % its first sub-sequence (/arraytype for an larray, /stringtype for an lstring).
- /ltype { % <lseq> type <type>
- 0 get type
- } bind def
- % Fetch one element of a long sequence.
- /lget { % <lseq> <index> lget <value>
-   exch 1 index //lnshift bitshift get   % Stack: index subseq
-   exch //lsubmask and get               % element at the position within subseq
- } bind def
- % Store one element into a long sequence.
- /lput { % <lseq> <index> <value> lput -
-   3 -1 roll 2 index //lnshift bitshift get   % Stack: index value subseq
-   3 1 roll                                   % Stack: subseq index value
-   exch //lsubmask and exch                   % reduce index to the position within subseq
-   put
- } bind def
- % Total element count of a long sequence: every sub-sequence except the
- % last is taken to be full, and the length of the last one is added.
- /llength { % <lseq> llength <length>
-   dup length 1 sub               % Stack: lseq last#
-   2 copy get length              % Stack: lseq last# lastlen
-   3 1 roll exch pop              % Stack: lastlen last#
-   //lshift bitshift add
- } bind def
- % lgrowto assumes newlength > llength(lseq)
- % Return a new string or array of the given length whose leading elements
- % are copied from the old one.  The old object is not modified.
- /growto { % <string/array> <length> growto <string'/array'>
-   1 index type /stringtype ne { array } { string } ifelse
-   % Stack: old new
-   dup 3 -1 roll exch             % Stack: new old new
-   copy pop                       % copy old into new, discard the sub-interval
- } bind def
- % Grow a long sequence to <newlength> elements.  When more sub-sequences
- % are needed, the last existing one is first completed to full size, full
- % sub-sequences are added, and a new top-level array is built; in every
- % case the last sub-sequence is finally replaced by a grown copy.
- % The caller must store the result: it may be a different top-level array.
- /lgrowto { % <lseq> <newlength> lgrowto <lseq'>
- dup //lsubmask add //lnshift bitshift dup 3 index length gt { % newtoplen = sub-sequences needed
- % Add more sub-arrays. Start by completing the last existing one.
- % Stack: lseq newlen newtoplen
- 3 -1 roll dup llength 1 sub //lsubmask or 1 add lgrowto % recursive call fills the last sub-sequence
- % Stack: newlen newtoplen lseq
- [ exch aload pop
- counttomark 2 add -1 roll % newtoplen
- counttomark sub { dup 0 0 getinterval lsublen growto } repeat % append full-size sub-sequences of the same type
- dup 0 0 getinterval ] exch % append an empty last sub-sequence; Stack: lseq' newlen
- } {
- pop
- } ifelse
- % Expand the last sub-array.
- 1 sub //lsubmask and 1 add % number of elements wanted in the last sub-sequence
- exch dup dup length 1 sub 2 copy
- % Stack: newsublen lseq lseq len-1 lseq len-1
- get 5 -1 roll growto put % replace the last sub-sequence by its grown copy
- } bind def
- % Apply <proc> to every element of a long sequence, sub-sequence by
- % sub-sequence: the outer forall runs { <proc> forall } on each one.
- /lforall { % <lseq> <proc> lforall -
-   mark exch /forall cvx .packtomark cvx   % Stack: lseq { proc forall }
-   forall
- } bind def
- % We keep track of PDF objects using the following PostScript variables:
- %
- % Generations (lstring): Generations[N] holds 1+ the current
- % generation number for object number N. (As far as we can tell,
- % this is needed only for error checking.) For free objects,
- % Generations[N] is 0.
- %
- % Objects (larray): If object N is loaded, Objects[N] is the actual
- % object; otherwise, Objects[N] is an executable integer giving
- % the file offset of the object's entry in the cross-reference
- % table.
- %
- % GlobalObjects (dictionary): If object N has been resolved in
- % global VM, GlobalObjects[N] is the same as Objects[N]
- % (except that GlobalObjects itself is stored in global VM,
- % so the entry will not be deleted at the end of the page).
- %
- % IsGlobal (lstring): IsGlobal[N] = 1 iff object N was resolved in
- % global VM. This is an accelerator to avoid having to do a
- % dictionary lookup in GlobalObjects when resolving every object.
- % Initialize the PDF object tables.
- % Objects, Generations and IsGlobal start out as empty long sequences
- % created in the caller's VM mode; GlobalObjects is an empty dictionary
- % created with global allocation switched on.
- /initPDFobjects { % - initPDFobjects -
-   /Objects larray def
-   /Generations lstring def
-   /IsGlobal lstring def
-   .currentglobal true .setglobal % Stack: previous-mode
-   /GlobalObjects 20 dict def
-   .setglobal                     % back to the previous mode
- } bind def
- % Grow the tables to a specified size.
- % Objects, Generations and IsGlobal are each enlarged, and redefined,
- % only when they are currently shorter than <minsize>.
- /growPDFobjects { % <minsize> growPDFobjects -
-   { /Objects /Generations /IsGlobal } {
-     % Stack: minsize tablename
-     2 copy load llength gt {
-       dup load 2 index lgrowto def
-     } {
-       pop
-     } ifelse
-   } forall
-   pop
- } bind def
- % We represent an unresolved object reference by a procedure of the form
- % {obj# gen# resolveR}. This is not a possible PDF object, because PDF has
- % no way to represent procedures. Since PDF in fact has no way to represent
- % any PostScript object that doesn't evaluate to itself, we can 'force'
- % a possibly indirect object painlessly with 'exec'.
- % Note that since we represent streams by executable dictionaries
- % (see below), we need both an xcheck and a type check to determine
- % whether an object has been resolved.
- %
- % resolved? reports whether object N is already loaded.  An executable
- % integer in Objects[N] means "not loaded"; in that case IsGlobal[N] is
- % consulted, and when it is set the value is taken from GlobalObjects,
- % stored back into Objects[N] and returned.
- /resolved? { % <object#> resolved? <value> true
-   % <object#> resolved? false
-   Objects 1 index lget dup xcheck {
-     dup type /integertype eq {
-       % Check whether the object is in GlobalObjects.
-       pop IsGlobal 1 index lget 0 eq {
-         pop false
-       } {
-         % Update Objects from GlobalObjects
-         DEBUG { (%Global=>local: ) print dup == } if
-         GlobalObjects 1 index get % Stack: obj# value
-         % lput needs <lseq> <index> <value> on top, with a copy of the
-         % value kept underneath as the result.
-         Objects 3 -1 roll 2 index % Stack: value Objects obj# value
-         lput true
-       } ifelse
-     } {
-       % Executable but not an integer (a stream): already resolved.
-       exch pop true
-     } ifelse
-   } {
-     exch pop true
-   } ifelse
- } bind def
- % oforce is exec under another name: executing an unresolved reference
- % {obj# gen# resolveR} runs resolveR; other values are simply executed.
- /oforce /exec load def
- % Fetch an element of an array or dictionary and force it with oforce.
- /oget { % <array> <index> oget <object>
- % <dict> <key> oget <object>
- % Before release 6.20, this procedure stored the resolved
- % object back into the referring slot. In order to support
- % PDF linearization, we no longer do this.
- get oforce
- } bind def
- % Look a key up in a dictionary and force the value.
- % A null value in a dictionary is equivalent to an omitted key, so a
- % value that forces to null is reported as "not found".
- /knownoget { % <dict> <key> knownoget <value> true
-   % <dict> <key> knownoget false
-   % See oget above regarding this procedure.
-   .knownget not {
-     false
-   } {
-     oforce dup null ne           % Stack: value present?
-     dup not { exch pop } if      % null: drop the value, leaving only false
-   } ifelse
- } bind def
- % PDF 1.1 defines a 'foreign file reference', but not its meaning.
- % Per the specification, we convert these to nulls.
- % With fewer than three operands on the PDF operand stack the token is
- % treated as the f operator instead.
- /F { % <file#> <object#> <generation#> F <object>
-   % Some PDF 1.1 files use F as a synonym for f!
-   .pdfcount 3 ge {
-     pop pop pop null
-   } {
-     f
-   } ifelse
- } bind def
- % Compare a reference's generation number with the one recorded in
- % Generations (which holds 1 + generation, or 0 for a free object).
- % On a mismatch a warning is printed unless QUIET is set.
- /checkgeneration { % <object#> <generation#> checkgeneration <object#> <OK>
-   Generations 2 index lget       % Stack: obj# gen# recorded
-   dup 1 sub 2 index eq {
-     pop pop true
-   } {
-     QUIET {
-       pop pop
-     } {
-       0 eq {
-         (Warning: reference to free object: )
-       } {
-         (Warning: wrong generation: )
-       } ifelse print
-       % Stack: obj# gen#
-       1 index =only ( ) print =only ( R) =
-     } ifelse
-     false
-   } ifelse
- } bind def
- % Build the unresolved-reference procedure {obj# gen# resolveR};
- % nothing is read from the file until that procedure is executed.
- /R { % <object#> <generation#> R <object>
-   mark 3 1 roll                  % Stack: mark obj# gen#
-   /resolveR cvx .packtomark cvx
- } bind def
- % If we encounter an object definition while reading sequentially,
- % we just store it away and keep going.
- % objopdict is valueopdict plus an entry for endobj.
- /objopdict mark
-   /endobj dup cvx
-   valueopdict { } forall
- .dicttomark readonly def
- % Read the body of an object definition from PDFfile, dispatching
- % executable names through objopdict.
- /obj { % <object#> <generation#> obj <object>
- PDFfile objopdict .pdfrun
- } bind def
- % Finish an object definition: check the generation number and record
- % the object in Objects (and, for dictionaries allocated in global VM,
- % in GlobalObjects and IsGlobal as well).  The result is the object, or
- % null when the generation check fails.
- /endobj { % <object#> <generation#> <object> endobj <object>
-   3 1 roll                       % Stack: obj obj# gen#
-   % Read the xref entry if we haven't yet done so.
-   % This is only needed for generation # checking.
-   1 index resolved? {
-     pop
-   } {
-     PDFfile fileposition         % Stack: obj obj# gen# savedpos
-     2 index readxrefentry pop
-     % savedpos came from fileposition, so it is already an absolute file
-     % position and is restored unchanged; PDFoffset applies only to
-     % offsets read from the cross-reference table.
-     PDFfile exch setfileposition
-   } ifelse
-   checkgeneration {
-     % Stack: obj obj#
-     % The only global objects we bother to save are
-     % (resource) dictionaries.
-     1 index dup gcheck exch type /dicttype eq and {
-       DEBUG { (%Local=>global: ) print dup == } if
-       GlobalObjects 1 index 3 index put
-       % IsGlobal is a long string, so it must be written with lput.
-       IsGlobal 1 index 1 lput
-     } if
-     Objects exch 2 index lput
-   } {
-     pop pop null
-   } ifelse
- } bind def
- % When resolving an object reference, we stop at the endobj.
- % resolveopdict is valueopdict plus entries for endobj and obj, both of
- % which finish the object and leave the reading loop.
- /resolveopdict mark
-   valueopdict { } forall
-   % OmniForm generates PDF file with endobj missing in some
-   % objects. AR ignores this. So we have to do it too:
-   % the two operands of the unexpected obj are discarded.
-   /obj { pop pop endobj exit } bind
-   /endobj { endobj exit } bind
- .dicttomark readonly def
- % Resolve an indirect reference.  An object that is already loaded is
- % returned directly; otherwise its xref entry is read, the file is
- % positioned at the object, the "obj# gen# obj" header is checked and
- % the body is read by pdf_run_resolve.  The position of PDFfile is saved
- % first and restored at the end.  A failed generation check gives null.
- /resolveR { % <object#> <generation#> resolveR <object>
- DEBUG { (%Resolving: ) print 2 copy 2 array astore == } if
- 1 index resolved? {
- exch pop exch pop % Stack: value
- } {
- PDFfile fileposition 3 1 roll % Stack: savepos obj# gen#
- 1 index readxrefentry % Stack: savepos obj# gen# objpos
- 3 1 roll checkgeneration {
- % Stack: savepos objpos obj#
- exch PDFoffset add PDFfile exch setfileposition % objpos comes from the xref table, so PDFoffset is added
- PDFfile token pop 2 copy ne % the object number in the file must match
- { (xref error!) = /resolveR cvx /rangecheck signalerror
- }
- if pop PDFfile token pop % Stack: savepos obj# gen# (as read from the file)
- PDFfile token pop /obj ne % the third token must be obj
- { (xref error!) = /resolveR cvx /rangecheck signalerror
- }
- if
- pdf_run_resolve % PDFfile resolveopdict .pdfrun
- }
- { % Don't cache if the generation # is wrong.
- pop pop null
- } ifelse
- exch PDFfile exch setfileposition % restore savepos, leaving the result on the stack
- } ifelse
- } bind def
- % ================================ Streams ================================ %
- % We represent a stream by an executable dictionary that contains,
- % in addition to the contents of the original stream dictionary:
- % /File - the file or string where the stream contents are stored,
- % if the stream is not an external one.
- % /FilePosition - iff File is a file, the position in the file
- % where the contents start.
- % /StreamKey - the key used to decrypt this stream, if any.
- % We do the real work of constructing the data stream only when the
- % contents are needed.
- % Construct a stream. The length is not reliable in the face of
- % different end-of-line conventions, but it's all we've got.
- %
- % PDF files are inconsistent about what may fall between the 'stream' keyword
- % and the actual stream data, and it appears that no one algorithm can
- % detect this reliably. We used to try to guess whether the file included
- % extraneous \r and/or \n characters, but we no longer attempt to do so,
- % especially since the PDF 1.2 specification states flatly that the only
- % legal terminators following the 'stream' keyword are \n or \r\n, both of
- % which are properly skipped and discarded by the token operator.
- % Turn the dictionary just read into a stream object (an executable
- % dictionary) and skip over the stream data.  When reading from PDFfile
- % itself, or when the data lives in an external file (/F), only the
- % position is recorded and /Length bytes are skipped; otherwise the data
- % is captured into a string stored under /File.
- /stream { % <dict> stream <modified_dict>
- dup /F known dup PDFsource PDFfile eq or { % Stack: dict external?
- not {
- dup /File PDFfile put
- dup /FilePosition PDFfile fileposition put
- DEBUG { (%FilePosition: ) print dup /FilePosition get == } if
- } if
- PDFfile fileposition 1 index /Length oget add % position just past the data
- PDFfile exch setfileposition
- } {
- pop
- % We're already reading from a stream, which we can't reposition.
- % Capture the sub-stream contents in a string.
- dup /Length oget string PDFsource exch readstring
- not {
- (Unexpected EOF in stream!) =
- /stream cvx /rangecheck signalerror
- } if
- 1 index exch /File exch put
- } ifelse
- PDFsource token pop % the next token must be endstream
- /endstream ne { /stream cvx /syntaxerror signalerror } if
- cvx % mark the dictionary as a stream by making it executable
- } bind def
- % Executed as an operator, endstream leaves the enclosing loop.
- /endstream {
- exit
- } bind def
- % Contrary to the published PDF (1.3) specification, Acrobat Reader
- % accepts abbreviated filter names everywhere, not just for in-line images,
- % and some applications (notably htmldoc) rely on this.
- % This table maps each abbreviation to the full filter name.
- /unabbrevfilterdict mark
-   /A85 /ASCII85Decode
-   /AHx /ASCIIHexDecode
-   /CCF /CCITTFaxDecode
-   /DCT /DCTDecode
-   /Fl  /FlateDecode
-   /LZW /LZWDecode
-   /RL  /RunLengthDecode
- .dicttomark readonly def
- % Extract and apply filters.
- % filterparms fetches the filter list and its parameters from a stream
- % dictionary.  A single filter name (with its parameters, if present) is
- % wrapped in a one-element array; missing parameters are returned as null;
- % with no filter entry the result is null and an empty procedure.
- /filterparms { % <dict> <DPkey> <Fkey> filterparms
-   % <dict> <parms> <filternames>
-   2 index exch .knownget not {
-     % No filters: ignore parameters, if any.
-     pop null { }
-   } {
-     % Stack: dict DPkey filters
-     exch 2 index exch .knownget {
-       % Both filters and parameters.  Stack: dict filters parms
-       1 index type /nametype eq {
-         1 array astore exch 1 array astore
-       } {
-         exch
-       } ifelse
-     } {
-       % Filters, but no parameters.  Stack: dict filters
-       dup type /nametype eq { 1 array astore } if
-       null exch
-     } ifelse
-   } ifelse
- } bind def
- % Replace an abbreviated filter name by the full name; any name that is
- % not in unabbrevfilterdict is returned unchanged.
- /filtername { % <filtername> filtername <filtername'>
-   dup //unabbrevfilterdict exch .knownget {
-     exch pop                     % keep the full name only
-   } if
- } bind def
- % Stack the named filters on top of <source>, in order.  When <parms> is
- % null every filter is created without parameters; otherwise the first
- % element of <parms> is forced and passed along unless it is null, and
- % <parms> is shortened by one element for the next filter.
- /applyfilters { % <parms> <source> <filternames> applyfilters <stream>
- 2 index null eq {
- { filtername filter }
- } {
- { % Stack: parms stream filtername
- 2 index 0 oget dup null eq { pop } { exch } ifelse filtername filter
- exch dup length 1 sub 1 exch getinterval exch % drop the parameter entry just used
- }
- } ifelse forall exch pop % run the chosen procedure for each filter name, then drop parms
- } bind def
- % Resolve a stream dictionary to a PostScript stream.
- % Streams with no filters require special handling:
- % - If we are going to interpret their contents, we let endstream
- % terminate the interpretation loop;
- % - If we are just going to read data from them, we impose
- % a SubFileDecode filter that reads just the requisite amount of data.
- % Note that, in general, resolving a stream repositions PDFfile.
- % Clients must save and restore the position of PDFfile themselves.
- /resolvestream { % <streamdict> <readdata?> resolvestream <stream>
- 1 index /F .knownget {
- % This stream is stored on an external file.
- (r) file 3 -1 roll % Stack: readdata? file dict
- /FDecodeParms /FFilter filterparms
- % Stack: readdata? file dict parms filternames
- 4 -1 roll exch % Stack: readdata? dict parms file filternames
- pdf_decrypt_stream
- applyfilters
- } {
- exch dup /FilePosition .knownget {
- 1 index /File get exch setfileposition % position the file at the start of the data
- } if
- % Stack: readdata? dict
- /DecodeParms /Filter filterparms
- % Stack: readdata? dict parms filternames
- 2 index /File get exch
- % Stack: readdata? dict parms file/string filternames
- pdf_decrypt_stream % add decryption if needed
- dup length 0 eq {
- % All the PDF filters have EOD markers, but in this case
- % there is no specified filter.
- pop exch pop
- % Stack: readdata? dict file/string
- 2 index {
- % We're going to read data; use a SubFileDecode filter.
- 1 index /Length oget () /SubFileDecode filter
- } {
- dup type /filetype ne {
- % Use a SubFileDecode filter to read from a string.
- 0 () /SubFileDecode filter
- } if
- } ifelse
- } {
- applyfilters
- } ifelse
- } ifelse
- % Stack: readdata? dict file
- exch pop exch pop
- } bind def
- % ============================ Name/number trees ============================ %
- % Look a key up in a name tree (leaf entries are stored under /Names).
- /nameoget { % <nametree> <key> nameoget <obj|null>
-   /Names 3 -1 roll .treeget      % Stack for .treeget: key /Names tree
- } bind def
- % Look a key up in a number tree (leaf entries are stored under /Nums).
- /numoget { % <numtree> <key> numoget <obj|null>
-   /Nums 3 -1 roll .treeget       % Stack for .treeget: key /Nums tree
- } bind def
- % Look <key> up in one node of a name/number tree.  A node with /Kids is
- % searched through its children; otherwise the pairs array stored under
- % <leafkey> is searched.  The pairs array is fetched with knownoget, so an
- % indirect reference to it is resolved first, and a node that has neither
- % /Kids nor <leafkey> (an empty tree) yields null instead of an error.
- /.treeget { % <key> <leafkey> <tree> .treeget <obj|null>
-   dup /Kids knownoget {
-     exch pop .branchget          % Stack for .branchget: key leafkey kids
-   } {
-     exch knownoget {             % Stack: key pairs
-       .leafget
-     } {
-       pop null                   % no entries at all: drop the key
-     } ifelse
-   } ifelse
- } bind def
- % Binary search over the children of an intermediate node: the middle
- % child's /Limits [min max] decide whether to continue in the children
- % after it, in the children before it, or inside that child itself.
- % An empty kids array yields null.
- /.branchget { % <key> <leafkey> <kids> .branchget <obj|null>
- dup length 0 eq {
- pop pop pop null
- } {
- dup length -1 bitshift 2 copy oget % mid = length / 2
- % Stack: key leafkey kids mid kids[mid]
- dup /Limits oget aload pop
- % Stack: key leafkey kids mid kids[mid] min max
- 6 index lt { % max < key: search the children after mid
- pop pop
- 1 add 1 index length 1 index sub getinterval .branchget
- } {
- 5 index gt { % min > key: search the children before mid
- pop
- 0 exch getinterval .branchget
- } {
- exch pop exch pop .treeget % key lies within the limits: descend into kids[mid]
- } ifelse
- } ifelse
- } ifelse
- } bind def
- % Binary search over a leaf's [key1 value1 key2 value2 ...] array.
- % The result is the forced value paired with <key>, or null when the key
- % is absent.  An empty array yields null.  Keys are fetched with oget in
- % both branches, so the final comparison treats them the same way as the
- % search steps do.
- /.leafget { % <key> <pairs> .leafget <obj|null>
-   dup length 0 eq {
-     pop pop null                 % nothing to search
-   } {
-     dup length 2 eq {
-       % A single pair: compare its key directly.
-       dup 0 oget 2 index eq { 1 oget } { pop null } ifelse
-       exch pop
-     } {
-       % mid = index of the key of the middle pair (always even).
-       dup length -1 bitshift -2 and 2 copy oget
-       % Stack: key pairs mid pairs[mid]
-       3 index gt { 0 exch } { 1 index length 1 index sub } ifelse
-       % Stack: key pairs start count
-       getinterval .leafget
-     } ifelse
-   } ifelse
- } bind def
- end % pdfdict
- % Consumes the value left on the stack by .currentglobal at the top of the file.
- .setglobal
|