%    Copyright (C) 1994, 1996, 1997, 1998, 1999, 2000 Aladdin Enterprises.  All rights reserved.
% 
% This file is part of AFPL Ghostscript.
% 
% AFPL Ghostscript is distributed with NO WARRANTY OF ANY KIND.  No author or
% distributor accepts any responsibility for the consequences of using it, or
% for whether it serves any particular purpose or works at all, unless he or
% she says so in writing.  Refer to the Aladdin Free Public License (the
% "License") for full details.
% 
% Every copy of AFPL Ghostscript must include a copy of the License, normally
% in a plain ASCII text file named PUBLIC.  The License grants you the right
% to copy, modify and redistribute AFPL Ghostscript, but only under certain
% conditions described in the License.  Among other things, the License
% requires that the copyright notice and this notice be preserved on all
% copies.

% $Id: pdf_base.ps,v 1.14 2001/03/20 05:04:59 alexcher Exp $
% pdf_base.ps
% Basic parser for PDF reader.

% This handles basic parsing of the file (including the trailer
% and cross-reference table), as well as objects, object references,
% streams, and name/number trees; it doesn't include any facilities for
% making marks on the page.

/.setlanguagelevel where { pop 2 .setlanguagelevel } if
.currentglobal true .setglobal
/pdfdict where { pop } { /pdfdict 100 dict def } ifelse
pdfdict begin

% Define the name interpretation dictionary for reading values.
/valueopdict mark
  (<<) cvn { mark } bind	% don't push an actual mark!
  (>>) cvn /.dicttomark load
  ([) cvn { mark } bind		% ditto
  (]) cvn dup load
%  /true true		% see .pdfexectoken below
%  /false false		% ibid.
%  /null null		% ibid.
  /F dup cvx		% see Objects section below
  /R dup cvx		% see Objects section below
  /stream dup cvx	% see Streams section below
.dicttomark readonly def

% ------ Utilities ------ %

% Define a scratch string.  The PDF language definition says that
% no line in a PDF file can exceed 255 characters.
/pdfstring 255 string def

% Read the previous line of a file.  If we aren't at a line boundary,
% read the line containing the current position.
% Skip any blank lines.
/prevline		% - prevline <startpos> <substring>
 { PDFfile fileposition dup () pdfstring
   2 index 257 sub 0 .max PDFfile exch setfileposition
    {		% Stack: initpos linepos line string
      PDFfile fileposition
      PDFfile 2 index readline pop
      dup length 0 gt
       { 3 2 roll 5 -2 roll pop pop 2 index }
       { pop }
      ifelse
		% Stack: initpos linepos line string startpos
      PDFfile fileposition 5 index ge { exit } if
      pop
    }
   loop pop pop 3 -1 roll pop
 } bind def

% Handle the PDF 1.2 #nn escape convention when reading from a file.
% This should eventually be done in C.
/.pdffixname {			% <execname> .pdffixname <execname'>
  PDFversion 1.2 ge {
    dup .namestring (#) search {
      name#escape cvn exch pop
    } {
      pop
    } ifelse
  } if
} bind def
/name#escape			% <post> <(#)> <pre> name#escape <string>
{ exch pop
  1 index 2 () /SubFileDecode filter dup (x) readhexstring
		% Stack: post pre stream char t/f
  not { /.pdftoken cvx /syntaxerror signalerror } if
  exch closefile concatstrings
  exch 2 1 index length 2 sub getinterval
  (#) search { name#escape } if concatstrings
} bind def

% Execute a file, interpreting its executable names in a given
% dictionary.  The name procedures may do whatever they want
% to the operand stack.
/.pdftokenerror {		% <count> <opdict> <errtoken> .pdftokenerror -
  BXlevel 0 le {
    (%stderr) (w) file
    dup (****************Unknown operator: ) writestring
    dup 2 index .writecvs dup (\n) writestring flushfile
  } if pop pop
  count exch sub { pop } repeat	% pop all the operands
} bind def
/.pdfexectoken {		% <count> <opdict> <exectoken> .pdfexectoken ?
  DEBUG { dup == flush } if
  2 copy .knownget {
    exch pop exch pop exch pop exec
  } {
		% Normally, true, false, and null would appear in opdict
		% and be treated as "operators".  However, there is a
		% special fast case in the PostScript interpreter for names
		% that are defined in, and only in, systemdict and/or
		% userdict: putting these three names in the PDF dictionaries
		% destroys this property for them, slowing down their
		% interpretation in all PostScript code.  Therefore, we
		% check for them explicitly here instead.
    dup dup dup /true eq exch /false eq or exch /null eq or {
      exch pop exch pop //systemdict exch get
    } {
      .pdftokenerror
    } ifelse
  } ifelse
} bind def
/.pdfrun {			% <file> <opdict> .pdfrun -
	% Construct a procedure with the stack depth, file and opdict
	% bound into it.
  1 index cvlit count 2 sub 3 1 roll mark mark 5 2 roll
  {	% Stack: ..operands.. count opdict file
    token {
      dup type /nametype eq {
	dup xcheck {
	  .pdfexectoken
	} {
	  .pdffixname
	  exch pop exch pop DEBUG { dup ==only ( ) print flush } if
	} ifelse
      } {
	exch pop exch pop DEBUG { dup ==only ( ) print flush } if
      } ifelse
    } {
      (%%EOF) cvn cvx .pdfexectoken
    } ifelse
  }
  aload pop .packtomark cvx
  /loop cvx 2 packedarray cvx
  { stopped /PDFsource } aload pop
  PDFsource
  { store { stop } if } aload pop .packtomark cvx
  /PDFsource 3 -1 roll store exec
} bind def

% Execute a file, like .pdfrun, for a marking context.
% This temporarily rebinds LocalResources and DefaultMatrix.
/.pdfruncontext {		% <resdict> <file> <opdict> .pdfruncontext -
  /.pdfrun load LocalResources DefaultMatrix
  /LocalResources 7 -1 roll store
  /DefaultMatrix matrix currentmatrix store
  3 .execn
  /DefaultMatrix exch store
  /LocalResources exch store
} bind def

% Get the depth of the PDF operand stack.  The main program (pdf_main.ps)
% sets pdfemptycount before calling .pdfrun.
/.pdfcount {		% - .pdfcount <count>
  count pdfemptycount sub
} bind def

% ------ File reading ------ %

% Read the cross-reference entry for an (unresolved) object.
% The caller must save and restore the PDFfile position if desired.
% For invalid (free) objects, we return 0.
/readxrefentry		% <object#> readxrefentry <objpos>
 { dup Objects exch lget
   PDFfile exch setfileposition
   PDFfile token pop		% object position
   PDFfile token pop		% generation #
   PDFfile token pop		% n or f
   dup /n eq
    { pop 1 add dup 255 gt
       { Generations ltype /stringtype eq
	  {		% Convert Generations from a string to an array.
	    larray Generations llength lgrowto dup
	    0 1 2 index llength 1 sub
	     { Generations 1 index lget lput dup
	     }
	    for pop /Generations exch store
	  }
	 if
       }
      if
    }
    { /f eq
       { pop 0 }
       { /readxrefentry cvx /syntaxerror signalerror }
      ifelse
    }
   ifelse
		% Stack: obj# objpos 1+gen#
   Generations 4 -1 roll 3 -1 roll lput
 } bind def

% ================================ Objects ================================ %

% Since we may have more than 64K objects, we have to use a 2-D array to
% hold them (and the parallel Generations structure).
/lshift 9 def
/lnshift lshift neg def
/lsubmask 1 lshift bitshift 1 sub def
/lsublen lsubmask 1 add def
/larray {	% - larray <larray>
  [ [] ]
} bind def
/lstring {	% - lstring <lstring>
  [ () ]
} bind def
/ltype {	% <lseq> type <type>
  0 get type
} bind def
/lget {		% <lseq> <index> lget <value>
  dup //lsubmask and 3 1 roll //lnshift bitshift get exch get
} bind def
/lput {		% <lseq> <index> <value> lput -
  3 1 roll
  dup //lsubmask and 4 1 roll //lnshift bitshift get
  3 1 roll put
} bind def
/llength {	% <lseq> llength <length>
  dup length 1 sub dup //lshift bitshift
  3 1 roll get length add
} bind def
% lgrowto assumes newlength > llength(lseq)
/growto {	% <string/array> <length> growto <string'/array'>
  1 index type /stringtype eq { string } { array } ifelse
  2 copy copy pop exch pop
} bind def
/lgrowto {	% <lseq> <newlength> lgrowto <lseq'>
    dup //lsubmask add //lnshift bitshift dup 3 index length gt {
	% Add more sub-arrays.  Start by completing the last existing one.
		% Stack: lseq newlen newtoplen
    3 -1 roll dup llength 1 sub //lsubmask or 1 add lgrowto
		% Stack: newlen newtoplen lseq
    [ exch aload pop
    counttomark 2 add -1 roll		% newtoplen
    counttomark sub { dup 0 0 getinterval lsublen growto } repeat
    dup 0 0 getinterval ] exch
  } {
    pop
  } ifelse
	% Expand the last sub-array.
  1 sub //lsubmask and 1 add
  exch dup dup length 1 sub 2 copy
		% Stack: newsublen lseq lseq len-1 lseq len-1
  get 5 -1 roll growto put
} bind def
/lforall {	% <lseq> <proc> lforall -
  /forall cvx 2 packedarray cvx forall
} bind def

% We keep track of PDF objects using the following PostScript variables:
%
%	Generations (lstring): Generations[N] holds 1+ the current
%	    generation number for object number N.  (As far as we can tell,
%	    this is needed only for error checking.)  For free objects,
%	    Generations[N] is 0.
%
%	Objects (larray): If object N is loaded, Objects[N] is the actual
%	    object; otherwise, Objects[N] is an executable integer giving
%	    the file offset of the object's entry in the cross-reference
%	    table.
%
%	GlobalObjects (dictionary): If object N has been resolved in
%	    global VM, GlobalObjects[N] is the same as Objects[N]
%	    (except that GlobalObjects itself is stored in global VM,
%	    so the entry will not be deleted at the end of the page).
%
%	IsGlobal (lstring): IsGlobal[N] = 1 iff object N was resolved in
%	    global VM.  This is an accelerator to avoid having to do a
%	    dictionary lookup in GlobalObjects when resolving every object.

% Initialize the PDF object tables.
/initPDFobjects {		% - initPDFobjects -
  /Objects larray def
  /Generations lstring def
  .currentglobal true .setglobal
  /GlobalObjects 20 dict def
  .setglobal
  /IsGlobal lstring def
} bind def

% Grow the tables to a specified size.
/growPDFobjects {		% <minsize> growPDFobjects -
  dup Objects llength gt {
    dup Objects exch lgrowto /Objects exch def
  } if
  dup Generations llength gt {
    dup Generations exch lgrowto /Generations exch def
  } if
  dup IsGlobal llength gt {
    dup IsGlobal exch lgrowto /IsGlobal exch def
  } if
  pop
} bind def

% We represent an unresolved object reference by a procedure of the form
% {obj# gen# resolveR}.  This is not a possible PDF object, because PDF has
% no way to represent procedures.  Since PDF in fact has no way to represent
% any PostScript object that doesn't evaluate to itself, we can 'force'
% a possibly indirect object painlessly with 'exec'.
% Note that since we represent streams by executable dictionaries
% (see below), we need both an xcheck and a type check to determine
% whether an object has been resolved.
/resolved? {		% <object#> resolved? <value> true
			% <object#> resolved? false
  Objects 1 index lget dup xcheck {
    dup type /integertype eq {
		% Check whether the object is in GlobalObjects.
      pop IsGlobal 1 index lget 0 eq {
	pop false
      } {
		% Update Objects from GlobalObjects
	DEBUG { (%Global=>local: ) print dup == } if
	GlobalObjects 1 index get dup Objects 4 1 roll lput true
      } ifelse
    } {
      exch pop true
    } ifelse
  } {
    exch pop true
  } ifelse
} bind def
/oforce /exec load def
/oget {		% <array> <index> oget <object>
		% <dict> <key> oget <object>
		% Before release 6.20, this procedure stored the resolved
		% object back into the referring slot.  In order to support
		% PDF linearization, we no longer do this.
  get oforce
} bind def
% A null value in a dictionary is equivalent to an omitted key;
% we must check for this specially.
/knownoget {	% <dict> <key> knownoget <value> true
		% <dict> <key> knownoget false
		% See oget above regarding this procedure.
  .knownget {
    oforce dup null eq { pop false } { true } ifelse
  } {
    false
  } ifelse
} bind def

% PDF 1.1 defines a 'foreign file reference', but not its meaning.
% Per the specification, we convert these to nulls.
/F {		% <file#> <object#> <generation#> F <object>
		% Some PDF 1.1 files use F as a synonym for f!
   .pdfcount 3 lt { f } { pop pop pop null } ifelse
} bind def

/checkgeneration {  % <object#> <generation#> checkgeneration <object#> <OK>
  Generations 2 index lget 1 sub 1 index eq {
    pop true
  } {
    QUIET not {
      Generations 2 index lget 0 eq {
	(Warning: reference to free object: )
      } {
	(Warning: wrong generation: )
      } ifelse print 1 index =only ( ) print =only ( R) =
    } {
      pop
    } ifelse false
  } ifelse
} bind def
/R {		% <object#> <generation#> R <object>
  /resolveR cvx 3 packedarray cvx
} bind def

% If we encounter an object definition while reading sequentially,
% we just store it away and keep going.
/objopdict mark
  valueopdict { } forall
  /endobj dup cvx
.dicttomark readonly def
/obj {			% <object#> <generation#> obj <object>
  PDFfile objopdict .pdfrun
} bind def
/endobj {		% <object#> <generation#> <object> endobj <object>
  3 1 roll
		% Read the xref entry if we haven't yet done so.
		% This is only needed for generation # checking.
  1 index resolved? {
    pop
  } {
    PDFfile fileposition
    2 index readxrefentry pop
    PDFoffset add PDFfile exch setfileposition
  } ifelse
  checkgeneration {
		% The only global objects we bother to save are
		% (resource) dictionaries.
    1 index dup gcheck exch type /dicttype eq and {
      DEBUG { (%Local=>global: ) print dup == } if
      GlobalObjects 1 index 3 index put
      IsGlobal 1 index 1 put
    } if
    Objects exch 2 index lput
  } {
    pop pop null
  } ifelse
} bind def

% When resolving an object reference, we stop at the endobj.
/resolveopdict mark
  valueopdict { } forall
  /endobj { endobj exit } bind
                % OmniForm generates PDF file with endobj missing in some
                % objects. AR ignores this. So we have to do it too.
  /obj { pop pop endobj exit } bind
.dicttomark readonly def
/resolveR {		% <object#> <generation#> resolveR <object>
  DEBUG { (%Resolving: ) print 2 copy 2 array astore == } if
  1 index resolved? {
    exch pop exch pop
  } {
    PDFfile fileposition 3 1 roll
    1 index readxrefentry
    3 1 roll checkgeneration {
			% Stack: savepos objpos obj#
	 exch PDFoffset add PDFfile exch setfileposition
	 PDFfile token pop 2 copy ne
	  { (xref error!) = /resolveR cvx /rangecheck signalerror
	  }
	 if pop PDFfile token pop
	 PDFfile token pop /obj ne
	  { (xref error!) = /resolveR cvx /rangecheck signalerror
	  }
	 if
	 pdf_run_resolve	% PDFfile resolveopdict .pdfrun
    }
    {		% Don't cache if the generation # is wrong.
	 pop pop null
    } ifelse
    exch PDFfile exch setfileposition
  } ifelse
} bind def      

% ================================ Streams ================================ %

% We represent a stream by an executable dictionary that contains,
% in addition to the contents of the original stream dictionary:
%	/File - the file or string where the stream contents are stored,
%	  if the stream is not an external one.
%	/FilePosition - iff File is a file, the position in the file
%	  where the contents start.
%	/StreamKey - the key used to decrypt this stream, if any.
% We do the real work of constructing the data stream only when the
% contents are needed.

% Construct a stream.  The length is not reliable in the face of
% different end-of-line conventions, but it's all we've got.
%
% PDF files are inconsistent about what may fall between the 'stream' keyword
% and the actual stream data, and it appears that no one algorithm can
% detect this reliably.  We used to try to guess whether the file included
% extraneous \r and/or \n characters, but we no longer attempt to do so,
% especially since the PDF 1.2 specification states flatly that the only
% legal terminators following the 'stream' keyword are \n or \r\n, both of
% which are properly skipped and discarded by the token operator.
/stream {	% <dict> stream <modified_dict>
  dup /F known dup PDFsource PDFfile eq or {
    not {
      dup /File PDFfile put
      dup /FilePosition PDFfile fileposition put
      DEBUG { (%FilePosition: ) print dup /FilePosition get == } if
    } if
    PDFfile fileposition 1 index /Length oget add
      PDFfile exch setfileposition
  } {
    pop
	% We're already reading from a stream, which we can't reposition.
	% Capture the sub-stream contents in a string.
    dup /Length oget string PDFsource exch readstring
    not {
      (Unexpected EOF in stream!) =
      /stream cvx /rangecheck signalerror
    } if
    1 index exch /File exch put
  } ifelse
  PDFsource token pop
    /endstream ne { /stream cvx /syntaxerror signalerror } if
  cvx
} bind def
/endstream {
  exit
} bind def

% Contrary to the published PDF (1.3) specification, Acrobat Reader
% accepts abbreviated filter names everywhere, not just for in-line images,
% and some applications (notably htmldoc) rely on this.
/unabbrevfilterdict mark
  /AHx /ASCIIHexDecode  /A85 /ASCII85Decode  /CCF /CCITTFaxDecode
  /DCT /DCTDecode  /Fl /FlateDecode  /LZW /LZWDecode  /RL /RunLengthDecode
.dicttomark readonly def

% Extract and apply filters.
/filterparms {		% <dict> <DPkey> <Fkey> filterparms
			%   <dict> <parms> <filternames>
  2 index exch .knownget {
    exch 2 index exch .knownget {
		% Both filters and parameters.
      exch dup type /nametype eq {
	1 array astore exch 1 array astore exch
      } if
    } {
		% Filters, but no parameters.
      null exch
      dup type /nametype eq { 1 array astore } if
    } ifelse
  } {
		% No filters: ignore parameters, if any.
    pop null { }
  } ifelse
} bind def
/filtername {		% <filtername> filtername <filtername'>
  //unabbrevfilterdict 1 index .knownget { exch pop } if
} bind def
/applyfilters {		% <parms> <source> <filternames> applyfilters <stream>
  2 index null eq {
    { filtername filter }
  } {
    {		% Stack: parms stream filtername
      2 index 0 oget dup null eq { pop } { exch } ifelse filtername filter
      exch dup length 1 sub 1 exch getinterval exch
    }
  } ifelse forall exch pop
} bind def

% Resolve a stream dictionary to a PostScript stream.
% Streams with no filters require special handling:
%	- If we are going to interpret their contents, we let endstream
%	  terminate the interpretation loop;
%	- If we are just going to read data from them, we impose
%	  a SubFileDecode filter that reads just the requisite amount of data.
% Note that, in general, resolving a stream repositions PDFfile.
% Clients must save and restore the position of PDFfile themselves.
/resolvestream {	% <streamdict> <readdata?> resolvestream <stream>
  1 index /F .knownget {
		% This stream is stored on an external file.
    (r) file 3 -1 roll
    /FDecodeParms /FFilter filterparms
		% Stack: readdata? file dict parms filternames
    4 -1 roll exch
    pdf_decrypt_stream
    applyfilters
  } {
    exch dup /FilePosition .knownget {
      1 index /File get exch setfileposition
    } if
		% Stack: readdata? dict
    /DecodeParms /Filter filterparms
		% Stack: readdata? dict parms filternames
    2 index /File get exch
		% Stack: readdata? dict parms file/string filternames
    pdf_decrypt_stream		% add decryption if needed
    dup length 0 eq {
		% All the PDF filters have EOD markers, but in this case
		% there is no specified filter.
      pop exch pop
		% Stack: readdata? dict file/string
      2 index {
		% We're going to read data; use a SubFileDecode filter.
	1 index /Length oget () /SubFileDecode filter
      } {
	dup type /filetype ne {
		% Use a SubFileDecode filter to read from a string.
	  0 () /SubFileDecode filter
	} if
      } ifelse
    } {
      applyfilters
    } ifelse
  } ifelse
		% Stack: readdata? dict file
  exch pop exch pop
} bind def

% ============================ Name/number trees ============================ %

/nameoget {		% <nametree> <key> nameoget <obj|null>
  exch /Names exch .treeget
} bind def

/numoget {		% <numtree> <key> numoget <obj|null>
  exch /Nums exch .treeget
} bind def

/.treeget {		% <key> <leafkey> <tree> .treeget <obj|null>
  dup /Kids knownoget {
    exch pop .branchget
  } {
    exch get .leafget
  } ifelse
} bind def

/.branchget {		%  <key> <leafkey> <kids> .branchget <obj|null>
  dup length 0 eq {
    pop pop pop null
  } {
    dup length -1 bitshift 2 copy oget
			% Stack: key leafkey kids mid kids[mid]
    dup /Limits oget aload pop
			% Stack: key leafkey kids mid kids[mid] min max
    6 index lt {
      pop pop
      1 add 1 index length 1 index sub getinterval .branchget
    } {
      5 index gt {
	pop
	0 exch getinterval .branchget
      } {
	exch pop exch pop .treeget
      } ifelse
    } ifelse
  } ifelse
} bind def

/.leafget {		% <key> <pairs> .leafget <obj|null>
  dup length 2 eq {
    dup 0 get 2 index eq { 1 oget } { pop null } ifelse
    exch pop
  } {
    dup length -1 bitshift -2 and 2 copy oget
			% Stack: key pairs mid pairs[mid]
    3 index gt { 0 exch } { 1 index length 1 index sub } ifelse
    getinterval .leafget
  } ifelse
} bind def

end			% pdfdict
.setglobal