plan9front/sys/lib/ghostscript/pdf_base.ps
2015-02-20 00:21:45 +01:00

921 lines
34 KiB
PostScript

% Copyright (C) 1994-2003 artofcode LLC. All rights reserved.
%
% This software is provided AS-IS with no warranty, either express or
% implied.
%
% This software is distributed under license and may not be copied,
% modified or distributed except as expressly authorized under the terms
% of the license contained in the file LICENSE in this distribution.
%
% For more information about licensing, please refer to
% http://www.ghostscript.com/licensing/. For information on
% commercial licensing, go to http://www.artifex.com/licensing/ or
% contact Artifex Software, Inc., 101 Lucas Valley Road #110,
% San Rafael, CA 94903, U.S.A., +1(415)492-9861.
% $Id: pdf_base.ps,v 1.48 2005/09/16 19:01:30 ray Exp $
% pdf_base.ps
% Basic parser for PDF reader.
% This handles basic parsing of the file (including the trailer
% and cross-reference table), as well as objects, object references,
% streams, and name/number trees; it doesn't include any facilities for
% making marks on the page.
% Select LanguageLevel 2 when the interpreter provides the switch.
/.setlanguagelevel where {
  pop
  2 .setlanguagelevel
} if
% Leave the current VM allocation mode on the operand stack (it is consumed
% by the .setglobal at the very end of this file) and allocate globally.
.currentglobal
true .setglobal
% Reuse pdfdict if it is already defined; otherwise create it.
/pdfdict where {
  pop
} {
  /pdfdict 100 dict def
} ifelse
pdfdict begin
% Define the name interpretation dictionary for reading values.
/valueopdict mark
  % '<<' and '[' are entered as procedures that push a mark when they are
  % executed; an actual mark must not be pushed while this dictionary is
  % being collected.
  (<<) cvn { mark } bind
  (>>) cvn {
    { .dicttomark } stopped {
      ( **** File has an unbalanced >> \(close dictionary\).\n)
      pdfformaterror
    } if
  } bind
  ([) cvn { mark } bind
  % ']' maps to whatever ']' is currently bound to.
  (]) cvn dup load
  % true, false and null are deliberately not entered here:
  % see .pdfexectoken below.
  /F dup cvx		% see Objects section below
  /R dup cvx		% see Objects section below
  /stream dup cvx	% see Streams section below
.dicttomark readonly def
% ------ Utilities ------ %
% Define a scratch string. The PDF language definition says that
% no line in a PDF file can exceed 255 characters.
% prevline (below) hands this buffer to readline.
/pdfstring 255 string def
% Read the previous line of a file. If we aren't at a line boundary,
% read the line containing the current position.
% Skip any blank lines.
/prevline % - prevline <startpos> <substring>
% Returns the last non-empty line read before the file position reaches
% the position PDFfile had on entry, together with the position at which
% that line starts.
{ PDFfile fileposition dup () pdfstring
% Back up 257 bytes (clamped at 0) and read forward line by line.
2 index 257 sub 0 .max PDFfile exch setfileposition
{ % Stack: initpos linepos line string
PDFfile fileposition
PDFfile 2 index readline pop
dup length 0 gt
% A non-empty line replaces the remembered linepos/line pair;
% an empty line is discarded.
{ 3 2 roll 5 -2 roll pop pop 2 index }
{ pop }
ifelse
% Stack: initpos linepos line string startpos
% Stop once reading has reached or passed the initial position.
PDFfile fileposition 5 index ge { exit } if
pop
}
% Drop startpos and the scratch string, then initpos.
loop pop pop 3 -1 roll pop
} bind def
% Handle the PDF 1.2 #nn escape convention when reading from a file.
% This should eventually be done in C.
/.pdffixname {		% <execname> .pdffixname <execname'>
  % The #nn escape is only interpreted for PDF 1.2 and later.
  PDFversion 1.2 ge {
    dup .namestring (#) search {
      % Stack: name post (#) pre
      name#escape cvn
      exch pop		% discard the original name
    } {
      pop		% no '#' present: drop the string, keep the name
    } ifelse
  } if
} bind def
/name#escape % <post> <(#)> <pre> name#escape <string>
% Takes the three results of a successful '(#) search' on a name string and
% returns the string with the escape decoded; it calls itself for any
% further '#' in the text that follows.
{ exch pop
% Decode one byte from the hex digits at the start of <post>.
% NOTE(review): the SubFileDecode filter with count 2 presumably limits
% reading to the first two bytes of <post> -- confirm.
1 index 2 () /SubFileDecode filter dup (x) readhexstring
% Stack: post pre stream char t/f
not { % tolerate, but complain about bad syntax
pop closefile (#) concatstrings exch
( **** Warning: Invalid hex following '#' name escape, using literal '#' in name.\n)
pdfformaterror
} {
% Append the decoded character to <pre> and skip two characters of <post>.
exch closefile concatstrings
exch 2 1 index length 2 sub getinterval
} ifelse
% Process the remaining text, then join it to what has been built so far.
(#) search { name#escape } if concatstrings
} bind def
% Execute a file, interpreting its executable names in a given
% dictionary. The name procedures may do whatever they want
% to the operand stack.
/.pdftokenerror {		% <count> <opdict> <errtoken> .pdftokenerror -
  % Handle a token that .pdfexectoken could not execute.
  % When BXlevel <= 0 the token is reported and rescanned with the
  % PDFScanInvNum user parameter set; if the rescan yields a number, that
  % number replaces <count> <opdict> <errtoken> on the stack.  In every
  % other case the arguments are popped together with all operands above
  % the stack depth recorded in <count>.
  BXlevel 0 le {
    ( **** Unknown operator: ') pdfformaterror
    dup =string cvs pdfformaterror
    % Attempt a retry scan of the element after changing to PDFScanInvNum
    << /PDFScanInvNum true >> setuserparams
    =string cvs
    token pop exch pop
    % Reset to the default scanning rules as soon as the retry scan is
    % done, so that the relaxed rules are not left enabled when the retry
    % does not produce a number.
    << /PDFScanInvNum null >> setuserparams
    dup type
    dup /integertype eq exch /realtype eq or {
      exch pop exch pop		% drop opdict and count, keep the number
      (', processed as number, value: ) pdfformaterror
      dup =string cvs pdfformaterror (\n) pdfformaterror
      false	% suppress any stack cleanup
    } {
      % error was non-recoverable with modified scanning rules
      ('\n) pdfformaterror
      true
    } ifelse
  } {
    true
  } ifelse
  {	% clean up the operand stack if this was non-recoverable
    pop pop count exch sub { pop } repeat	% pop all the operands
  } if
} bind def
/.pdfexectoken { % <count> <opdict> <exectoken> .pdfexectoken ?
% Execute one executable name token: run its entry in <opdict> if it has
% one, push the systemdict value for true/false/null, and otherwise pass
% the arguments on to .pdftokenerror.  With PDFDEBUG set the token is
% echoed first; with PDFSTEP also set, execution can pause for input.
PDFDEBUG {
pdfdict /PDFSTEPcount known not { pdfdict /PDFSTEPcount 1 .forceput } if
PDFSTEP {
% Count every token seen while stepping.
pdfdict /PDFtokencount 2 copy .knownget { 1 add } { 1 } ifelse .forceput
PDFSTEPcount 1 gt {
% Still skipping tokens: just decrement the remaining count.
pdfdict /PDFSTEPcount PDFSTEPcount 1 sub .forceput
} {
dup ==only
( step # ) print PDFtokencount =only
( ? ) print flush 1 false .outputpage
% Read a line from stdin: a token on it becomes the new PDFSTEPcount,
% a line without a token sets the count to 1.
(%stdin) (r) file 255 string readline {
token {
exch pop pdfdict /PDFSTEPcount 3 -1 roll .forceput
} {
pdfdict /PDFSTEPcount 1 .forceput
} ifelse % token
} {
pop /PDFSTEP false def % EOF on stdin
} ifelse % readline
} ifelse % PDFSTEPcount > 1
} {
dup ==only () = flush
} ifelse % PDFSTEP
} if % PDFDEBUG
2 copy .knownget {
% Found in opdict: drop token, opdict and count, then run the entry.
exch pop exch pop exch pop exec
} {
% Normally, true, false, and null would appear in opdict
% and be treated as "operators". However, there is a
% special fast case in the PostScript interpreter for names
% that are defined in, and only in, systemdict and/or
% userdict: putting these three names in the PDF dictionaries
% destroys this property for them, slowing down their
% interpretation in all PostScript code. Therefore, we
% check for them explicitly here instead.
dup dup dup /true eq exch /false eq or exch /null eq or {
exch pop exch pop //systemdict exch get
} {
.pdftokenerror
} ifelse
} ifelse
} bind def
/.pdfrun { % <file> <opdict> .pdfrun -
% Read tokens from <file> in a loop.  Executable names are passed to
% .pdfexectoken together with <opdict>; literal names go through
% .pdffixname and, like every other token, are left on the operand stack.
% When token reports end of file, an executable %%EOF name is passed to
% .pdfexectoken instead.  PDFsource is set to <file> while the loop runs
% and is put back to its previous value afterwards; if the loop was
% terminated by stop, stop is executed again after the restore.
% Construct a procedure with the stack depth, file and opdict
% bound into it.
1 index cvlit count 2 sub 3 1 roll mark mark 5 2 roll
{ % Stack: ..operands.. count opdict file
token {
dup type /nametype eq {
dup xcheck {
.pdfexectoken
} {
.pdffixname
exch pop exch pop PDFDEBUG {
PDFSTEPcount 1 le {
dup ==only ( ) print flush
} if
} if
} ifelse
} {
exch pop exch pop PDFDEBUG {
PDFSTEPcount 1 le {
dup ==only ( ) print flush
} if
} if
} ifelse
} {
(%%EOF) cvn cvx .pdfexectoken
} ifelse
}
% Prefix the loop body with count, opdict and file (everything above the
% inner mark) and make the result executable.
% NOTE(review): .packtomark presumably packs the objects above the nearest
% mark into a packed array and removes the mark -- confirm.
aload pop .packtomark cvx
/loop cvx 2 packedarray cvx
% Build: { {body loop} stopped /PDFsource <old PDFsource> store { stop } if }
{ stopped /PDFsource } aload pop
PDFsource
{ store { stop } if } aload pop .packtomark cvx
% Stack: file wrapper.  Make <file> the current PDFsource and run.
/PDFsource 3 -1 roll store exec
} bind def
% Execute a file, like .pdfrun, for a marking context.
% This temporarily rebinds LocalResources and DefaultQstate.
/.pdfruncontext { % <resdict> <file> <opdict> .pdfruncontext -
% Push the .pdfrun procedure followed by the current LocalResources and
% DefaultQstate values; the latter two are stored back after the run.
/.pdfrun load LocalResources DefaultQstate
% Install <resdict> as LocalResources and the value of qstate as
% DefaultQstate for the duration of the run.
/LocalResources 7 -1 roll store
/DefaultQstate qstate store
% NOTE(review): presumably .execn executes the three objects pushed above
% in the order they were pushed, i.e. runs .pdfrun on <file> <opdict> and
% then pushes the two saved values again -- confirm against .execn.
3 .execn
/DefaultQstate exch store
/LocalResources exch store
} bind def
% Get the depth of the PDF operand stack. The caller sets pdfemptycount
% before calling .pdfrun or .pdfruncontext. It is initially set by
% pdf_main, and is also set by any routine which changes the operand
% stack depth (currently .pdfpaintproc, although there are other callers
% of .pdfrun{context} which have not been checked for opstack depth).
/.pdfcount {		% - .pdfcount <count>
  % Operand stack depth relative to the baseline held in pdfemptycount.
  count
  pdfemptycount sub
} bind def
% ================================ Objects ================================ %
% Since we may have more than 64K objects, we have to use a 2-D array to
% hold them (and the parallel Generations structure).
% An index is split into a sub-array number (the bits above the low
% lshift bits) and an offset within that sub-array (the low lshift bits).
/lshift 9 def
/lnshift lshift neg def % shift count that turns an index into a sub-array number
/lsubmask 1 lshift bitshift 1 sub def % mask that selects the offset bits
/lsublen lsubmask 1 add def % number of elements in a full sub-array
/larray {		% - larray <larray>
  % A new large array: one top-level slot holding an empty sub-array.
  0 array 1 array astore
} bind def
/lstring {		% - lstring <lstring>
  % A new large string: one top-level slot holding an empty string.
  () 1 array astore
} bind def
/ltype {		% <lseq> ltype <type>
  % The type of a large sequence is the type of its first sub-sequence.
  0 get
  type
} bind def
/lget {			% <lseq> <index> lget <value>
  exch 1 index			% index lseq index
  //lnshift bitshift get	% index subseq
  exch //lsubmask and		% subseq offset
  get
} bind def
/lput {			% <lseq> <index> <value> lput -
  3 -1 roll 2 index		% index value lseq index
  //lnshift bitshift get	% index value subseq
  3 -1 roll //lsubmask and	% value subseq offset
  3 -1 roll			% subseq offset value
  put
} bind def
/llength {		% <lseq> llength <length>
  % (number of sub-sequences - 1) shifted left by lshift, plus the length
  % of the last sub-sequence.
  dup length 1 sub		% lseq last#
  2 copy get length		% lseq last# lastlen
  3 1 roll exch pop		% lastlen last#
  //lshift bitshift add
} bind def
% lgrowto assumes newlength > llength(lseq)
/growto {		% <string/array> <length> growto <string'/array'>
  % Allocate a new object of the same kind with the requested length ...
  1 index type /stringtype eq { string } { array } ifelse
  % ... copy the old contents into its start and return the new object.
  dup 3 -1 roll exch		% new old new
  copy pop
} bind def
/lgrowto { % <lseq> <newlength> lgrowto <lseq'>
% Grow a large sequence to <newlength> elements.  If more sub-sequences
% are needed a new top-level array is built; in every case the last
% sub-sequence is replaced by a grown copy.
% newtoplen = number of sub-sequences needed for <newlength> elements.
dup //lsubmask add //lnshift bitshift dup 3 index length gt {
% Add more sub-arrays. Start by completing the last existing one.
% Stack: lseq newlen newtoplen
3 -1 roll dup llength 1 sub //lsubmask or 1 add lgrowto
% Stack: newlen newtoplen lseq
[ exch aload pop
counttomark 2 add -1 roll % newtoplen
% Append full-size sub-sequences of the same type as the existing ones,
% leaving room for one more ...
counttomark sub { dup 0 0 getinterval lsublen growto } repeat
% ... which starts out empty and is expanded below.
dup 0 0 getinterval ] exch
} {
pop
} ifelse
% Expand the last sub-array.
1 sub //lsubmask and 1 add
exch dup dup length 1 sub 2 copy
% Stack: newsublen lseq lseq len-1 lseq len-1
get 5 -1 roll growto put
} bind def
/lforall {		% <lseq> <proc> lforall -
  % Wrap <proc> as { <proc> forall } and apply that to every sub-sequence,
  % so that <proc> is run on each element in turn.
  /forall cvx
  2 packedarray cvx
  forall
} bind def
% We keep track of PDF objects using the following PostScript variables:
%
% Generations (lstring): Generations[N] holds 1+ the current
% generation number for object number N. (As far as we can tell,
% this is needed only for error checking.) For free objects,
% Generations[N] is 0.
%
% Objects (larray): If object N is loaded, Objects[N] is the actual
% object; otherwise, Objects[N] is an executable integer giving
% the file offset of the object's location in the file. If
% ObjectStream[N] is non-zero then Objects[N] contains the index
% into the object stream instead of the file offset of the object.
%
% ObjectStream (larray): If object N is in an object stream then
% ObjectStream[N] holds the object number of the object stream.
% Otherwise ObjectStream[N] contains 0. If ObjectStream[N]
% is non-zero then Objects[N] contains the index into the object
% stream.
%
% GlobalObjects (dictionary): If object N has been resolved in
% global VM, GlobalObjects[N] is the same as Objects[N]
% (except that GlobalObjects itself is stored in global VM,
% so the entry will not be deleted at the end of the page).
%
% IsGlobal (lstring): IsGlobal[N] = 1 iff object N was resolved in
% global VM. This is an accelerator to avoid having to do a
% dictionary lookup in GlobalObjects when resolving every object.
% Initialize the PDF object tables.
/initPDFobjects {	% - initPDFobjects -
  % Tables allocated in the current VM mode.
  /ObjectStream larray def
  /Objects larray def
  /Generations lstring def
  /IsGlobal lstring def
  % GlobalObjects alone is allocated in global VM; the previous allocation
  % mode is restored afterwards.
  .currentglobal
  true .setglobal
  /GlobalObjects 20 dict def
  .setglobal
} bind def
% Grow the tables to a specified size.
/growPDFobjects {	% <minsize> growPDFobjects -
  % Visit the four parallel tables in turn and grow each one that is
  % shorter than <minsize>.
  { /ObjectStream /Objects /Generations /IsGlobal } {
    % Stack: minsize tablename
    2 copy load llength gt {
      dup load 2 index lgrowto def
    } {
      pop
    } ifelse
  } forall
  pop
} bind def
% We represent an unresolved object reference by a procedure of the form
% {obj# gen# resolveR}. This is not a possible PDF object, because PDF has
% no way to represent procedures. Since PDF in fact has no way to represent
% any PostScript object that doesn't evaluate to itself, we can 'force'
% a possibly indirect object painlessly with 'exec'.
% Note that since we represent streams by executable dictionaries
% (see below), we need both an xcheck and a type check to determine
% whether an object has been resolved.
/resolved? {		% <object#> resolved? <value> true
			% <object#> resolved? false
  % Report whether object N already has a value.  An executable integer in
  % Objects[N] means the object has not been read; in that case it may
  % still be available from GlobalObjects, and Objects[N] is then
  % refreshed from there.
  Objects 1 index lget dup xcheck {	% Check if executable
    dup type /integertype eq {		% Check if an integer
		% Check whether the object is in GlobalObjects.
      pop IsGlobal 1 index lget 0 eq {	% 0 --> Not in GlobalObjects
        pop false			% The object is not resolved
      } {				% The object is in GlobalObjects
		% Update Objects from GlobalObjects
        PDFDEBUG { (%Global=>local: ) print dup == } if
		% Stack: obj#
        GlobalObjects 1 index get	% obj# value
		% lput takes <lseq> <index> <value> from the top of the stack, so
		% a copy of the value has to end up underneath those three.
        dup Objects 4 2 roll		% value Objects obj# value
        lput true			% value true
      } ifelse
    } {			% Else object is executable but not integer
      exch pop true	% Therefore must be executable dict. (stream)
    } ifelse
  } {			% Else object is not executable.
    exch pop true	% Therefore it must have been resolved.
  } ifelse
} bind def
% oforce is the exec operator under another name: executing an unresolved
% reference procedure {obj# gen# resolveR} yields the object.
/oforce /exec load def
/oget {		% <array> <index> oget <object>
		% <dict> <key> oget <object>
  % Fetch an element and force it.  The result is not stored back into the
  % referring slot (it was before release 6.20; this was dropped in order
  % to support PDF linearization).
  get
  oforce
} bind def
/oforce_array {		% <array> oforce_array <array>
  % Build a new array holding the forced value of every element.
  mark exch
  { oforce } forall
  ]
} bind def
/oforce_elems {		% <array> oforce_elems <first> ... <last>
  % Leave the forced value of every element on the operand stack.
  {
    oforce
  } forall
} bind def
% A null value in a dictionary is equivalent to an omitted key;
% we must check for this specially.
/knownoget {		% <dict> <key> knownoget <value> true
			% <dict> <key> knownoget false
  % See oget above regarding this procedure.
  .knownget {
    oforce
    dup null ne			% value found?
    dup not { exch pop } if	% a null value counts as a missing key
  } {
    false
  } ifelse
} bind def
% PDF 1.1 defines a 'foreign file reference', but not its meaning.
% Per the specification, we convert these to nulls.
/F {		% <file#> <object#> <generation#> F <object>
  .pdfcount 3 lt {
    % Some PDF 1.1 files use F as a synonym for f!
    f
  } {
    % A foreign file reference: replace the three operands by null.
    pop pop pop
    null
  } ifelse
} bind def
% Verify the generation number for a specified object
% Note: each value in Generations is the generation number plus 1.
% If the value in Generations is zero then the object is free.
/checkgeneration {  % <object#> <generation#> checkgeneration <object#> <OK>
  Generations 2 index lget 1 sub 1 index eq {
    % The stored generation matches the requested one.
    pop true
  } {
    % Mismatch: warn unless QUIET is set.
    QUIET not {
      Generations 2 index lget 0 eq {	% a stored value of 0 means free
        ( **** Warning: reference to free object: )
      } {
        ( **** Warning: wrong generation: )
      } ifelse
      2 index =string cvs concatstrings		% append obj #
      ( ) concatstrings
      1 index =string cvs concatstrings		% append gen #
      ( R\n) concatstrings
      pdfformaterror
    } if
    % A mismatching reference is still accepted when it asked for
    % generation 0.
    0 eq
  } ifelse
} bind def
/R {		% <object#> <generation#> R <object>
  % Build the executable packed array {obj# gen# resolveR} that stands for
  % an unresolved reference.
  /resolveR cvx
  3 packedarray
  cvx
} bind def
% If we encounter an object definition while reading sequentially,
% we just store it away and keep going.
/objopdict mark
  % Everything from valueopdict ...
  valueopdict { } forall
  % ... plus endobj, entered as an executable name.
  /endobj dup cvx
.dicttomark readonly def
/obj {		% <object#> <generation#> obj <object>
  % Keep reading from PDFfile, interpreting names through objopdict.
  PDFfile objopdict
  .pdfrun
} bind def
/endobj {	% <object#> <generation#> <object> endobj <object>
  % Store a freshly read object in Objects (and in GlobalObjects if it is
  % a dictionary in global VM) and return it.  If the generation check
  % fails, nothing is stored and null is returned.
  3 1 roll
	% Read the xref entry if we haven't yet done so.
	% This is only needed for generation # checking.
  1 index resolved? {
    pop
  } if
  checkgeneration {
	% Stack: object object#
	% The only global objects we bother to save are
	% (resource) dictionaries.
    1 index dup gcheck exch type /dicttype eq and {
      PDFDEBUG { (%Local=>global: ) print dup == } if
      GlobalObjects 1 index 3 index put
	% IsGlobal is an lstring (an array of strings), so the flag has to
	% be written with lput; a plain put would address the top-level
	% array instead of the byte for this object.
      IsGlobal 1 index 1 lput
    } if
    Objects exch 2 index lput
  } {
    pop pop null
  } ifelse
} bind def
% When resolving an object reference in an object stream, we stop at
% the end of file. Note: Objects in an object stream do not have either
% a starting 'obj' or an ending 'endobj'.
/resolveobjstreamopdict mark
  % Everything from valueopdict ...
  valueopdict { } forall
  % ... plus the %%EOF name, which leaves the reading loop.
  (%%EOF) cvn { exit } bind
.dicttomark readonly def
% Note: This version of this function is not currently being used.
% Resolve all objects in an object stream
% NOTE(review): this definition is replaced by the definition of the same
% name that follows it in this file.  It reads each object of the stream
% into a separate string using the offset table at the start of the stream.
/resolveobjectstream { % <object stream #> resolveobjectstream -
PDFDEBUG { (%Resolving object stream: ) print } if
0 resolveR % Get the objectstream dict, all objstrms use 0 as the gen #
dup /First get % Save location of first object onto the stack
1 index /N get % Save number of objects onto the stack
2 index false resolvestream % Convert stream dict into a stream
/ReusableStreamDecode filter % We need to be able to position stream
% Objectstreams begin with list of object numbers and locations
% Create two arrays to hold object numbers and stream location
1 index array % Array for holding object number
2 index array % Array for holding stream object location
% Get the object numbers and locations.
0 1 5 index 1 sub { % Loop and collect obj # and locations
% Stack: objstreamdict First N objectstream [obj#] [loc] index
2 index 1 index % Setup to put obj# into object number array
5 index token pop put % Get stream, then get obj# and put into array
1 index 1 index % Setup to put object loc into location array
5 index token pop put % Get stream, get obj loc and put into array
pop % Remove loop index
} for
% Create a bytestring big enough for reading any object data
% Scan for the size of the largest object
0 0 % Init max object size and previous location
2 index { % Loop through all object locations
% Stack: ... maxsize prevloc currentloc
dup 4 1 roll % Save copy of object location into stack
exch sub % Object size = currentloc - prevloc
.max % Determine maximum object size
exch % Put max size under previous location
} forall
pop % Remove previous location
.bigstring % Create bytestring based upon max obj size
% Move to the start of the object data
3 index 6 index % Get objectstream and start of first object
setfileposition % Move to the start of the data
% Read the data for all objects except the last. We do
% not know the size of the last object so we need to treat
% it as a special case.
0 1 6 index 2 sub {
dup 4 index exch get % Get our current object number
% Stack: objstreamdict First N objectstream [obj#] [loc]
% bytestring loopindex object#
dup resolved? { % If we already have this object
% NOTE(review): the next line is debugging residue; 'xxx' is not defined
% anywhere in this file, so reaching it presumably raises an error.
(yyy) = pstack (yyy) = flush xxx
pop pop % Remove object and object number
1 add 2 index exch get % Get location of next object
% NOTE(review): after the add below, the second '6 index' appears to
% select First rather than the stream -- verify before reviving this code.
6 index add 6 index exch % Form location of next object and get stream
setfileposition % Move to the start of the next object data
} { % Else this is a new object ...
% We are going to create a string for reading the object
2 index 0 % use our working string
% Determine the size of the object
5 index 4 index 1 add get % Get location of the next object
6 index 5 index get % Get location of this object
sub % Size of object = next loc - this loc
getinterval % Create string for reading object
6 index exch readstring pop % Read object
/ReusableStreamDecode filter % Convert string into a stream
resolveobjstreamopdict .pdfrun % Get PDF object
Objects exch 2 index exch lput % Put object into Objects array
pop pop % Remove object # and loop index
} ifelse
} for
pop pop % Remove our working string and loc array
% Now read the last object in the object stream. Since it
% is the last object, we can use the original stream and
% terminate when we hit the end of the stream
% Stack: objstreamdict First N objectstream [obj#]
2 index 1 sub get % Get our current object number
dup resolved? not { % If we do not already have this object
exch % Get our object stream
resolveobjstreamopdict .pdfrun % Get PDF object
Objects exch 2 index exch lput % Put object into Objects array
} if
pop pop pop pop % Clear stack
} bind def
% Resolve all objects in an object stream
% Reads the object numbers from the table at the start of the stream, then
% reads all objects in one .pdfrun pass and stores each object that is not
% already resolved into Objects.
/resolveobjectstream { % <object stream #> resolveobjectstream -
PDFDEBUG { (%Resolving object stream: ) print } if
0 resolveR % Get the objectstream dict, all objstrms use 0 as the gen #
dup /Type get /ObjStm ne { % Verify type is object stream
( **** Incorrect Type in object stream dictionary.\n) pdfformaterror
/resolveobjectstream cvx /typecheck signalerror
} if
dup /N get % Save number of objects onto the stack
1 index false resolvestream % Convert stream dict into a stream
/ReusableStreamDecode filter % We need to be able to position stream
% Objectstreams begin with list of object numbers and locations
1 index array % Create array for holding object number
% Get the object numbers
0 1 4 index 1 sub { % Loop and collect obj numbers
% Stack: objstreamdict N objectstream [obj#] loopindex
1 index 1 index % Setup to put obj# into object number array
4 index token pop put % Get stream, then get obj# and put into array
2 index token pop pop pop % Get stream, get obj loc and clear stack
} for
% Move to the start of the object data
1 index 4 index /First get % Get objectstream and start of first object
setfileposition % Move to the start of the data
% We disable PDFDEBUG while reading the data stream. We will
% print the data later
PDFDEBUG /PDFDEBUG false def % Save PDFDEBUG and disable it while reading
% Read the data for all objects. We check to see if we get
% the number of objects that we expect.
% Stack: objstreamdict N objectstream [obj#] PDFDEBUG
mark 4 -1 roll % Get objectstream
count 5 index add % Determine stack depth with objects
/PDFObjectStkCount exch def
resolveobjstreamopdict .pdfrun % Get PDF objects
PDFObjectStkCount count ne { % Check stack depth
( **** Incorrect object count in object stream.\n) pdfformaterror
/resolveobjectstream cvx /rangecheck signalerror
} if
% We have the object data
counttomark array astore % Put objects into an array
exch pop % Remove mark
exch /PDFDEBUG exch def % Restore PDFDEBUG flag
% Save the objects into Objects
0 1 2 index length 1 sub { % Loop through all objects
% Stack: objstreamdict N [obj#] [objects] loopindex
dup 3 index exch get % Get our current object number
dup resolved? { % If we already have this object
pop pop % Remove object and object number
} { % Else if we do not have this object
PDFDEBUG { (%Resolving compressed object: [) print dup =only ( 0]) = } if
Objects exch 3 index % Put the object into Objects
3 index get
PDFDEBUG { dup === flush } if
lput
} ifelse
pop % Remove loop index
} for
pop pop pop pop % Remove objstream, N, [obj#], and [objects]
} bind def
% When resolving an object reference, we stop at the endobj or endstream.
/resolveopdict mark
  valueopdict { } forall
  % Both terminators finish the object and leave the reading loop.
  /endstream { endobj exit } bind
  /endobj { endobj exit } bind
  % OmniForm generates PDF file with endobj missing in some
  % objects. AR ignores this. So we have to do it too:
  % a following 'obj' drops its two operands and finishes the
  % current object.
  /obj { pop pop endobj exit } bind
.dicttomark readonly def
/resolveR {		% <object#> <generation#> resolveR <object>
  % Return the value of an indirect object, reading it from PDFfile (or
  % from the object stream that contains it) if it is not yet resolved.
  % null is returned when the generation check fails or when the object
  % cannot be found in its object stream.  The position of PDFfile is
  % saved before reading and restored afterwards.
  PDFDEBUG {
    PDFSTEPcount 1 le {
      (%Resolving: ) print 2 copy 2 array astore ==
    } if
  } if
  1 index resolved? {		% If object has already been resolved ...
    exch pop exch pop		% then clear stack and return object
  } {				% Else if not resolved ...
    PDFfile fileposition 3 1 roll	% Save current file position
    1 index Objects exch lget		% Get location of object from xref
    3 1 roll checkgeneration {		% Verify the generation number
		% Stack: savepos objpos obj#
      ObjectStream 1 index lget dup 0 eq {	% Check if obj is not in an objstream
        pop exch PDFoffset add PDFfile exch setfileposition
		% The object must start with '<obj#> <gen#> obj'.
        PDFfile token pop 2 copy ne
         { ( **** Unrecoverable error in xref!\n) pdfformaterror
           /resolveR cvx /rangecheck signalerror
         }
        if pop PDFfile token pop
        PDFfile token pop /obj ne
         { ( **** Unrecoverable error in xref!\n) pdfformaterror
           /resolveR cvx /rangecheck signalerror
         }
        if
        pdf_run_resolve		% PDFfile resolveopdict .pdfrun
      } {			% Else the object is in an ObjectStream
		% Process an objectstream object. We are going to resolve all
		% of the objects in the stream and place them into the Objects
		% array.
		% Stack: savepos objpos obj# objectstream#
        resolveobjectstream
        resolved? {		% If object has already been resolved ...
          exch pop		% Remove object pos from stack.
        } {
		% resolved? has already consumed obj#, so only objpos may be
		% popped here: savepos must stay on the stack for the
		% setfileposition below.
          pop null		% Pop objpos, put null for object
        } ifelse
      } ifelse
    } {				% Else the generation number is wrong
		% Don't cache if the generation # is wrong.
      pop pop null		% Pop objpos and obj#, put null for object
    } ifelse			% ifelse generation number is correct
    exch PDFfile exch setfileposition	% Return to original file position
  } ifelse
} bind def
% ================================ Streams ================================ %
% We represent a stream by an executable dictionary that contains,
% in addition to the contents of the original stream dictionary:
% /File - the file or string where the stream contents are stored,
% if the stream is not an external one.
% /FilePosition - iff File is a file, the position in the file
% where the contents start.
% /StreamKey - the key used to decrypt this stream, if any.
% We do the real work of constructing the data stream only when the
% contents are needed.
% Construct a stream. The length is not reliable in the face of
% different end-of-line conventions, but it's all we've got.
%
% PDF files are inconsistent about what may fall between the 'stream' keyword
% and the actual stream data, and it appears that no one algorithm can
% detect this reliably. We used to try to guess whether the file included
% extraneous \r and/or \n characters, but we no longer attempt to do so,
% especially since the PDF 1.2 specification states flatly that the only
% legal terminators following the 'stream' keyword are \n or \r\n, both of
% which are properly skipped and discarded by the token operator.
% Unfortunately, this doesn't account for other whitespace characters that
% may have preceded the EOL, such as spaces or tabs. Thus we back up one
% character and scan until we find the \n terminator.
/stream { % <dict> stream <modified_dict>
% Record where the stream data is (File and FilePosition when reading from
% PDFfile, or a string copy of the data otherwise), skip over the data, and
% check for the closing keyword.  The dictionary is returned executable.
dup /Length oget 0 eq {
dup /Filter undef % don't confuse any filters that require data
} if
% Take the first branch if the stream has an /F entry or if we are
% reading directly from PDFfile.
dup /F known dup PDFsource PDFfile eq or {
not {
dup /File PDFfile put
% make sure that we are just past the EOL \n character
PDFfile dup fileposition 1 sub setfileposition % back up one
{ PDFfile read pop dup 13 eq {
% If there had been a \n, token would have advanced over it
% thus, if the terminator was \r, we have a format error!
( **** Warning: stream operator not terminated by valid EOL.\n) pdfformaterror
pop exit % fileposition is OK (just past the \r).
} if
10 eq { exit } if
} loop % scan past \n
dup /FilePosition PDFfile fileposition put
PDFDEBUG {
PDFSTEPcount 1 le {
(%FilePosition: ) print dup /FilePosition get ==
} if
} if
} if
% Some (bad) PDF files have invalid stream lengths. This causes problems
% if we reposition beyond the end of the file. So we compare the given
% length to number of bytes left in the file.
dup /Length oget
dup PDFfile bytesavailable lt { % compare to bytes left in file
PDFfile fileposition % reposition to the end of stream
add PDFfile exch setfileposition
} {
pop % bad stream length - do not reposition.
% This will force a length warning below
} ifelse
} {
pop
% We're already reading from a stream, which we can't reposition.
% Capture the sub-stream contents in a string.
dup /Length oget string PDFsource exch readstring
not {
( **** Warning: Unexpected EOF in stream!\n) pdfformaterror
/stream cvx /rangecheck signalerror
} if
1 index exch /File exch put
} ifelse
% Read the token that follows the data; a scanning error or end of
% file is represented by null.
PDFsource {token} stopped {
pop null
} {
not { null } if
} ifelse
dup /endobj eq {
% Another case that Acrobat Reader handles -- 'endobj' without 'endstream'.
( **** Warning: stream missing 'endstream'.\n) pdfformaterror
pop /endstream % fake a valid endstream
} if
/endstream ne {
( **** Warning: stream Length incorrect.\n) pdfformaterror
dup /Length undef % prevent the use of the incorrect length.
cvx endobj exit % exit from .pdfrun now.
} if
cvx
} bind def
/endstream {		% - endstream -
  % Leave the enclosing loop.
  exit
} bind def
% Contrary to the published PDF (1.3) specification, Acrobat Reader
% accepts abbreviated filter names everywhere, not just for in-line images,
% and some applications (notably htmldoc) rely on this.
/unabbrevfilterdict mark
  % abbreviation	full filter name
  /AHx		/ASCIIHexDecode
  /A85		/ASCII85Decode
  /CCF		/CCITTFaxDecode
  /DCT		/DCTDecode
  /Fl		/FlateDecode
  /LZW		/LZWDecode
  /RL		/RunLengthDecode
.dicttomark readonly def
% Extract and apply filters.
/filterparms {		% <dict> <DPkey> <Fkey> filterparms
			%   <dict> <parms> <filternames>
  2 index exch knownoget {
    % Stack: dict DPkey filters
    exch 2 index exch knownoget {
      % Both filters and parameters.
      % Stack: dict filters parms
      exch dup type /nametype eq {
        % A single filter name: wrap it in an array, and do the same
        % for its parameters unless they already are an array.
        1 array astore
        exch dup type /arraytype ne { 1 array astore } if
        exch
      } if
    } {
      % Filters, but no parameters.
      null exch
      dup type /nametype eq { 1 array astore } if
    } ifelse
  } {
    % No filters: ignore parameters, if any.
    pop
    null { }
  } ifelse
} bind def
/filtername {		% <filtername> filtername <filtername'>
  % Expand an abbreviated name, if it is one.
  //unabbrevfilterdict 1 index .knownget {
    exch pop
  } if
  % Check that a Filter resource of that name exists.
  dup /Filter resourcestatus {
    pop pop
  } {
    Repaired exch	% this error is not the creator's fault
    ( **** ERROR: Unable to process ) pdfformaterror
    64 string cvs pdfformaterror
    ( data. Page will be missing data.\n) pdfformaterror
    /Repaired exch store	% restore the previous "Repaired" state
    % provide a filter that returns EOF (no data)
    /.EOFDecode
  } ifelse
} bind def
/applyfilters {	% <parms> <source> <filternames> applyfilters <stream>
  % Layer one filter per entry of <filternames> on top of <source>.
  % <parms> is either null (no filter has parameters) or an array that is
  % consumed in step with <filternames>; an individual entry may be null.
  2 index null eq {
    { filtername filter }
  } {
    {		% Stack: parms source filtername
      2 index 0 oget dup null eq {
		% No parameters for this filter.  The name must still go
		% through filtername, so that abbreviations are expanded and
		% unavailable filters are reported, exactly as in the other
		% two cases.
        pop filtername
      } {
        exch filtername dup /JBIG2Decode eq { exch jbig2cachectx exch } if
      } ifelse filter
		% Drop the parameter entry that has just been used.
      exch dup length 1 sub 1 exch getinterval exch
    }
  } ifelse forall exch pop
} bind def
% JBIG2 streams have an optional 'globals' stream obj for
% sharing redundant data between page images. Here we resolve
% that stream reference (if any) and run it through the decoder,
% creating a special -jbig2globalctx- postscript object our
% JBIG2Decode filter implementation looks for in the parm dict.
/jbig2cachectx { % <parmdict> jbig2cachectx <parmdict>
% If <parmdict> has a JBIG2Globals stream, read Length bytes from it, turn
% them into a global context and store that in <parmdict> under
% /.jbig2globalctx.  The position of PDFfile is saved and restored.
dup /JBIG2Globals knownoget {
dup /Length oget
% make global ctx
PDFfile fileposition 3 1 roll % resolvestream is not reentrant
% Stack: parmdict savedpos globalsdict length
exch true resolvestream exch .bytestring
.readbytestring pop .jbig2makeglobalctx
% Stack: parmdict savedpos ctx
PDFfile 3 -1 roll setfileposition
1 index exch
/.jbig2globalctx exch put
} if
} bind def
% Resolve a stream dictionary to a PostScript stream.
% Streams with no filters require special handling:
% - Whether we are going to interpret the stream, or If we are just
% going to read data from them, we impose a SubFileDecode filter
% that reads just the requisite amount of data.
% Note that, in general, resolving a stream repositions PDFfile.
% Clients must save and restore the position of PDFfile themselves.
/resolvestream { % <streamdict> <readdata?> resolvestream <stream>
% Build a readable stream for <streamdict>: open the external file named
% by /F, or position /File at /FilePosition, then add decryption and the
% filters listed in the dictionary.
1 index /F knownoget {
% This stream is stored on an external file.
(r) file 3 -1 roll
/FDecodeParms /FFilter filterparms
% Stack: readdata? file dict parms filternames
4 -1 roll exch
% Stack: readdata? dict parms file filternames
pdf_decrypt_stream
applyfilters
} {
exch dup /FilePosition .knownget {
1 index /File get exch setfileposition
} if
% Stack: readdata? dict
/DecodeParms /Filter filterparms
% Stack: readdata? dict parms filternames
2 index /File get exch
% Stack: readdata? dict parms file/string filternames
pdf_decrypt_stream % add decryption if needed
dup length 0 eq {
% All the PDF filters have EOD markers, but in this case
% there is no specified filter.
pop exch pop
% Stack: readdata? dict file/string
2 index 1 index type /filetype eq or {
% Use length for any files or reading data from any source.
1 index /Length knownoget not { 0 } if
} {
0 % Otherwise length of 0 for whole string
} ifelse
% No SubFileDecode filter is added when the dictionary has /IDFlag.
2 index /IDFlag known { pop } { () /SubFileDecode filter } ifelse
} {
applyfilters
} ifelse
} ifelse
% Stack: readdata? dict file
exch pop exch pop
} bind def
% ============================ Name/number trees ============================ %
/nameoget {		% <nametree> <key> nameoget <obj|null>
  % Look <key> up in a name tree, whose leaves keep their pairs
  % under /Names.
  exch			% key nametree
  /Names exch		% key /Names nametree
  .treeget
} bind def
/numoget {		% <numtree> <key> numoget <obj|null>
  % Look <key> up in a number tree, whose leaves keep their pairs
  % under /Nums.
  exch			% key numtree
  /Nums exch		% key /Nums numtree
  .treeget
} bind def
/.treeget {		% <key> <leafkey> <tree> .treeget <obj|null>
  % Look <key> up in a name/number tree node: search the /Kids of an
  % intermediate node, or the pair array stored under <leafkey> in a leaf.
  dup /Kids knownoget {
    exch pop .branchget
  } {
	% Use oget rather than get, so that a leaf whose pair array is
	% given as an indirect reference is resolved before it is searched.
    exch oget .leafget
  } ifelse
} bind def
/.branchget { % <key> <leafkey> <kids> .branchget <obj|null>
% Binary search over the /Limits of the nodes in <kids>; continues in the
% node whose limits enclose <key>, or returns null when <kids> is empty.
dup length 0 eq {
pop pop pop null
} {
dup length -1 bitshift 2 copy oget
% Stack: key leafkey kids mid kids[mid]
dup /Limits oget aload pop
% Stack: key leafkey kids mid kids[mid] min max
6 index lt {
% max < key: continue with the kids after mid.
pop pop
1 add 1 index length 1 index sub getinterval .branchget
} {
5 index gt {
% min > key: continue with the kids before mid.
pop
0 exch getinterval .branchget
} {
% min <= key <= max: descend into kids[mid].
exch pop exch pop .treeget
} ifelse
} ifelse
} ifelse
} bind def
/.leafget {		% <key> <pairs> .leafget <obj|null>
  % Binary search in a leaf array of key/value pairs.
  dup length 4 lt {
	% Fewer than two pairs left.  Below length 4 the midpoint computed
	% in the other branch would be 0, so the search could not make
	% progress; such arrays are handled directly.
    dup length 2 lt {
	% Empty (or truncated) leaf: the key is not present.
      pop pop null
    } {
	% Compare against the only complete pair.
      dup 0 get 2 index eq { 1 oget } { pop null } ifelse
      exch pop
    } ifelse
  } {
	% mid = index of the key of the middle pair (always even).
    dup length -1 bitshift -2 and 2 copy oget
	% Stack: key pairs mid pairs[mid]
    3 index gt { 0 exch } { 1 index length 1 index sub } ifelse
    getinterval .leafget
  } ifelse
} bind def
end % pdfdict
% Restore the VM allocation mode that .currentglobal left on the operand
% stack at the top of this file.
.setglobal