Skip to content
Snippets Groups Projects
pdfjs.parser.js 367 KiB
Newer Older
  • Learn to ignore specific revisions
  • Alexey Lunin's avatar
    Alexey Lunin committed

                  kidsOrNames = xref.fetchIfRef(kids[m]);
                  break;
                }
              }
              if (l > r) {
                return null;
              }
            }
    
            // If we get here, then we have found the right entry. Now
            // go through the named destinations in the Named dictionary
            // until we find the exact destination we're looking for.
            var names = kidsOrNames.get("Names");
            if (isArray(names)) {
              // Perform a binary search to reduce the lookup time.
              l = 0;
              r = names.length - 2;
              while (l <= r) {
                // Check only even indices (0, 2, 4, ...) because the
                // odd indices contain the actual D array.
                m = (l + r) & ~1;
                if (destinationId < xref.fetchIfRef(names[m])) {
                  r = m - 2;
                } else if (destinationId > xref.fetchIfRef(names[m])) {
                  l = m + 2;
                } else {
                  return xref.fetchIfRef(names[m + 1]);
                }
              }
            }
            return null;
          }
        };
        return NameTree;
      })();
    
      /**
       * "A PDF file can refer to the contents of another file by using a File
       * Specification (PDF 1.1)", see the spec (7.11) for more details.
       * NOTE: Only embedded files are supported (as part of the attachments support)
       * TODO: support the 'URL' file system (with caching if !/V), portable
       * collections attributes and related files (/RF)
       */
      var FileSpec = (function FileSpecClosure() {
        // Keys probed for a platform-specific entry, highest priority first.
        var PLATFORM_KEYS = ["UF", "F", "Unix", "Mac", "DOS"];

        /**
         * Wrapper around a file specification dictionary.
         *
         * When `root` is missing or is not a dictionary the instance is left
         * uninitialised: `filename` then yields `undefined` and `content`
         * yields `null`.
         *
         * @param {Object} root - The file specification dictionary.
         * @param {Object} xref - Object whose `fetchIfRef` resolves the embedded
         *   file entry when the content is requested.
         */
        function FileSpec(root, xref) {
          if (!root || !isDict(root)) {
            return;
          }
          this.xref = xref;
          this.root = root;
          if (root.has("FS")) {
            this.fs = root.get("FS");
          }
          this.description = root.has("Desc")
            ? stringToPDFString(root.get("Desc"))
            : "";
          if (root.has("RF")) {
            warn("Related file specifications are not supported");
          }
          this.contentAvailable = true;
          if (!root.has("EF")) {
            this.contentAvailable = false;
            warn("Non-embedded file specifications are not supported");
          }
        }

        /**
         * Returns the first entry found among UF, F, Unix, Mac and DOS.
         *
         * @param {Object} dict - Dictionary to probe.
         * @returns {*} The stored value, or `null` when `dict` is not a
         *   dictionary or holds none of the keys.
         */
        function pickPlatformItem(dict) {
          // A key such as /EF can be present yet resolve to null or to a
          // non-dictionary value; treat that as "no entry" instead of
          // throwing on `dict.has`.
          if (!isDict(dict)) {
            return null;
          }
          for (var i = 0; i < PLATFORM_KEYS.length; i++) {
            var key = PLATFORM_KEYS[i];
            if (dict.has(key)) {
              return dict.get(key);
            }
          }
          return null;
        }

        FileSpec.prototype = {
          /**
           * File name taken from the specification ("unnamed" when no name
           * entry exists), with escaped and plain backslashes normalised to
           * forward slashes. Computed once and cached in `_filename`.
           */
          get filename() {
            if (!this._filename && this.root) {
              var filename = pickPlatformItem(this.root) || "unnamed";
              this._filename = stringToPDFString(filename)
                .replace(/\\\\/g, "\\")
                .replace(/\\\//g, "/")
                .replace(/\\/g, "/");
            }
            return this._filename;
          },

          /**
           * Bytes of the embedded file, or `null` (after a warning) when the
           * specification has no usable embedded stream.
           */
          get content() {
            if (!this.contentAvailable) {
              return null;
            }
            if (!this.contentRef && this.root) {
              this.contentRef = pickPlatformItem(this.root.get("EF"));
            }
            var content = null;
            if (this.contentRef) {
              var xref = this.xref;
              var fileObj = xref.fetchIfRef(this.contentRef);
              if (fileObj && isStream(fileObj)) {
                content = fileObj.getBytes();
              } else {
                warn(
                  "Embedded file specification points to non-existing/invalid " +
                    "content"
                );
              }
            } else {
              warn("Embedded file specification does not have a content");
            }
            return content;
          },

          /** Plain `{ filename, content }` snapshot of this specification. */
          get serializable() {
            return {
              filename: this.filename,
              content: this.content
            };
          }
        };
        return FileSpec;
      })();
    
      /**
       * A helper for loading missing data in object graphs. It traverses the graph
       * depth first and queues up any objects that have missing data. Once it has
       * traversed as many objects as are available, it attempts to bundle the
       * missing data requests and then resumes from the nodes that weren't ready.
       *
       * NOTE: It provides protection from circular references by keeping track
       * of loaded references. However, you must be careful not to load any graphs
       * that have references to the catalog or other pages since that will cause the
       * entire PDF document object graph to be traversed.
       */
      var ObjectLoader = (function() {
        // True for values the walker has to descend into: references,
        // dictionaries, arrays and streams.
        function mayHaveChildren(value) {
          if (isRef(value) || isDict(value)) {
            return true;
          }
          return isArray(value) || isStream(value);
        }

        // Pushes every child of `node` that may itself have children onto
        // `nodesToVisit`. Dictionaries contribute their map values, streams
        // the map values of their dictionary, arrays their elements.
        function addChildren(node, nodesToVisit) {
          var entries;
          if (isDict(node)) {
            entries = node.map;
          } else if (isStream(node)) {
            entries = node.dict.map;
          } else {
            if (isArray(node)) {
              for (var idx = 0, count = node.length; idx < count; idx++) {
                var element = node[idx];
                if (mayHaveChildren(element)) {
                  nodesToVisit.push(element);
                }
              }
            }
            return;
          }
          for (var name in entries) {
            var child = entries[name];
            if (mayHaveChildren(child)) {
              nodesToVisit.push(child);
            }
          }
        }

        // Appends a { begin, end } request for each base stream of `node`
        // that reports missing chunks; returns whether any was appended.
        function queueMissingStreamRanges(node, pendingRequests) {
          var streams = node.getBaseStreams();
          var anyMissing = false;
          for (var k = 0; k < streams.length; k++) {
            var base = streams[k];
            if (base.getMissingChunks && base.getMissingChunks().length) {
              anyMissing = true;
              pendingRequests.push({
                begin: base.start,
                end: base.end
              });
            }
          }
          return anyMissing;
        }

        /**
         * @param {Object} obj - Object whose properties are the graph roots.
         * @param {Array} keys - Names of the properties of `obj` to load.
         * @param {Object} xref - Provides `fetch` and the underlying `stream`.
         */
        function ObjectLoader(obj, keys, xref) {
          this.obj = obj;
          this.keys = keys;
          this.xref = xref;
          this.refSet = null;
          this.capability = null;
        }

        ObjectLoader.prototype = {
          /**
           * Starts loading and returns the promise of a fresh capability,
           * which is resolved once the walk finds nothing left to request.
           */
          load: function ObjectLoader_load() {
            var keys = this.keys;
            var capability = createPromiseCapability();
            this.capability = capability;

            // Walking is only needed for a chunked stream that still has
            // missing chunks; otherwise all the data is already there.
            var source = this.xref.stream;
            var needsWalk =
              source instanceof ChunkedStream &&
              source.getMissingChunks().length !== 0;
            if (!needsWalk) {
              capability.resolve();
              return capability.promise;
            }

            this.refSet = new RefSet();
            // Seed the walk with the requested properties of the object.
            var roots = [];
            for (var k = 0; k < keys.length; k++) {
              roots.push(this.obj[keys[k]]);
            }

            this._walk(roots);
            return capability.promise;
          },

          _walk: function ObjectLoader_walk(nodesToVisit) {
            var self = this;
            var deferredNodes = [];
            var rangesToRequest = [];

            // Depth-first: always continue with the most recently queued node.
            while (nodesToVisit.length) {
              var node = nodesToVisit.pop();

              // Only references or chunked streams can raise missing data.
              if (isRef(node)) {
                if (this.refSet.has(node)) {
                  // Already handled; this is what breaks reference cycles.
                  continue;
                }
                try {
                  this.refSet.put(node);
                  node = this.xref.fetch(node);
                } catch (ex) {
                  if (!(ex instanceof MissingDataException)) {
                    throw ex;
                  }
                  // `node` is still the reference here; retry it later.
                  deferredNodes.push(node);
                  rangesToRequest.push({ begin: ex.begin, end: ex.end });
                }
              }

              if (
                node &&
                node.getBaseStreams &&
                queueMissingStreamRanges(node, rangesToRequest)
              ) {
                deferredNodes.push(node);
              }

              addChildren(node, nodesToVisit);
            }

            if (!rangesToRequest.length) {
              // Everything is loaded.
              this.refSet = null;
              this.capability.resolve();
              return;
            }

            this.xref.stream.manager.requestRanges(rangesToRequest).then(
              function pendingRequestCallback() {
                // Forget the deferred references so that the next pass does
                // not skip them as already visited.
                for (var k = 0; k < deferredNodes.length; k++) {
                  var deferred = deferredNodes[k];
                  if (isRef(deferred)) {
                    self.refSet.remove(deferred);
                  }
                }
                self._walk(deferredNodes);
              },
              this.capability.reject
            );
          }
        };

        return ObjectLoader;
      })();
    
      exports.Catalog = Catalog;
      exports.ObjectLoader = ObjectLoader;
      exports.XRef = XRef;
    });
    
    /* Copyright 2012 Mozilla Foundation
     *
     * Licensed under the Apache License, Version 2.0 (the "License");
     * you may not use this file except in compliance with the License.
     * You may obtain a copy of the License at
     *
     *     http://www.apache.org/licenses/LICENSE-2.0
     *
     * Unless required by applicable law or agreed to in writing, software
     * distributed under the License is distributed on an "AS IS" BASIS,
     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     * See the License for the specific language governing permissions and
     * limitations under the License.
     */
    
    ("use strict");
    
    (function(root, factory) {
      //if (typeof define === 'function' && define.amd) {
      //  define('pdfjs/core/document', ['exports', 'pdfjs/shared/util',
      //    'pdfjs/core/primitives', 'pdfjs/core/stream', 'pdfjs/core/obj',
      //    'pdfjs/core/parser', 'pdfjs/core/crypto'], factory);
      // } else if (typeof exports !== 'undefined') {
      //   factory(exports, require('../shared/util.js'), require('./primitives.js'),
      //     require('./stream.js'), require('./obj.js'), require('./parser.js'),
      //     require('./crypto.js'));
      //} else {
      factory(
        (root.pdfjsCoreDocument = {}),
        root.pdfjsSharedUtil,
        root.pdfjsCorePrimitives,
        root.pdfjsCoreStream,
        root.pdfjsCoreObj,
        root.pdfjsCoreParser,
        root.pdfjsCoreCrypto
      );
      //}
    })(window, function(
      exports,
      sharedUtil,
      corePrimitives,
      coreStream,
      coreObj,
      coreParser,
      coreCrypto
    ) {
      var MissingDataException = sharedUtil.MissingDataException;
      var Util = sharedUtil.Util;
      var assert = sharedUtil.assert;
      var error = sharedUtil.error;
      var info = sharedUtil.info;
      var isArray = sharedUtil.isArray;
      var isArrayBuffer = sharedUtil.isArrayBuffer;
      var isString = sharedUtil.isString;
      var shadow = sharedUtil.shadow;
      var stringToBytes = sharedUtil.stringToBytes;
      var stringToPDFString = sharedUtil.stringToPDFString;
      var warn = sharedUtil.warn;
      var Dict = corePrimitives.Dict;
      var isDict = corePrimitives.isDict;
      var isName = corePrimitives.isName;
      var isStream = corePrimitives.isStream;
      var NullStream = coreStream.NullStream;
      var Stream = coreStream.Stream;
      var StreamsSequenceStream = coreStream.StreamsSequenceStream;
      var Catalog = coreObj.Catalog;
      var ObjectLoader = coreObj.ObjectLoader;
      var XRef = coreObj.XRef;
      var Lexer = coreParser.Lexer;
      var Linearization = coreParser.Linearization;
      var calculateMD5 = coreCrypto.calculateMD5;
    
      /**
       * The `PDFDocument` holds all the data of the PDF file. Compared to the
       * `PDFDoc`, this one doesn't have any job management code.
       * Right now there exists one PDFDocument on the main thread + one object
       * for each worker. If there is no worker support enabled, there are two
       * `PDFDocument` objects on the main thread created.
       */
      var PDFDocument = (function PDFDocumentClosure() {
        // Number of leading file bytes hashed when the trailer ID is unusable.
        var FINGERPRINT_FIRST_BYTES = 1024;
        // A 16-byte all-zero ID is handled like a missing one.
        var EMPTY_FINGERPRINT =
          "\x00\x00\x00\x00\x00\x00\x00" + "\x00\x00\x00\x00\x00\x00\x00\x00\x00";

        /**
         * @param {Object} pdfManager - Stored on the instance and handed to
         *   the XRef and the Catalog.
         * @param {Object|ArrayBuffer} arg - A stream, or a buffer to wrap in
         *   a new Stream; anything else is reported through `error`.
         * @param {string} password - Forwarded to the XRef.
         */
        function PDFDocument(pdfManager, arg, password) {
          var source;
          if (isStream(arg)) {
            source = arg;
          } else if (isArrayBuffer(arg)) {
            source = new Stream(arg);
          } else {
            error("PDFDocument: Unknown argument type");
            return;
          }
          init.call(this, pdfManager, source, password);
        }

        // Shared constructor tail; invoked with the new document as `this`.
        function init(pdfManager, stream, password) {
          assert(stream.length > 0, "stream must have data");
          this.pdfManager = pdfManager;
          this.stream = stream;
          this.xref = new XRef(stream, password, pdfManager);
        }

        /**
         * Looks for `needle` in the next `limit` bytes of `stream` (clamped to
         * the end of the stream). On a hit the stream is positioned at the
         * start of the match; on a miss it is put back where it started.
         *
         * @param {boolean} [backwards] - Take the last match, not the first.
         * @returns {boolean} Whether the needle was found.
         */
        function find(stream, needle, limit, backwards) {
          var origin = stream.pos;
          var end = stream.end;
          var span = origin + limit > end ? end - origin : limit;
          var text = "";
          for (var read = 0; read < span; ++read) {
            text += String.fromCharCode(stream.getByte());
          }
          // Reading advanced the stream; rewind before reporting the result.
          stream.pos = origin;
          var hit = backwards ? text.lastIndexOf(needle) : text.indexOf(needle);
          if (hit === -1) {
            return false; /* not found */
          }
          stream.pos += hit;
          return true; /* found */
        }

        // Steps backwards from the end of the stream in overlapping windows
        // until "startxref" is found or the beginning has been searched.
        // Leaves the stream at the keyword when it returns true.
        function seekStartXRefKeyword(stream) {
          var WINDOW = 1024;
          var KEYWORD = "startxref";
          var offset = stream.end;
          var located = false;
          while (!located && offset > 0) {
            // Overlap by the keyword length so that a keyword straddling two
            // windows is not missed.
            offset -= WINDOW - KEYWORD.length;
            if (offset < 0) {
              offset = 0;
            }
            stream.pos = offset;
            located = find(stream, KEYWORD, WINDOW, true);
          }
          return located;
        }

        // With the stream positioned at "startxref": skips the keyword and
        // any following whitespace, then parses the decimal that follows.
        // Yields 0 when no number can be parsed.
        function readStartXRefValue(stream) {
          stream.skip(9);
          var code = stream.getByte();
          while (Lexer.isSpace(code)) {
            code = stream.getByte();
          }
          var digits = "";
          // Collects every byte in [0x20, '9']; parseInt sorts out the rest.
          while (code >= 0x20 && code <= 0x39) {
            digits += String.fromCharCode(code);
            code = stream.getByte();
          }
          var parsed = parseInt(digits, 10);
          return isNaN(parsed) ? 0 : parsed;
        }

        var DocumentInfoValidators = {
          get entries() {
            // Built on first use and then cached through `shadow`.
            var validators = {
              Title: isString,
              Author: isString,
              Subject: isString,
              Keywords: isString,
              Creator: isString,
              Producer: isString,
              CreationDate: isString,
              ModDate: isString,
              Trapped: isName
            };
            return shadow(this, "entries", validators);
          }
        };

        PDFDocument.prototype = {
          /**
           * Parses the xref, creates the catalog, then records the catalog's
           * format version (when it is a name) and the AcroForm/XFA state.
           */
          parse: function PDFDocument_parse(recoveryMode) {
            this.setup(recoveryMode);

            var version = this.catalog.catDict.get("Version");
            if (isName(version)) {
              this.pdfFormatVersion = version.name;
            }

            try {
              // checking if AcroForm is present
              var form = this.catalog.catDict.get("AcroForm");
              this.acroForm = form;
              if (form) {
                this.xfa = form.get("XFA");
                var fields = form.get("Fields");
                var hasFields =
                  !!fields && isArray(fields) && fields.length !== 0;
                if (!hasFields && !this.xfa) {
                  // no fields and no XFA -- not a form (?)
                  this.acroForm = null;
                }
              }
            } catch (ex) {
              info("Something wrong with AcroForm entry");
              this.acroForm = null;
            }
          },

          // Linearization data for a non-empty stream, else null. Missing
          // data is re-thrown; any other failure is logged and yields null.
          get linearization() {
            var result = null;
            if (this.stream.length) {
              try {
                result = Linearization.create(this.stream);
              } catch (err) {
                if (err instanceof MissingDataException) {
                  throw err;
                }
                info(err);
              }
            }
            // shadow the prototype getter with a data property
            return shadow(this, "linearization", result);
          },

          // Offset handed to the xref parser (0 when none can be determined).
          get startXRef() {
            var stream = this.stream;
            var offset = 0;
            if (this.linearization) {
              // Linearized: use the position right after the first "endobj".
              stream.reset();
              if (find(stream, "endobj", 1024)) {
                offset = stream.pos + 6;
              }
            } else if (seekStartXRefKeyword(stream)) {
              // Otherwise: read the number after the last "startxref".
              offset = readStartXRefValue(stream);
            }
            // shadow the prototype getter with a data property
            return shadow(this, "startXRef", offset);
          },

          get mainXRefEntriesOffset() {
            var linearization = this.linearization;
            var offset = linearization
              ? linearization.mainXRefEntriesOffset
              : 0;
            // shadow the prototype getter with a data property
            return shadow(this, "mainXRefEntriesOffset", offset);
          },

          // Find the header, remove leading garbage and setup the stream
          // starting from the header.
          checkHeader: function PDFDocument_checkHeader() {
            var MAX_VERSION_LENGTH = 12;
            var stream = this.stream;
            stream.reset();
            if (!find(stream, "%PDF-", 1024)) {
              // May not be a PDF file, continue anyway.
              return;
            }
            // Found the header, trim off any garbage before it.
            stream.moveStart();
            // Read the header token: up to MAX_VERSION_LENGTH bytes, stopping
            // at the first byte that is not above SPACE.
            var header = "";
            var code = stream.getByte();
            while (code > 0x20 && header.length < MAX_VERSION_LENGTH) {
              header += String.fromCharCode(code);
              code = stream.getByte();
            }
            if (!this.pdfFormatVersion) {
              // removing "%PDF-"-prefix
              this.pdfFormatVersion = header.substring(5);
            }
          },

          parseStartXRef: function PDFDocument_parseStartXRef() {
            this.xref.setStartXRef(this.startXRef);
          },

          setup: function PDFDocument_setup(recoveryMode) {
            this.xref.parse(recoveryMode);
            this.catalog = new Catalog(this.pdfManager, this.xref, false);
          },

          get numPages() {
            var linearization = this.linearization;
            var count;
            if (linearization) {
              count = linearization.numPages;
            } else {
              count = this.catalog.numPages;
            }
            // shadow the prototype getter
            return shadow(this, "numPages", count);
          },

          // Format version and form flags, plus every validated entry of the
          // trailer's Info dictionary.
          get documentInfo() {
            var summary = {
              PDFFormatVersion: this.pdfFormatVersion,
              IsAcroFormPresent: !!this.acroForm,
              IsXFAPresent: !!this.xfa
            };
            var infoDict;
            try {
              infoDict = this.xref.trailer.get("Info");
            } catch (err) {
              info("The document information dictionary is invalid.");
            }
            if (infoDict) {
              var validators = DocumentInfoValidators.entries;
              // Only fill the document info with valid entries from the spec.
              for (var name in validators) {
                if (!infoDict.has(name)) {
                  continue;
                }
                var raw = infoDict.get(name);
                // Make sure the value conforms to the spec.
                if (!validators[name](raw)) {
                  info('Bad value in document info for "' + name + '"');
                  continue;
                }
                summary[name] =
                  typeof raw === "string" ? stringToPDFString(raw) : raw;
              }
            }
            return shadow(this, "documentInfo", summary);
          },

          // Hex string of the first trailer ID, or of the MD5 over the first
          // FINGERPRINT_FIRST_BYTES bytes when that ID is unusable.
          get fingerprint() {
            var idArray = this.xref.trailer.get("ID");
            var hasUsableId =
              idArray &&
              isArray(idArray) &&
              idArray[0] &&
              isString(idArray[0]) &&
              idArray[0] !== EMPTY_FINGERPRINT;

            var hash;
            if (hasUsableId) {
              hash = stringToBytes(idArray[0]);
            } else {
              var stream = this.stream;
              if (stream.ensureRange) {
                stream.ensureRange(
                  0,
                  Math.min(FINGERPRINT_FIRST_BYTES, stream.end)
                );
              }
              hash = calculateMD5(
                stream.bytes.subarray(0, FINGERPRINT_FIRST_BYTES),
                0,
                FINGERPRINT_FIRST_BYTES
              );
            }

            // Two lowercase hex digits per byte.
            var pieces = [];
            for (var k = 0, total = hash.length; k < total; k++) {
              var hex = hash[k].toString(16);
              pieces.push(hex.length === 1 ? "0" + hex : hex);
            }

            return shadow(this, "fingerprint", pieces.join(""));
          },

          getPage: function PDFDocument_getPage(pageIndex) {
            return this.catalog.getPage(pageIndex);
          },

          cleanup: function PDFDocument_cleanup() {
            return this.catalog.cleanup();
          }
        };

        return PDFDocument;
      })();
    
      exports.PDFDocument = PDFDocument;
    });