Skip to content
Snippets Groups Projects
pdfjs.parser.js 337 KiB
Newer Older
  • Learn to ignore specific revisions
  • 10001 10002 10003 10004 10005 10006 10007 10008 10009 10010 10011 10012 10013 10014 10015 10016 10017 10018 10019 10020 10021 10022 10023 10024 10025 10026 10027 10028 10029 10030 10031 10032 10033 10034 10035 10036 10037 10038 10039 10040 10041 10042 10043 10044 10045 10046 10047 10048 10049 10050 10051 10052 10053 10054 10055 10056 10057 10058 10059 10060 10061 10062 10063 10064 10065 10066 10067 10068 10069 10070 10071 10072 10073 10074 10075 10076 10077 10078 10079 10080 10081 10082 10083 10084 10085 10086 10087 10088 10089 10090 10091 10092 10093 10094 10095 10096 10097 10098 10099 10100 10101 10102 10103 10104 10105 10106 10107 10108 10109 10110 10111 10112 10113 10114 10115 10116 10117 10118 10119 10120 10121 10122 10123 10124 10125 10126 10127 10128 10129 10130 10131 10132 10133 10134 10135 10136 10137 10138 10139 10140 10141 10142 10143 10144 10145 10146 10147 10148 10149 10150 10151 10152 10153 10154 10155 10156 10157 10158 10159 10160 10161 10162 10163 10164 10165 10166 10167 10168 10169 10170 10171 10172 10173 10174 10175 10176 10177 10178 10179 10180 10181 10182 10183 10184 10185 10186 10187 10188 10189 10190 10191 10192 10193 10194 10195 10196 10197 10198 10199 10200 10201 10202 10203 10204 10205 10206 10207 10208 10209 10210 10211 10212 10213 10214 10215 10216 10217 10218 10219 10220 10221 10222 10223 10224 10225 10226 10227 10228 10229 10230 10231 10232 10233 10234 10235 10236 10237 10238 10239 10240 10241 10242 10243 10244 10245 10246 10247 10248 10249 10250 10251 10252 10253 10254 10255 10256
    
      /**
       * Document constructor. `arg` may be an existing stream (as decided by
       * `isStream`) or a raw ArrayBuffer (as decided by `isArrayBuffer`); in
       * both cases a stream is handed to `init` together with the manager and
       * the password. Any other argument type is reported through `error`.
       */
      function PDFDocument(pdfManager, arg, password) {
        var source;
        if (isStream(arg)) {
          source = arg;
        } else if (isArrayBuffer(arg)) {
          // Raw buffers are wrapped so that `init` always receives a stream.
          source = new Stream(arg);
        } else {
          error('PDFDocument: Unknown argument type');
          // `init` must not run for an unsupported argument.
          return;
        }
        init.call(this, pdfManager, source, password);
      }
    
      /**
       * Shared initialisation, invoked with `this` bound to the document:
       * checks that the stream is non-empty, stores the manager and the stream,
       * and creates the XRef object for that stream.
       */
      function init(pdfManager, stream, password) {
        var hasData = stream.length > 0;
        assert(hasData, 'stream must have data');
        this.pdfManager = pdfManager;
        this.stream = stream;
        this.xref = new XRef(stream, password, pdfManager);
      }
    
      function find(stream, needle, limit, backwards) {
        var pos = stream.pos;
        var end = stream.end;
        var strBuf = [];
        if (pos + limit > end) {
          limit = end - pos;
        }
        for (var n = 0; n < limit; ++n) {
          strBuf.push(String.fromCharCode(stream.getByte()));
        }
        var str = strBuf.join('');
        stream.pos = pos;
        var index = backwards ? str.lastIndexOf(needle) : str.indexOf(needle);
        if (index === -1) {
          return false; /* not found */
        }
        stream.pos += index;
        return true; /* found */
      }
    
      // Maps each document-information key that is copied into `documentInfo`
      // to the predicate its value has to satisfy.
      var DocumentInfoValidators = {
        get entries() {
          // Built on first access because the validation functions referenced
          // here are not defined until after this file loads.
          var table = {};
          var stringKeys = ['Title', 'Author', 'Subject', 'Keywords', 'Creator',
                            'Producer', 'CreationDate', 'ModDate'];
          for (var i = 0; i < stringKeys.length; i++) {
            table[stringKeys[i]] = isString;
          }
          table.Trapped = isName;
          return shadow(this, 'entries', table);
        }
      };
    
      PDFDocument.prototype = {
        /**
         * Runs `setup`, picks up a `Version` name from the catalog dictionary
         * (overriding `pdfFormatVersion`), and records the `AcroForm` and `XFA`
         * entries on the document.
         *
         * @param recoveryMode - forwarded unchanged to `setup`.
         * @throws MissingDataException raised while reading the AcroForm entry
         *   is re-thrown, matching the `linearization` getter. Any other
         *   failure while reading that entry is logged via `info` and treated
         *   as "no AcroForm".
         */
        parse: function PDFDocument_parse(recoveryMode) {
          this.setup(recoveryMode);
          var version = this.catalog.catDict.get('Version');
          if (isName(version)) {
            this.pdfFormatVersion = version.name;
          }
          try {
            // checking if AcroForm is present
            this.acroForm = this.catalog.catDict.get('AcroForm');
            if (this.acroForm) {
              this.xfa = this.acroForm.get('XFA');
              var fields = this.acroForm.get('Fields');
              if ((!fields || !isArray(fields) || fields.length === 0) &&
                  !this.xfa) {
                // no fields and no XFA -- not a form (?)
                this.acroForm = null;
              }
            }
          } catch (ex) {
            // Missing data is not a malformed entry: let it propagate instead
            // of recording "no form" for a document that may well have one.
            if (ex instanceof MissingDataException) {
              throw ex;
            }
            info('Something wrong with AcroForm entry');
            this.acroForm = null;
          }
        },

        /**
         * Linearization data obtained from `Linearization.create`, or null when
         * the stream is empty or creation fails. MissingDataException is
         * re-thrown; every other error is only logged via `info`.
         */
        get linearization() {
          var linearization = null;
          if (this.stream.length) {
            try {
              linearization = Linearization.create(this.stream);
            } catch (err) {
              if (err instanceof MissingDataException) {
                throw err;
              }
              info(err);
            }
          }
          // shadow the prototype getter with a data property
          return shadow(this, 'linearization', linearization);
        },
        /**
         * Offset used as the starting point for cross-reference parsing, or 0
         * when none can be determined.
         *
         * With linearization data present, this is the position just after the
         * first 'endobj' found within the first 1024 bytes. Otherwise the
         * number following the last 'startxref' keyword is used.
         */
        get startXRef() {
          var stream = this.stream;
          var startXRef = 0;
          var linearization = this.linearization;
          if (linearization) {
            // Find end of first obj.
            stream.reset();
            if (find(stream, 'endobj', 1024)) {
              // 6 === 'endobj'.length
              startXRef = stream.pos + 6;
            }
          } else {
            // Find startxref by jumping backward from the end of the file.
            var step = 1024;
            var found = false, pos = stream.end;
            while (!found && pos > 0) {
              // Consecutive windows overlap by the keyword length so that a
              // keyword straddling a window boundary is still seen.
              pos -= step - 'startxref'.length;
              if (pos < 0) {
                pos = 0;
              }
              stream.pos = pos;
              found = find(stream, 'startxref', step, true);
            }
            if (found) {
              // 9 === 'startxref'.length
              stream.skip(9);
              var ch;
              do {
                ch = stream.getByte();
              } while (Lexer.isSpace(ch));
              var str = '';
              // Collect bytes in the range 0x20 (space) .. 0x39 ('9'). This is
              // wider than just digits; parseInt below only uses the leading
              // integer part of what was collected.
              while (ch >= 0x20 && ch <= 0x39) {
                str += String.fromCharCode(ch);
                ch = stream.getByte();
              }
              startXRef = parseInt(str, 10);
              if (isNaN(startXRef)) {
                startXRef = 0;
              }
            }
          }
          // shadow the prototype getter with a data property
          return shadow(this, 'startXRef', startXRef);
        },
        /**
         * `mainXRefEntriesOffset` taken from the linearization data, or 0 when
         * there is no linearization data.
         */
        get mainXRefEntriesOffset() {
          var mainXRefEntriesOffset = 0;
          var linearization = this.linearization;
          if (linearization) {
            mainXRefEntriesOffset = linearization.mainXRefEntriesOffset;
          }
          // shadow the prototype getter with a data property
          return shadow(this, 'mainXRefEntriesOffset', mainXRefEntriesOffset);
        },
        // Find the header, remove leading garbage and setup the stream
        // starting from the header. The header is only searched for within the
        // first 1024 bytes; `pdfFormatVersion` is filled in from the header
        // unless it has already been set.
        checkHeader: function PDFDocument_checkHeader() {
          var stream = this.stream;
          stream.reset();
          if (find(stream, '%PDF-', 1024)) {
            // Found the header, trim off any garbage before it.
            stream.moveStart();
            // Reading file format version
            var MAX_VERSION_LENGTH = 12;
            var version = '', ch;
            // Stop at the first byte that is <= 0x20 (SPACE), or once
            // MAX_VERSION_LENGTH characters have been collected.
            while ((ch = stream.getByte()) > 0x20) { // SPACE
              if (version.length >= MAX_VERSION_LENGTH) {
                break;
              }
              version += String.fromCharCode(ch);
            }
            if (!this.pdfFormatVersion) {
              // removing "%PDF-"-prefix
              this.pdfFormatVersion = version.substring(5);
            }
            return;
          }
          // May not be a PDF file, continue anyway.
        },
        // Passes the computed `startXRef` offset on to the XRef object.
        parseStartXRef: function PDFDocument_parseStartXRef() {
          var startXRef = this.startXRef;
          this.xref.setStartXRef(startXRef);
        },
        /**
         * Parses the cross-reference data and creates the document catalog.
         * @param recoveryMode - forwarded unchanged to `xref.parse`.
         */
        setup: function PDFDocument_setup(recoveryMode) {
          this.xref.parse(recoveryMode);
          this.catalog = new Catalog(this.pdfManager, this.xref, false);
        },
        // Page count: taken from the linearization data when present,
        // otherwise from the catalog.
        get numPages() {
          var linearization = this.linearization;
          var num = linearization ? linearization.numPages : this.catalog.numPages;
          // shadow the prototype getter
          return shadow(this, 'numPages', num);
        },
        /**
         * Plain object with `PDFFormatVersion`, `IsAcroFormPresent`,
         * `IsXFAPresent`, plus every entry of the trailer's `Info` dictionary
         * that is listed in `DocumentInfoValidators.entries` and passes its
         * validator. String values go through `stringToPDFString`.
         *
         * @throws MissingDataException raised while fetching the `Info`
         *   dictionary is re-thrown (so nothing incomplete gets cached). Any
         *   other failure there is logged via `info` and the dictionary is
         *   skipped.
         */
        get documentInfo() {
          var docInfo = {
            PDFFormatVersion: this.pdfFormatVersion,
            IsAcroFormPresent: !!this.acroForm,
            IsXFAPresent: !!this.xfa
          };
          var infoDict;
          try {
            infoDict = this.xref.trailer.get('Info');
          } catch (err) {
            // Same policy as the `linearization` getter: missing data is not
            // an invalid dictionary and must reach the caller.
            if (err instanceof MissingDataException) {
              throw err;
            }
            info('The document information dictionary is invalid.');
          }
          if (infoDict) {
            var validEntries = DocumentInfoValidators.entries;
            // Only fill the document info with valid entries from the spec.
            for (var key in validEntries) {
              if (infoDict.has(key)) {
                var value = infoDict.get(key);
                // Make sure the value conforms to the spec.
                if (validEntries[key](value)) {
                  docInfo[key] = (typeof value !== 'string' ?
                                  value : stringToPDFString(value));
                } else {
                  info('Bad value in document info for "' + key + '"');
                }
              }
            }
          }
          return shadow(this, 'documentInfo', docInfo);
        },
        /**
         * Lower-case hexadecimal identifier for the document. It is derived
         * from the first element of the trailer `ID` array when that element
         * is a non-empty string different from EMPTY_FINGERPRINT; otherwise
         * from an MD5 over the first FINGERPRINT_FIRST_BYTES bytes of the
         * stream.
         */
        get fingerprint() {
          var xref = this.xref, hash, fileID = '';
          var idArray = xref.trailer.get('ID');

          if (idArray && isArray(idArray) && idArray[0] && isString(idArray[0]) &&
              idArray[0] !== EMPTY_FINGERPRINT) {
            hash = stringToBytes(idArray[0]);
          } else {
            if (this.stream.ensureRange) {
              this.stream.ensureRange(0,
                Math.min(FINGERPRINT_FIRST_BYTES, this.stream.end));
            }
            // NOTE(review): for a stream shorter than FINGERPRINT_FIRST_BYTES
            // the subarray is shorter than the length passed as the third
            // argument -- confirm calculateMD5 tolerates that.
            hash = calculateMD5(this.stream.bytes.subarray(0,
              FINGERPRINT_FIRST_BYTES), 0, FINGERPRINT_FIRST_BYTES);
          }

          // Two hex digits per byte, zero-padded.
          for (var i = 0, n = hash.length; i < n; i++) {
            var hex = hash[i].toString(16);
            fileID += hex.length === 1 ? '0' + hex : hex;
          }

          return shadow(this, 'fingerprint', fileID);
        },

        // Delegates to the catalog; returns whatever `catalog.getPage` returns.
        getPage: function PDFDocument_getPage(pageIndex) {
          return this.catalog.getPage(pageIndex);
        },

        // Delegates to the catalog; returns whatever `catalog.cleanup` returns.
        cleanup: function PDFDocument_cleanup() {
          return this.catalog.cleanup();
        }
      };
    
      return PDFDocument;
    })();
    
    exports.PDFDocument = PDFDocument;
    }));