Skip to content
Snippets Groups Projects
pdfjs.parser.js 337 KiB
Newer Older
  • Learn to ignore specific revisions
  • 9001 9002 9003 9004 9005 9006 9007 9008 9009 9010 9011 9012 9013 9014 9015 9016 9017 9018 9019 9020 9021 9022 9023 9024 9025 9026 9027 9028 9029 9030 9031 9032 9033 9034 9035 9036 9037 9038 9039 9040 9041 9042 9043 9044 9045 9046 9047 9048 9049 9050 9051 9052 9053 9054 9055 9056 9057 9058 9059 9060 9061 9062 9063 9064 9065 9066 9067 9068 9069 9070 9071 9072 9073 9074 9075 9076 9077 9078 9079 9080 9081 9082 9083 9084 9085 9086 9087 9088 9089 9090 9091 9092 9093 9094 9095 9096 9097 9098 9099 9100 9101 9102 9103 9104 9105 9106 9107 9108 9109 9110 9111 9112 9113 9114 9115 9116 9117 9118 9119 9120 9121 9122 9123 9124 9125 9126 9127 9128 9129 9130 9131 9132 9133 9134 9135 9136 9137 9138 9139 9140 9141 9142 9143 9144 9145 9146 9147 9148 9149 9150 9151 9152 9153 9154 9155 9156 9157 9158 9159 9160 9161 9162 9163 9164 9165 9166 9167 9168 9169 9170 9171 9172 9173 9174 9175 9176 9177 9178 9179 9180 9181 9182 9183 9184 9185 9186 9187 9188 9189 9190 9191 9192 9193 9194 9195 9196 9197 9198 9199 9200 9201 9202 9203 9204 9205 9206 9207 9208 9209 9210 9211 9212 9213 9214 9215 9216 9217 9218 9219 9220 9221 9222 9223 9224 9225 9226 9227 9228 9229 9230 9231 9232 9233 9234 9235 9236 9237 9238 9239 9240 9241 9242 9243 9244 9245 9246 9247 9248 9249 9250 9251 9252 9253 9254 9255 9256 9257 9258 9259 9260 9261 9262 9263 9264 9265 9266 9267 9268 9269 9270 9271 9272 9273 9274 9275 9276 9277 9278 9279 9280 9281 9282 9283 9284 9285 9286 9287 9288 9289 9290 9291 9292 9293 9294 9295 9296 9297 9298 9299 9300 9301 9302 9303 9304 9305 9306 9307 9308 9309 9310 9311 9312 9313 9314 9315 9316 9317 9318 9319 9320 9321 9322 9323 9324 9325 9326 9327 9328 9329 9330 9331 9332 9333 9334 9335 9336 9337 9338 9339 9340 9341 9342 9343 9344 9345 9346 9347 9348 9349 9350 9351 9352 9353 9354 9355 9356 9357 9358 9359 9360 9361 9362 9363 9364 9365 9366 9367 9368 9369 9370 9371 9372 9373 9374 9375 9376 9377 9378 9379 9380 9381 9382 9383 9384 9385 9386 9387 9388 9389 9390 9391 9392 9393 9394 9395 9396 9397 9398 9399 
9400 9401 9402 9403 9404 9405 9406 9407 9408 9409 9410 9411 9412 9413 9414 9415 9416 9417 9418 9419 9420 9421 9422 9423 9424 9425 9426 9427 9428 9429 9430 9431 9432 9433 9434 9435 9436 9437 9438 9439 9440 9441 9442 9443 9444 9445 9446 9447 9448 9449 9450 9451 9452 9453 9454 9455 9456 9457 9458 9459 9460 9461 9462 9463 9464 9465 9466 9467 9468 9469 9470 9471 9472 9473 9474 9475 9476 9477 9478 9479 9480 9481 9482 9483 9484 9485 9486 9487 9488 9489 9490 9491 9492 9493 9494 9495 9496 9497 9498 9499 9500 9501 9502 9503 9504 9505 9506 9507 9508 9509 9510 9511 9512 9513 9514 9515 9516 9517 9518 9519 9520 9521 9522 9523 9524 9525 9526 9527 9528 9529 9530 9531 9532 9533 9534 9535 9536 9537 9538 9539 9540 9541 9542 9543 9544 9545 9546 9547 9548 9549 9550 9551 9552 9553 9554 9555 9556 9557 9558 9559 9560 9561 9562 9563 9564 9565 9566 9567 9568 9569 9570 9571 9572 9573 9574 9575 9576 9577 9578 9579 9580 9581 9582 9583 9584 9585 9586 9587 9588 9589 9590 9591 9592 9593 9594 9595 9596 9597 9598 9599 9600 9601 9602 9603 9604 9605 9606 9607 9608 9609 9610 9611 9612 9613 9614 9615 9616 9617 9618 9619 9620 9621 9622 9623 9624 9625 9626 9627 9628 9629 9630 9631 9632 9633 9634 9635 9636 9637 9638 9639 9640 9641 9642 9643 9644 9645 9646 9647 9648 9649 9650 9651 9652 9653 9654 9655 9656 9657 9658 9659 9660 9661 9662 9663 9664 9665 9666 9667 9668 9669 9670 9671 9672 9673 9674 9675 9676 9677 9678 9679 9680 9681 9682 9683 9684 9685 9686 9687 9688 9689 9690 9691 9692 9693 9694 9695 9696 9697 9698 9699 9700 9701 9702 9703 9704 9705 9706 9707 9708 9709 9710 9711 9712 9713 9714 9715 9716 9717 9718 9719 9720 9721 9722 9723 9724 9725 9726 9727 9728 9729 9730 9731 9732 9733 9734 9735 9736 9737 9738 9739 9740 9741 9742 9743 9744 9745 9746 9747 9748 9749 9750 9751 9752 9753 9754 9755 9756 9757 9758 9759 9760 9761 9762 9763 9764 9765 9766 9767 9768 9769 9770 9771 9772 9773 9774 9775 9776 9777 9778 9779 9780 9781 9782 9783 9784 9785 9786 9787 9788 9789 9790 9791 9792 9793 9794 9795 9796 9797 9798 9799 
9800 9801 9802 9803 9804 9805 9806 9807 9808 9809 9810 9811 9812 9813 9814 9815 9816 9817 9818 9819 9820 9821 9822 9823 9824 9825 9826 9827 9828 9829 9830 9831 9832 9833 9834 9835 9836 9837 9838 9839 9840 9841 9842 9843 9844 9845 9846 9847 9848 9849 9850 9851 9852 9853 9854 9855 9856 9857 9858 9859 9860 9861 9862 9863 9864 9865 9866 9867 9868 9869 9870 9871 9872 9873 9874 9875 9876 9877 9878 9879 9880 9881 9882 9883 9884 9885 9886 9887 9888 9889 9890 9891 9892 9893 9894 9895 9896 9897 9898 9899 9900 9901 9902 9903 9904 9905 9906 9907 9908 9909 9910 9911 9912 9913 9914 9915 9916 9917 9918 9919 9920 9921 9922 9923 9924 9925 9926 9927 9928 9929 9930 9931 9932 9933 9934 9935 9936 9937 9938 9939 9940 9941 9942 9943 9944 9945 9946
          //      /Root 20R
          //      /Info 10R
          //      /ID [ <81b14aafa313db63dbd6f981e49f94f4> ]
          //    >>
          // The parser goes through the entire stream << ... >> and provides
          // a getter interface for the key-value table
          var dict = parser.getObj();
    
          // The pdflib PDF generator can generate a nested trailer dictionary
          if (!isDict(dict) && dict.dict) {
            dict = dict.dict;
          }
          if (!isDict(dict)) {
            error('Invalid XRef table: could not parse trailer dictionary');
          }
          delete this.tableState;
    
          return dict;
        },
    
        /**
         * Reads the entries of a classic (non-stream) cross-reference table into
         * this.entries, stopping when the 'trailer' keyword is reached.
         *
         * The stream position and parser look-ahead buffers are restored from
         * this.tableState on entry and written back to it before every entry is
         * parsed, so a call that was interrupted by an exception can be repeated
         * and continues with the entry that was being read.
         *
         * @param parser - parser positioned on the table; its buf1/buf2 and the
         *   position of parser.lexer.stream are overwritten from this.tableState.
         * @returns the last object read while looking for a subsection header,
         *   i.e. the 'trailer' command when the loop ends normally.
         */
        readXRefTable: function XRef_readXRefTable(parser) {
          // Example of cross-reference table:
          // xref
          // 0 1                    <-- subsection header (first obj #, obj count)
          // 0000000000 65535 f     <-- actual object (offset, generation #, f/n)
          // 23 2                   <-- subsection header ... and so on ...
          // 0000025518 00002 n
          // 0000025635 00000 n
          // trailer
          // ...
    
          // Rewind the stream and the parser look-ahead to the last saved state.
          var stream = parser.lexer.stream;
          var tableState = this.tableState;
          stream.pos = tableState.streamPos;
          parser.buf1 = tableState.parserBuf1;
          parser.buf2 = tableState.parserBuf2;
    
          // Outer loop is over subsection headers
          var obj;
    
          while (true) {
            // The header is only parsed when it is not already stored in
            // tableState (it is stored while a subsection is being read).
            if (!('firstEntryNum' in tableState) || !('entryCount' in tableState)) {
              if (isCmd(obj = parser.getObj(), 'trailer')) {
                break;
              }
              tableState.firstEntryNum = obj;
              tableState.entryCount = parser.getObj();
            }
    
            var first = tableState.firstEntryNum;
            var count = tableState.entryCount;
            if (!isInt(first) || !isInt(count)) {
              error('Invalid XRef table: wrong types in subsection header');
            }
            // Inner loop is over objects themselves
            for (var i = tableState.entryNum; i < count; i++) {
              // Save the state *before* parsing the entry so that this entry is
              // the one re-read if parsing is interrupted.
              tableState.streamPos = stream.pos;
              tableState.entryNum = i;
              tableState.parserBuf1 = parser.buf1;
              tableState.parserBuf2 = parser.buf2;
    
              var entry = {};
              entry.offset = parser.getObj();
              entry.gen = parser.getObj();
              var type = parser.getObj();
    
              if (isCmd(type, 'f')) {
                entry.free = true;
              } else if (isCmd(type, 'n')) {
                entry.uncompressed = true;
              }
    
              // Validate entry obj
              if (!isInt(entry.offset) || !isInt(entry.gen) ||
                  !(entry.free || entry.uncompressed)) {
                error('Invalid entry in XRef subsection: ' + first + ', ' + count);
              }
    
              // An entry that is already present is never overwritten.
              if (!this.entries[i + first]) {
                this.entries[i + first] = entry;
              }
            }
    
            // Subsection finished: reset the per-subsection state and drop the
            // stored header so that the next iteration parses a new one.
            tableState.entryNum = 0;
            tableState.streamPos = stream.pos;
            tableState.parserBuf1 = parser.buf1;
            tableState.parserBuf2 = parser.buf2;
            delete tableState.firstEntryNum;
            delete tableState.entryCount;
          }
    
          // Per issue 3248: hp scanners generate bad XRef
          // (`first` is the start number of the last subsection that was read.)
          if (first === 1 && this.entries[1] && this.entries[1].free) {
            // shifting the entries
            this.entries.shift();
          }
    
          // Sanity check: as per spec, first object must be free
          if (this.entries[0] && !this.entries[0].free) {
            error('Invalid XRef table: unexpected first object');
          }
          return obj;
        },
    
        processXRefStream: function XRef_processXRefStream(stream) {
          if (!('streamState' in this)) {
            // Stores state of the stream as we process it so we can resume
            // from middle of stream in case of missing data error
            var streamParameters = stream.dict;
            var byteWidths = streamParameters.get('W');
            var range = streamParameters.get('Index');
            if (!range) {
              range = [0, streamParameters.get('Size')];
            }
    
            this.streamState = {
              entryRanges: range,
              byteWidths: byteWidths,
              entryNum: 0,
              streamPos: stream.pos
            };
          }
          this.readXRefStream(stream);
          delete this.streamState;
    
          return stream.dict;
        },
    
        readXRefStream: function XRef_readXRefStream(stream) {
          var i, j;
          var streamState = this.streamState;
          stream.pos = streamState.streamPos;
    
          var byteWidths = streamState.byteWidths;
          var typeFieldWidth = byteWidths[0];
          var offsetFieldWidth = byteWidths[1];
          var generationFieldWidth = byteWidths[2];
    
          var entryRanges = streamState.entryRanges;
          while (entryRanges.length > 0) {
            var first = entryRanges[0];
            var n = entryRanges[1];
    
            if (!isInt(first) || !isInt(n)) {
              error('Invalid XRef range fields: ' + first + ', ' + n);
            }
            if (!isInt(typeFieldWidth) || !isInt(offsetFieldWidth) ||
                !isInt(generationFieldWidth)) {
              error('Invalid XRef entry fields length: ' + first + ', ' + n);
            }
            for (i = streamState.entryNum; i < n; ++i) {
              streamState.entryNum = i;
              streamState.streamPos = stream.pos;
    
              var type = 0, offset = 0, generation = 0;
              for (j = 0; j < typeFieldWidth; ++j) {
                type = (type << 8) | stream.getByte();
              }
              // if type field is absent, its default value is 1
              if (typeFieldWidth === 0) {
                type = 1;
              }
              for (j = 0; j < offsetFieldWidth; ++j) {
                offset = (offset << 8) | stream.getByte();
              }
              for (j = 0; j < generationFieldWidth; ++j) {
                generation = (generation << 8) | stream.getByte();
              }
              var entry = {};
              entry.offset = offset;
              entry.gen = generation;
              switch (type) {
                case 0:
                  entry.free = true;
                  break;
                case 1:
                  entry.uncompressed = true;
                  break;
                case 2:
                  break;
                default:
                  error('Invalid XRef entry type: ' + type);
              }
              if (!this.entries[first + i]) {
                this.entries[first + i] = entry;
              }
            }
    
            streamState.entryNum = 0;
            streamState.streamPos = stream.pos;
            entryRanges.splice(0, 2);
          }
        },
    
        /**
         * Rebuilds this.entries by scanning the raw bytes of the whole file for
         * 'N G obj' headers, 'xref'/'trailer' keywords and objects that look
         * like XRef streams, instead of relying on the stored cross-reference
         * data.
         *
         * @returns the first trailer dictionary that has an 'ID' entry, or
         *   otherwise the last trailer dictionary that could be parsed.
         * @throws {InvalidPDFException} when no trailer dictionary was found.
         */
        indexObjects: function XRef_indexObjects() {
          // Simple scan through the PDF content to find objects,
          // trailers and XRef streams.
          var TAB = 0x9, LF = 0xA, CR = 0xD, SPACE = 0x20;
          var PERCENT = 0x25, LT = 0x3C;
    
          // Collects the characters starting at `offset` up to (not including)
          // the next LF, CR or '<'. If the end of the data is reached first, the
          // loop stops before the final byte is appended.
          function readToken(data, offset) {
            var token = '', ch = data[offset];
            while (ch !== LF && ch !== CR && ch !== LT) {
              if (++offset >= data.length) {
                break;
              }
              token += String.fromCharCode(ch);
              ch = data[offset];
            }
            return token;
          }
          // Returns how many bytes lie between `offset` and the first occurrence
          // of the byte sequence `what` (or the end of the data if it is absent).
          function skipUntil(data, offset, what) {
            var length = what.length, dataLength = data.length;
            var skipped = 0;
            // finding byte sequence
            while (offset < dataLength) {
              var i = 0;
              while (i < length && data[offset + i] === what[i]) {
                ++i;
              }
              if (i >= length) {
                break; // sequence found
              }
              offset++;
              skipped++;
            }
            return skipped;
          }
          var objRegExp = /^(\d+)\s+(\d+)\s+obj\b/;
          // Character codes of 'trailer', 'startxref', 'endobj' and '/XRef'.
          var trailerBytes = new Uint8Array([116, 114, 97, 105, 108, 101, 114]);
          var startxrefBytes = new Uint8Array([115, 116, 97, 114, 116, 120, 114,
                                              101, 102]);
          var endobjBytes = new Uint8Array([101, 110, 100, 111, 98, 106]);
          var xrefBytes = new Uint8Array([47, 88, 82, 101, 102]);
    
          // Clear out any existing entries, since they may be bogus.
          this.entries.length = 0;
    
          var stream = this.stream;
          stream.pos = 0;
          var buffer = stream.getBytes();
          var position = stream.start, length = buffer.length;
          // Buffer positions of trailers and file offsets of suspected XRef
          // streams found during the scan.
          var trailers = [], xrefStms = [];
          while (position < length) {
            var ch = buffer[position];
            if (ch === TAB || ch === LF || ch === CR || ch === SPACE) {
              ++position;
              continue;
            }
            if (ch === PERCENT) { // %-comment
              do {
                ++position;
                if (position >= length) {
                  break;
                }
                ch = buffer[position];
              } while (ch !== LF && ch !== CR);
              continue;
            }
            var token = readToken(buffer, position);
            var m;
            if (token.indexOf('xref') === 0 &&
                (token.length === 4 || /\s/.test(token[4]))) {
              // 'xref' keyword: jump to its trailer, record it, then jump on
              // to 'startxref'.
              position += skipUntil(buffer, position, trailerBytes);
              trailers.push(position);
              position += skipUntil(buffer, position, startxrefBytes);
            } else if ((m = objRegExp.exec(token))) {
              // 'N G obj' header: the first occurrence of an object number wins.
              if (typeof this.entries[m[1]] === 'undefined') {
                this.entries[m[1]] = {
                  offset: position - stream.start,
                  gen: m[2] | 0,
                  uncompressed: true
                };
              }
              // NOTE(review): 'endobj' is 6 bytes long; the 7 presumably also
              // covers one byte following the keyword - confirm.
              var contentLength = skipUntil(buffer, position, endobjBytes) + 7;
              var content = buffer.subarray(position, position + contentLength);
    
              // checking XRef stream suspect
              // (it shall have '/XRef' and next char is not a letter)
              var xrefTagOffset = skipUntil(content, 0, xrefBytes);
              if (xrefTagOffset < contentLength &&
                  content[xrefTagOffset + 5] < 64) {
                xrefStms.push(position - stream.start);
                this.xrefstms[position - stream.start] = 1; // Avoid recursion
              }
    
              position += contentLength;
            } else if (token.indexOf('trailer') === 0 &&
                       (token.length === 7 || /\s/.test(token[7]))) {
              // Stand-alone 'trailer' keyword (not preceded by an 'xref' token).
              trailers.push(position);
              position += skipUntil(buffer, position, startxrefBytes);
            } else {
              // Unrecognized token: step over it and the delimiter after it.
              position += token.length + 1;
            }
          }
          // reading XRef streams
          var i, ii;
          for (i = 0, ii = xrefStms.length; i < ii; ++i) {
            this.startXRefQueue.push(xrefStms[i]);
            this.xrefBlocks.push(xrefStms[i]);
            this.readXRef(/* recoveryMode */ true);
          }
          // finding main trailer
          var dict;
          for (i = 0, ii = trailers.length; i < ii; ++i) {
            stream.pos = trailers[i];
            var parser = new Parser(new Lexer(stream), true, this);
            var obj = parser.getObj();
            if (!isCmd(obj, 'trailer')) {
              continue;
            }
            // read the trailer dictionary
            if (!isDict(dict = parser.getObj())) {
              continue;
            }
            // taking the first one with 'ID'
            if (dict.has('ID')) {
              return dict;
            }
          }
          // no trailer with 'ID', taking last one (if exists)
          if (dict) {
            return dict;
          }
          // nothing helps
          // calling error() would reject worker with an UnknownErrorException.
          throw new InvalidPDFException('Invalid PDF structure');
        },
    
        readXRef: function XRef_readXRef(recoveryMode) {
          var stream = this.stream;
    
          try {
            while (this.startXRefQueue.length) {
              var startXRef = this.startXRefQueue[0];
    
              stream.pos = startXRef + stream.start;
    
              var parser = new Parser(new Lexer(stream), true, this);
              var obj = parser.getObj();
              var dict;
    
              // Get dictionary
              if (isCmd(obj, 'xref')) {
                // Parse end-of-file XRef
                dict = this.processXRefTable(parser);
                if (!this.topDict) {
                  this.topDict = dict;
                }
    
                // Recursively get other XRefs 'XRefStm', if any
                obj = dict.get('XRefStm');
                if (isInt(obj)) {
                  var pos = obj;
                  // ignore previously loaded xref streams
                  // (possible infinite recursion)
                  if (!(pos in this.xrefstms)) {
                    this.xrefstms[pos] = 1;
                    this.startXRefQueue.push(pos);
                    this.xrefBlocks.push(pos);
                  }
                }
              } else if (isInt(obj)) {
                // Parse in-stream XRef
                if (!isInt(parser.getObj()) ||
                    !isCmd(parser.getObj(), 'obj') ||
                    !isStream(obj = parser.getObj())) {
                  error('Invalid XRef stream');
                }
                dict = this.processXRefStream(obj);
                if (!this.topDict) {
                  this.topDict = dict;
                }
                if (!dict) {
                  error('Failed to read XRef stream');
                }
              } else {
                error('Invalid XRef stream header');
              }
    
              // Recursively get previous dictionary, if any
              obj = dict.get('Prev');
              if (isInt(obj)) {
                this.startXRefQueue.push(obj);
                this.xrefBlocks.push(obj);
              } else if (isRef(obj)) {
                // The spec says Prev must not be a reference, i.e. "/Prev NNN"
                // This is a fallback for non-compliant PDFs, i.e. "/Prev NNN 0 R"
                this.startXRefQueue.push(obj.num);
                this.xrefBlocks.push(obj.num);
              }
              this.xrefBlocks.push(stream.pos);
              this.startXRefQueue.shift();
            }
    
            return this.topDict;
          } catch (e) {
            if (e instanceof MissingDataException) {
              throw e;
            }
            info('(while reading XRef): ' + e);
          }
    
          if (recoveryMode) {
            return;
          }
          throw new XRefParseException();
        },
    
        getEntry: function XRef_getEntry(i) {
          var xrefEntry = this.entries[i];
          if (xrefEntry && !xrefEntry.free && xrefEntry.offset) {
            return xrefEntry;
          }
          return null;
        },
    
        fetchIfRef: function XRef_fetchIfRef(obj) {
          if (!isRef(obj)) {
            return obj;
          }
          return this.fetch(obj);
        },
    
        fetch: function XRef_fetch(ref, suppressEncryption) {
          assert(isRef(ref), 'ref object is not a reference');
          var num = ref.num;
          if (num in this.cache) {
            var cacheEntry = this.cache[num];
            return cacheEntry;
          }
    
          var xrefEntry = this.getEntry(num);
    
          // the referenced entry can be free
          if (xrefEntry === null) {
            return (this.cache[num] = null);
          }
    
          if (xrefEntry.uncompressed) {
            xrefEntry = this.fetchUncompressed(ref, xrefEntry, suppressEncryption);
          } else {
            xrefEntry = this.fetchCompressed(xrefEntry, suppressEncryption);
          }
          if (isDict(xrefEntry)){
            xrefEntry.objId = ref.toString();
          } else if (isStream(xrefEntry)) {
            xrefEntry.dict.objId = ref.toString();
          }
          return xrefEntry;
        },
    
        fetchUncompressed: function XRef_fetchUncompressed(ref, xrefEntry,
                                                           suppressEncryption) {
          var gen = ref.gen;
          var num = ref.num;
          if (xrefEntry.gen !== gen) {
            error('inconsistent generation in XRef');
          }
          var stream = this.stream.makeSubStream(xrefEntry.offset +
                                                 this.stream.start);
          var parser = new Parser(new Lexer(stream), true, this);
          var obj1 = parser.getObj();
          var obj2 = parser.getObj();
          var obj3 = parser.getObj();
          if (!isInt(obj1) || parseInt(obj1, 10) !== num ||
              !isInt(obj2) || parseInt(obj2, 10) !== gen ||
              !isCmd(obj3)) {
            error('bad XRef entry');
          }
          if (!isCmd(obj3, 'obj')) {
            // some bad PDFs use "obj1234" and really mean 1234
            if (obj3.cmd.indexOf('obj') === 0) {
              num = parseInt(obj3.cmd.substring(3), 10);
              if (!isNaN(num)) {
                return num;
              }
            }
            error('bad XRef entry');
          }
          if (this.encrypt && !suppressEncryption) {
            xrefEntry = parser.getObj(this.encrypt.createCipherTransform(num, gen));
          } else {
            xrefEntry = parser.getObj();
          }
          if (!isStream(xrefEntry)) {
            this.cache[num] = xrefEntry;
          }
          return xrefEntry;
        },
    
        fetchCompressed: function XRef_fetchCompressed(xrefEntry,
                                                       suppressEncryption) {
          var tableOffset = xrefEntry.offset;
          var stream = this.fetch(new Ref(tableOffset, 0));
          if (!isStream(stream)) {
            error('bad ObjStm stream');
          }
          var first = stream.dict.get('First');
          var n = stream.dict.get('N');
          if (!isInt(first) || !isInt(n)) {
            error('invalid first and n parameters for ObjStm stream');
          }
          var parser = new Parser(new Lexer(stream), false, this);
          parser.allowStreams = true;
          var i, entries = [], num, nums = [];
          // read the object numbers to populate cache
          for (i = 0; i < n; ++i) {
            num = parser.getObj();
            if (!isInt(num)) {
              error('invalid object number in the ObjStm stream: ' + num);
            }
            nums.push(num);
            var offset = parser.getObj();
            if (!isInt(offset)) {
              error('invalid object offset in the ObjStm stream: ' + offset);
            }
          }
          // read stream objects for cache
          for (i = 0; i < n; ++i) {
            entries.push(parser.getObj());
            num = nums[i];
            var entry = this.entries[num];
            if (entry && entry.offset === tableOffset && entry.gen === i) {
              this.cache[num] = entries[i];
            }
          }
          xrefEntry = entries[xrefEntry.gen];
          if (xrefEntry === undefined) {
            error('bad XRef entry for compressed object');
          }
          return xrefEntry;
        },
    
        fetchIfRefAsync: function XRef_fetchIfRefAsync(obj) {
          if (!isRef(obj)) {
            return Promise.resolve(obj);
          }
          return this.fetchAsync(obj);
        },
    
        fetchAsync: function XRef_fetchAsync(ref, suppressEncryption) {
          var streamManager = this.stream.manager;
          var xref = this;
          return new Promise(function tryFetch(resolve, reject) {
            try {
              resolve(xref.fetch(ref, suppressEncryption));
            } catch (e) {
              if (e instanceof MissingDataException) {
                streamManager.requestRange(e.begin, e.end).then(function () {
                  tryFetch(resolve, reject);
                }, reject);
                return;
              }
              reject(e);
            }
          });
        },
    
        getCatalogObj: function XRef_getCatalogObj() {
          return this.root;
        }
      };
    
      return XRef;
    })();
    
    /**
     * A NameTree is like a Dict but has some advantageous properties, see the
     * spec (7.9.6) for more details.
     * TODO: implement all the Dict functions and make this more efficient.
     */
    var NameTree = (function NameTreeClosure() {
      /**
       * @param root - root node of the tree (a dictionary or a reference to
       *   one); a falsy root yields an empty tree.
       * @param xref - object providing fetchIfRef() to resolve references.
       */
      function NameTree(root, xref) {
        this.root = root;
        this.xref = xref;
      }

      NameTree.prototype = {
        /**
         * Walks the whole tree breadth first and collects every key/value
         * pair found in the 'Names' arrays of its nodes.
         *
         * @returns {Object} plain object mapping each (resolved) key to its
         *   (resolved) value; empty when the tree has no root.
         */
        getAll: function NameTree_getAll() {
          var dict = {};
          if (!this.root) {
            return dict;
          }
          var xref = this.xref;
          // reading name tree
          // `processed` records every node that was queued, so that a node
          // reachable twice (e.g. through a cycle) is reported as an error.
          var processed = new RefSet();
          processed.put(this.root);
          var queue = [this.root];
          while (queue.length > 0) {
            var i, n;
            var obj = xref.fetchIfRef(queue.shift());
            if (!isDict(obj)) {
              continue;
            }
            if (obj.has('Kids')) {
              // Intermediate node: only its children are examined.
              var kids = obj.get('Kids');
              for (i = 0, n = kids.length; i < n; i++) {
                var kid = kids[i];
                if (processed.has(kid)) {
                  error('invalid destinations');
                }
                queue.push(kid);
                processed.put(kid);
              }
              continue;
            }
            // Leaf node: 'Names' is a flat [key, value, key, value, ...] array.
            var names = obj.get('Names');
            if (names) {
              for (i = 0, n = names.length; i < n; i += 2) {
                dict[xref.fetchIfRef(names[i])] = xref.fetchIfRef(names[i + 1]);
              }
            }
          }
          return dict;
        },

        /**
         * Looks up a single key without reading the whole tree.
         *
         * @param destinationId - the key to search for.
         * @returns the (resolved) value stored for the key, or null when the
         *   key is absent, the tree has no root, a 'Kids' entry is not an
         *   array, or the tree is nested deeper than MAX_NAMES_LEVELS.
         */
        get: function NameTree_get(destinationId) {
          if (!this.root) {
            return null;
          }

          var xref = this.xref;
          var kidsOrNames = xref.fetchIfRef(this.root);
          var loopCount = 0;
          var MAX_NAMES_LEVELS = 10;
          var l, r, m;

          // Perform a binary search to quickly find the entry that
          // contains the named destination we are looking for.
          while (kidsOrNames.has('Kids')) {
            loopCount++;
            if (loopCount > MAX_NAMES_LEVELS) {
              warn('Search depth limit for named destionations has been reached.');
              return null;
            }

            var kids = kidsOrNames.get('Kids');
            if (!isArray(kids)) {
              return null;
            }

            l = 0;
            r = kids.length - 1;
            while (l <= r) {
              m = (l + r) >> 1;
              var kid = xref.fetchIfRef(kids[m]);
              // 'Limits' holds the smallest and largest key below this kid.
              var limits = kid.get('Limits');

              if (destinationId < xref.fetchIfRef(limits[0])) {
                r = m - 1;
              } else if (destinationId > xref.fetchIfRef(limits[1])) {
                l = m + 1;
              } else {
                kidsOrNames = xref.fetchIfRef(kids[m]);
                break;
              }
            }
            if (l > r) {
              return null;
            }
          }

          // If we get here, then we have found the right entry. Now
          // go through the named destinations in the Named dictionary
          // until we find the exact destination we're looking for.
          var names = kidsOrNames.get('Names');
          if (isArray(names)) {
            // Perform a binary search to reduce the lookup time.
            l = 0;
            r = names.length - 2;
            while (l <= r) {
              // Check only even indices (0, 2, 4, ...) because the
              // odd indices contain the actual D array.
              // Take the real midpoint and round it up to an even index. The
              // sum `(l + r) & ~1` is not a midpoint: with l === 0 it always
              // equals r, which made the search walk the array linearly.
              var half = (l + r) >> 1;
              m = half + (half & 1);
              var currentKey = xref.fetchIfRef(names[m]);
              if (destinationId < currentKey) {
                r = m - 2;
              } else if (destinationId > currentKey) {
                l = m + 2;
              } else {
                return xref.fetchIfRef(names[m + 1]);
              }
            }
          }
          return null;
        }
      };
      return NameTree;
    })();
    
    /**
     * "A PDF file can refer to the contents of another file by using a File
     * Specification (PDF 1.1)", see the spec (7.11) for more details.
     * NOTE: Only embedded files are supported (as part of the attachments support)
     * TODO: support the 'URL' file system (with caching if !/V), portable
     * collections attributes and related files (/RF)
     */
    var FileSpec = (function FileSpecClosure() {
      // Keys probed by pickPlatformItem, most preferred first.
      var PLATFORM_KEYS = ['UF', 'F', 'Unix', 'Mac', 'DOS'];

      /**
       * @param root - file specification dictionary; when it is missing or not
       *   a dictionary the instance is left without any properties.
       * @param xref - used later to resolve the embedded file reference.
       */
      function FileSpec(root, xref) {
        if (!(root && isDict(root))) {
          return;
        }
        this.xref = xref;
        this.root = root;
        if (root.has('FS')) {
          this.fs = root.get('FS');
        }
        if (root.has('Desc')) {
          this.description = stringToPDFString(root.get('Desc'));
        } else {
          this.description = '';
        }
        if (root.has('RF')) {
          warn('Related file specifications are not supported');
        }
        // Content can only be served when an embedded file ('EF') exists.
        if (root.has('EF')) {
          this.contentAvailable = true;
        } else {
          this.contentAvailable = false;
          warn('Non-embedded file specifications are not supported');
        }
      }

      /**
       * Returns the value of the first key among UF, F, Unix, Mac, DOS that
       * `dict` contains, or null when it contains none of them.
       */
      function pickPlatformItem(dict) {
        for (var i = 0; i < PLATFORM_KEYS.length; i++) {
          var key = PLATFORM_KEYS[i];
          if (dict.has(key)) {
            return dict.get(key);
          }
        }
        return null;
      }

      FileSpec.prototype = {
        /**
         * File name taken from the specification ('unnamed' when none is
         * present), with escaped and plain backslashes turned into forward
         * slashes. Computed on first access and memoized in this._filename.
         */
        get filename() {
          if (this._filename || !this.root) {
            return this._filename;
          }
          var rawName = pickPlatformItem(this.root) || 'unnamed';
          var decoded = stringToPDFString(rawName);
          this._filename = decoded.
            replace(/\\\\/g, '\\').
            replace(/\\\//g, '/').
            replace(/\\/g, '/');
          return this._filename;
        },

        /**
         * Bytes of the embedded file, or null when there is no embedded file,
         * no content reference, or the reference does not resolve to a stream.
         */
        get content() {
          if (!this.contentAvailable) {
            return null;
          }
          if (!this.contentRef && this.root) {
            this.contentRef = pickPlatformItem(this.root.get('EF'));
          }
          if (!this.contentRef) {
            warn('Embedded file specification does not have a content');
            return null;
          }
          var fileObj = this.xref.fetchIfRef(this.contentRef);
          if (fileObj && isStream(fileObj)) {
            return fileObj.getBytes();
          }
          warn('Embedded file specification points to non-existing/invalid ' +
            'content');
          return null;
        },

        /**
         * Plain-object snapshot holding the file name and the content.
         */
        get serializable() {
          var name = this.filename;
          var bytes = this.content;
          return {
            filename: name,
            content: bytes
          };
        }
      };
      return FileSpec;
    })();
    
    /**
     * A helper for loading missing data in object graphs. It traverses the graph
     * depth first and queues up any objects that have missing data. Once it has
     * traversed as many objects as are available, it attempts to bundle the
     * missing data requests and then resumes from the nodes that weren't ready.
     *
     * NOTE: It provides protection from circular references by keeping track of
     * loaded references. However, you must be careful not to load any graphs
     * that have references to the catalog or other pages since that will cause the
     * entire PDF document object graph to be traversed.
     */
    var ObjectLoader = (function() {
      /**
       * Tells whether `value` is a reference, dictionary, array or stream,
       * i.e. something the walker has to look into.
       */
      function mayHaveChildren(value) {
        return isRef(value) || isDict(value) || isArray(value) || isStream(value);
      }

      /**
       * Queues `candidate` for a visit when it may have children.
       */
      function enqueueIfContainer(candidate, nodesToVisit) {
        if (mayHaveChildren(candidate)) {
          nodesToVisit.push(candidate);
        }
      }

      /**
       * Pushes the direct children of `node` that need visiting onto
       * `nodesToVisit`. Dictionaries contribute the values of `node.map`,
       * streams the values of `node.dict.map`, and arrays their elements.
       * Any other kind of node contributes nothing.
       */
      function addChildren(node, nodesToVisit) {
        var nodeIsDict = isDict(node);
        if (nodeIsDict || isStream(node)) {
          var entries = nodeIsDict ? node.map : node.dict.map;
          for (var name in entries) {
            enqueueIfContainer(entries[name], nodesToVisit);
          }
          return;
        }
        if (!isArray(node)) {
          return;
        }
        for (var idx = 0, count = node.length; idx < count; idx++) {
          enqueueIfContainer(node[idx], nodesToVisit);
        }
      }

      /**
       * Inspects the base streams of `node` (when it exposes
       * `getBaseStreams`) and appends a `{begin, end}` range to
       * `pendingRequests` for each one that reports missing chunks.
       *
       * @returns {boolean} true when at least one range was appended.
       */
      function queueMissingStreamRanges(node, pendingRequests) {
        if (!node || !node.getBaseStreams) {
          return false;
        }
        var baseStreams = node.getBaseStreams();
        var anyMissing = false;
        for (var k = 0; k < baseStreams.length; k++) {
          var base = baseStreams[k];
          if (!base.getMissingChunks || !base.getMissingChunks().length) {
            continue;
          }
          anyMissing = true;
          pendingRequests.push({
            begin: base.start,
            end: base.end
          });
        }
        return anyMissing;
      }

      /**
       * @param {Object} obj - Object whose properties named in `keys` are the
       *   starting points of the walk.
       * @param {Array} keys - Property names of `obj` to start from.
       * @param {XRef} xref - Used to fetch references and to request ranges.
       */
      function ObjectLoader(obj, keys, xref) {
        this.obj = obj;
        this.keys = keys;
        this.xref = xref;
        this.refSet = null;
        this.capability = null;
      }

      ObjectLoader.prototype = {
        /**
         * Starts loading and returns a promise that is resolved once no
         * visited node has missing data any more.
         */
        load: function ObjectLoader_load() {
          var startKeys = this.keys;
          this.capability = createPromiseCapability();

          // Walking the graph is pointless when nothing can be missing.
          var nothingMissing =
            !(this.xref.stream instanceof ChunkedStream) ||
            this.xref.stream.getMissingChunks().length === 0;
          if (nothingMissing) {
            this.capability.resolve();
            return this.capability.promise;
          }

          this.refSet = new RefSet();
          // Seed the walk with the requested properties of the root object.
          var roots = [];
          for (var n = 0; n < startKeys.length; n++) {
            roots.push(this.obj[startKeys[n]]);
          }

          this._walk(roots);
          return this.capability.promise;
        },

        /**
         * Depth-first walk over `nodesToVisit`. Nodes whose data is not
         * available yet are collected, their ranges are requested in one
         * batch, and the walk resumes from those nodes once the request
         * completes. Resolves the capability when nothing is pending.
         */
        _walk: function ObjectLoader_walk(nodesToVisit) {
          var nodesToRevisit = [];
          var pendingRequests = [];

          while (nodesToVisit.length) {
            var node = nodesToVisit.pop();

            // Only references or chunked streams can run into missing data.
            if (isRef(node)) {
              var visited = this.refSet;
              // A reference that was seen before needs no second visit.
              if (visited.has(node)) {
                continue;
              }
              try {
                visited.put(node);
                node = this.xref.fetch(node);
              } catch (ex) {
                if (ex instanceof MissingDataException) {
                  // `node` is still the reference here; retry it later.
                  nodesToRevisit.push(node);
                  pendingRequests.push({ begin: ex.begin, end: ex.end });
                } else {
                  throw ex;
                }
              }
            }

            if (queueMissingStreamRanges(node, pendingRequests)) {
              nodesToRevisit.push(node);
            }

            addChildren(node, nodesToVisit);
          }

          if (!pendingRequests.length) {
            // Everything is loaded.
            this.refSet = null;
            this.capability.resolve();
            return;
          }

          var self = this;
          this.xref.stream.manager.requestRanges(pendingRequests).then(
              function pendingRequestCallback() {
            // Forget the references that failed to load, otherwise they
            // would be skipped as "already visited" when we revisit them.
            for (var r = 0; r < nodesToRevisit.length; r++) {
              var pending = nodesToRevisit[r];
              if (isRef(pending)) {
                self.refSet.remove(pending);
              }
            }
            self._walk(nodesToRevisit);
          }, this.capability.reject);
        }
      };

      return ObjectLoader;
    })();
    
    exports.Catalog = Catalog;
    exports.ObjectLoader = ObjectLoader;
    exports.XRef = XRef;
    }));
    
    /* Copyright 2012 Mozilla Foundation
     *
     * Licensed under the Apache License, Version 2.0 (the "License");
     * you may not use this file except in compliance with the License.
     * You may obtain a copy of the License at
     *
     *     http://www.apache.org/licenses/LICENSE-2.0
     *
     * Unless required by applicable law or agreed to in writing, software
     * distributed under the License is distributed on an "AS IS" BASIS,
     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     * See the License for the specific language governing permissions and
     * limitations under the License.
     */
    
    'use strict';
    
    (function (root, factory) {
    
      // [web-view residue, not part of the source] Damyan Mitev's avatar
      // [web-view residue, not part of the source] Damyan Mitev committed
      //if (typeof define === 'function' && define.amd) {
      //  define('pdfjs/core/document', ['exports', 'pdfjs/shared/util',
      //    'pdfjs/core/primitives', 'pdfjs/core/stream', 'pdfjs/core/obj',
      //    'pdfjs/core/parser', 'pdfjs/core/crypto'], factory);
    
      // } else if (typeof exports !== 'undefined') {
      //   factory(exports, require('../shared/util.js'), require('./primitives.js'),
      //     require('./stream.js'), require('./obj.js'), require('./parser.js'),
      //     require('./crypto.js'));
    
      // [web-view residue, not part of the source] Damyan Mitev's avatar
      // [web-view residue, not part of the source] Damyan Mitev committed
      //} else {
    
        factory((root.pdfjsCoreDocument = {}), root.pdfjsSharedUtil,
          root.pdfjsCorePrimitives, root.pdfjsCoreStream,
          root.pdfjsCoreObj, root.pdfjsCoreParser, root.pdfjsCoreCrypto);
    
      // [web-view residue, not part of the source] Damyan Mitev's avatar
      // [web-view residue, not part of the source] Damyan Mitev committed
      //}
    
    }(window, function (exports, sharedUtil, corePrimitives, coreStream, coreObj,
                      coreParser, coreCrypto) {
    
    var MissingDataException = sharedUtil.MissingDataException;
    var Util = sharedUtil.Util;
    var assert = sharedUtil.assert;
    var error = sharedUtil.error;
    var info = sharedUtil.info;
    var isArray = sharedUtil.isArray;
    var isArrayBuffer = sharedUtil.isArrayBuffer;
    var isString = sharedUtil.isString;
    var shadow = sharedUtil.shadow;
    var stringToBytes = sharedUtil.stringToBytes;
    var stringToPDFString = sharedUtil.stringToPDFString;
    var warn = sharedUtil.warn;
    var Dict = corePrimitives.Dict;
    var isDict = corePrimitives.isDict;
    var isName = corePrimitives.isName;
    var isStream = corePrimitives.isStream;
    var NullStream = coreStream.NullStream;
    var Stream = coreStream.Stream;
    var StreamsSequenceStream = coreStream.StreamsSequenceStream;
    var Catalog = coreObj.Catalog;
    var ObjectLoader = coreObj.ObjectLoader;
    var XRef = coreObj.XRef;
    var Lexer = coreParser.Lexer;
    var Linearization = coreParser.Linearization;
    var calculateMD5 = coreCrypto.calculateMD5;
    
    
    /**
     * The `PDFDocument` holds all the data of the PDF file. Compared to the
     * `PDFDoc`, this one doesn't have any job management code.
     * Right now there exists one PDFDocument on the main thread + one object
     * for each worker. If there is no worker support enabled, two
     * `PDFDocument` objects are created on the main thread.
     */
    var PDFDocument = (function PDFDocumentClosure() {
      var FINGERPRINT_FIRST_BYTES = 1024;
      var EMPTY_FINGERPRINT = '\x00\x00\x00\x00\x00\x00\x00' +
        '\x00\x00\x00\x00\x00\x00\x00\x00\x00';