Newer
Older
14001
14002
14003
14004
14005
14006
14007
14008
14009
14010
14011
14012
14013
14014
14015
14016
14017
14018
14019
14020
14021
14022
14023
14024
14025
14026
14027
14028
14029
14030
14031
14032
14033
14034
14035
14036
14037
14038
14039
14040
14041
14042
14043
14044
14045
14046
14047
14048
14049
14050
14051
14052
14053
14054
14055
14056
14057
14058
14059
14060
14061
14062
14063
14064
14065
14066
14067
14068
14069
14070
14071
14072
14073
14074
14075
14076
14077
14078
14079
14080
14081
14082
14083
14084
14085
14086
14087
14088
14089
14090
14091
14092
14093
14094
14095
14096
14097
14098
14099
14100
14101
14102
14103
14104
14105
14106
14107
14108
14109
14110
14111
14112
14113
14114
14115
14116
14117
14118
14119
14120
14121
14122
14123
14124
14125
14126
14127
14128
14129
14130
14131
14132
14133
14134
14135
14136
14137
14138
14139
14140
14141
14142
14143
14144
14145
14146
14147
14148
14149
14150
14151
14152
14153
14154
14155
14156
14157
14158
14159
14160
14161
14162
14163
14164
14165
14166
14167
14168
14169
14170
14171
14172
14173
14174
14175
14176
14177
14178
14179
14180
14181
14182
14183
14184
14185
14186
14187
14188
14189
14190
14191
14192
14193
14194
14195
14196
14197
14198
14199
14200
14201
14202
14203
14204
14205
14206
14207
14208
14209
14210
14211
14212
14213
14214
14215
14216
14217
14218
14219
14220
14221
14222
14223
14224
14225
14226
14227
14228
14229
14230
14231
14232
14233
14234
14235
14236
14237
14238
14239
14240
14241
14242
14243
14244
14245
14246
14247
14248
14249
14250
14251
14252
14253
14254
14255
14256
14257
14258
14259
14260
14261
14262
14263
14264
14265
14266
14267
14268
14269
14270
14271
14272
14273
14274
14275
14276
14277
14278
14279
14280
14281
14282
14283
14284
14285
14286
14287
14288
14289
14290
14291
14292
14293
14294
14295
14296
14297
14298
14299
14300
14301
14302
14303
14304
14305
14306
14307
14308
14309
14310
14311
14312
14313
14314
14315
14316
14317
14318
14319
14320
14321
14322
14323
14324
14325
14326
14327
14328
14329
14330
14331
14332
14333
14334
14335
14336
14337
14338
14339
14340
14341
14342
14343
14344
14345
14346
14347
14348
14349
14350
14351
14352
14353
14354
14355
14356
14357
14358
14359
14360
14361
14362
14363
14364
14365
14366
14367
14368
14369
14370
14371
14372
14373
14374
14375
14376
14377
14378
14379
14380
14381
14382
14383
14384
14385
14386
14387
14388
14389
14390
14391
14392
14393
14394
14395
14396
14397
14398
14399
14400
14401
14402
14403
14404
14405
14406
14407
14408
14409
14410
14411
14412
14413
14414
14415
14416
14417
14418
14419
14420
14421
14422
14423
14424
14425
14426
14427
14428
14429
14430
14431
14432
14433
14434
14435
14436
14437
14438
14439
14440
14441
14442
14443
14444
14445
14446
14447
14448
14449
14450
14451
14452
14453
14454
14455
14456
14457
14458
14459
14460
14461
14462
14463
14464
14465
14466
14467
14468
14469
14470
14471
14472
14473
14474
14475
14476
14477
14478
14479
14480
14481
14482
14483
14484
14485
14486
14487
14488
14489
14490
14491
14492
14493
14494
14495
14496
14497
14498
14499
14500
14501
14502
14503
14504
14505
14506
14507
14508
14509
14510
14511
14512
14513
14514
14515
14516
14517
14518
14519
14520
14521
14522
14523
14524
14525
14526
14527
14528
14529
14530
14531
14532
14533
14534
14535
14536
14537
14538
14539
14540
14541
14542
14543
14544
14545
14546
14547
14548
14549
14550
14551
14552
14553
14554
14555
14556
14557
14558
14559
14560
14561
14562
14563
14564
14565
14566
14567
14568
14569
14570
14571
14572
14573
14574
14575
14576
14577
14578
14579
14580
14581
14582
14583
14584
14585
14586
14587
14588
14589
14590
14591
14592
14593
14594
14595
14596
14597
14598
14599
14600
14601
14602
14603
14604
14605
14606
14607
14608
14609
14610
14611
14612
14613
14614
14615
14616
14617
14618
14619
14620
14621
14622
14623
14624
14625
14626
14627
14628
14629
14630
14631
14632
14633
14634
14635
14636
14637
14638
14639
kidsOrNames = xref.fetchIfRef(kids[m]);
break;
}
}
if (l > r) {
return null;
}
}
// If we get here, then we have found the right entry. Now
// go through the named destinations in the Named dictionary
// until we find the exact destination we're looking for.
var names = kidsOrNames.get("Names");
if (isArray(names)) {
// Perform a binary search to reduce the lookup time.
l = 0;
r = names.length - 2;
while (l <= r) {
// Check only even indices (0, 2, 4, ...) because the
// odd indices contain the actual D array.
m = (l + r) & ~1;
if (destinationId < xref.fetchIfRef(names[m])) {
r = m - 2;
} else if (destinationId > xref.fetchIfRef(names[m])) {
l = m + 2;
} else {
return xref.fetchIfRef(names[m + 1]);
}
}
}
return null;
}
};
return NameTree;
})();
/**
* "A PDF file can refer to the contents of another file by using a File
* Specification (PDF 1.1)", see the spec (7.11) for more details.
* NOTE: Only embedded files are supported (as part of the attachments support)
* TODO: support the 'URL' file system (with caching if !/V), portable
* collections attributes and related files (/RF)
*/
var FileSpec = (function FileSpecClosure() {
  // Keys that may hold a platform specific entry, in lookup priority order.
  var PLATFORM_KEYS = ["UF", "F", "Unix", "Mac", "DOS"];

  /**
   * Wraps a file specification dictionary.
   *
   * When `root` is missing or is not a dictionary the instance is left
   * uninitialised: `filename` then yields `undefined` and `content` yields
   * `null`.
   *
   * @param {Dict} root - The file specification dictionary.
   * @param {XRef} xref - Used to resolve the embedded file reference.
   */
  function FileSpec(root, xref) {
    if (!root || !isDict(root)) {
      return;
    }
    this.xref = xref;
    this.root = root;
    if (root.has("FS")) {
      this.fs = root.get("FS");
    }
    this.description = root.has("Desc")
      ? stringToPDFString(root.get("Desc"))
      : "";
    if (root.has("RF")) {
      warn("Related file specifications are not supported");
    }
    this.contentAvailable = true;
    if (!root.has("EF")) {
      this.contentAvailable = false;
      warn("Non-embedded file specifications are not supported");
    }
  }

  /**
   * Returns the first entry found in `dict` when probing the keys
   * UF, F, Unix, Mac, DOS (in that order).
   *
   * @param {Dict} dict - Dictionary to probe.
   * @returns {*} The entry value, or `null` when none of the keys is present
   *   or when `dict` is not a dictionary at all.
   */
  function pickPlatformItem(dict) {
    // A damaged file may carry e.g. an /EF entry that is not a dictionary;
    // treat that as "nothing found" rather than failing on `dict.has`.
    if (!isDict(dict)) {
      return null;
    }
    for (var i = 0, ii = PLATFORM_KEYS.length; i < ii; i++) {
      var key = PLATFORM_KEYS[i];
      if (dict.has(key)) {
        return dict.get(key);
      }
    }
    return null;
  }

  FileSpec.prototype = {
    /**
     * The file name taken from the specification (or "unnamed"), with
     * escaped backslashes/slashes unescaped and every remaining backslash
     * turned into a forward slash. The result is cached in `_filename`.
     */
    get filename() {
      if (!this._filename && this.root) {
        var filename = pickPlatformItem(this.root) || "unnamed";
        this._filename = stringToPDFString(filename)
          .replace(/\\\\/g, "\\")
          .replace(/\\\//g, "/")
          .replace(/\\/g, "/");
      }
      return this._filename;
    },

    /**
     * The bytes of the embedded file, or `null` (after a warning) when the
     * specification has no usable embedded stream.
     */
    get content() {
      if (!this.contentAvailable) {
        return null;
      }
      if (!this.contentRef && this.root) {
        this.contentRef = pickPlatformItem(this.root.get("EF"));
      }
      var content = null;
      if (this.contentRef) {
        var xref = this.xref;
        var fileObj = xref.fetchIfRef(this.contentRef);
        if (fileObj && isStream(fileObj)) {
          content = fileObj.getBytes();
        } else {
          warn(
            "Embedded file specification points to non-existing/invalid " +
              "content"
          );
        }
      } else {
        warn("Embedded file specification does not have a content");
      }
      return content;
    },

    /** Plain object holding the `filename` and `content` values. */
    get serializable() {
      return {
        filename: this.filename,
        content: this.content
      };
    }
  };
  return FileSpec;
})();
/**
* A helper for loading missing data in object graphs. It traverses the graph
* depth first and queues up any objects that have missing data. Once it has
* traversed as many objects as are available it attempts to bundle the
* missing data requests and then resume from the nodes that weren't ready.
*
* NOTE: It provides protection from circular references by keeping track of
* loaded references. However, you must be careful not to load any graphs
* that have references to the catalog or other pages since that will cause the
* entire PDF document object graph to be traversed.
*/
var ObjectLoader = (function() {
  // Tells whether `value` is of a kind that can hold (or point at) further
  // objects: a reference, a dictionary, an array or a stream.
  function mayHaveChildren(value) {
    return isRef(value) || isDict(value) || isArray(value) || isStream(value);
  }

  // Pushes every direct child of `node` that may itself have children onto
  // `nodesToVisit`. Dictionaries and streams contribute the values of their
  // `map`; arrays contribute their elements. Other nodes contribute nothing.
  function addChildren(node, nodesToVisit) {
    var nodeIsDict = isDict(node);
    if (nodeIsDict || isStream(node)) {
      var entries = nodeIsDict ? node.map : node.dict.map;
      for (var name in entries) {
        var entry = entries[name];
        if (mayHaveChildren(entry)) {
          nodesToVisit.push(entry);
        }
      }
      return;
    }
    if (!isArray(node)) {
      return;
    }
    node.forEach(function(element) {
      if (mayHaveChildren(element)) {
        nodesToVisit.push(element);
      }
    });
  }

  /**
   * @param {Object} obj - Object whose properties are the graph roots.
   * @param {Array} keys - Names of the properties of `obj` to load.
   * @param {XRef} xref - Used to fetch references and to request ranges.
   */
  function ObjectLoader(obj, keys, xref) {
    this.obj = obj;
    this.keys = keys;
    this.xref = xref;
    this.refSet = null;
    this.capability = null;
  }

  ObjectLoader.prototype = {
    /**
     * Starts loading. Returns the promise of a fresh capability, which is
     * resolved once the walk finds nothing left to request.
     */
    load: function ObjectLoader_load() {
      var keys = this.keys;
      var capability = (this.capability = createPromiseCapability());
      // The graph only has to be walked when the data comes from a chunked
      // stream that still has missing chunks.
      var needsWalk =
        this.xref.stream instanceof ChunkedStream &&
        this.xref.stream.getMissingChunks().length !== 0;
      if (!needsWalk) {
        capability.resolve();
        return capability.promise;
      }

      this.refSet = new RefSet();
      // The requested properties are the starting points of the walk.
      var roots = [];
      for (var k = 0, kk = keys.length; k < kk; k++) {
        roots.push(this.obj[keys[k]]);
      }
      this._walk(roots);
      return this.capability.promise;
    },

    /**
     * Depth-first walk over `nodesToVisit`. Nodes that could not be fully
     * read are collected; if there are any, their byte ranges are requested
     * and the walk resumes from those nodes once the request completes.
     */
    _walk: function ObjectLoader_walk(nodesToVisit) {
      var self = this;
      var deferredNodes = [];
      var rangesToRequest = [];

      while (nodesToVisit.length) {
        var node = nodesToVisit.pop();

        // Only references or chunked streams can cause missing data
        // exceptions.
        if (isRef(node)) {
          if (this.refSet.has(node)) {
            // Already seen; this is what breaks reference cycles.
            continue;
          }
          try {
            this.refSet.put(node);
            // On failure `node` keeps pointing at the reference itself.
            node = this.xref.fetch(node);
          } catch (ex) {
            if (!(ex instanceof MissingDataException)) {
              throw ex;
            }
            deferredNodes.push(node);
            rangesToRequest.push({ begin: ex.begin, end: ex.end });
          }
        }

        if (node && node.getBaseStreams) {
          var bases = node.getBaseStreams();
          var incomplete = false;
          for (var b = 0; b < bases.length; b++) {
            var base = bases[b];
            if (base.getMissingChunks && base.getMissingChunks().length) {
              incomplete = true;
              rangesToRequest.push({ begin: base.start, end: base.end });
            }
          }
          if (incomplete) {
            deferredNodes.push(node);
          }
        }

        addChildren(node, nodesToVisit);
      }

      if (!rangesToRequest.length) {
        // Everything is loaded.
        this.refSet = null;
        this.capability.resolve();
        return;
      }

      this.xref.stream.manager.requestRanges(rangesToRequest).then(
        function pendingRequestCallback() {
          // Remove any reference nodes from the current refset so they
          // aren't skipped when we revisit them.
          for (var d = 0; d < deferredNodes.length; d++) {
            if (isRef(deferredNodes[d])) {
              self.refSet.remove(deferredNodes[d]);
            }
          }
          self._walk(deferredNodes);
        },
        this.capability.reject
      );
    }
  };
  return ObjectLoader;
})();
exports.Catalog = Catalog;
exports.ObjectLoader = ObjectLoader;
exports.XRef = XRef;
});
/* Copyright 2012 Mozilla Foundation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
("use strict");
(function(root, factory) {
//if (typeof define === 'function' && define.amd) {
// define('pdfjs/core/document', ['exports', 'pdfjs/shared/util',
// 'pdfjs/core/primitives', 'pdfjs/core/stream', 'pdfjs/core/obj',
// 'pdfjs/core/parser', 'pdfjs/core/crypto'], factory);
// } else if (typeof exports !== 'undefined') {
// factory(exports, require('../shared/util.js'), require('./primitives.js'),
// require('./stream.js'), require('./obj.js'), require('./parser.js'),
// require('./crypto.js'));
//} else {
factory(
(root.pdfjsCoreDocument = {}),
root.pdfjsSharedUtil,
root.pdfjsCorePrimitives,
root.pdfjsCoreStream,
root.pdfjsCoreObj,
root.pdfjsCoreParser,
root.pdfjsCoreCrypto
);
//}
})(window, function(
exports,
sharedUtil,
corePrimitives,
coreStream,
coreObj,
coreParser,
coreCrypto
) {
var MissingDataException = sharedUtil.MissingDataException;
var Util = sharedUtil.Util;
var assert = sharedUtil.assert;
var error = sharedUtil.error;
var info = sharedUtil.info;
var isArray = sharedUtil.isArray;
var isArrayBuffer = sharedUtil.isArrayBuffer;
var isString = sharedUtil.isString;
var shadow = sharedUtil.shadow;
var stringToBytes = sharedUtil.stringToBytes;
var stringToPDFString = sharedUtil.stringToPDFString;
var warn = sharedUtil.warn;
var Dict = corePrimitives.Dict;
var isDict = corePrimitives.isDict;
var isName = corePrimitives.isName;
var isStream = corePrimitives.isStream;
var NullStream = coreStream.NullStream;
var Stream = coreStream.Stream;
var StreamsSequenceStream = coreStream.StreamsSequenceStream;
var Catalog = coreObj.Catalog;
var ObjectLoader = coreObj.ObjectLoader;
var XRef = coreObj.XRef;
var Lexer = coreParser.Lexer;
var Linearization = coreParser.Linearization;
var calculateMD5 = coreCrypto.calculateMD5;
/**
* The `PDFDocument` holds all the data of the PDF file. Compared to the
* `PDFDoc`, this one doesn't have any job management code.
* Right now there exists one PDFDocument on the main thread + one object
* for each worker. If there is no worker support enabled, two
* `PDFDocument` objects are created on the main thread.
*/
var PDFDocument = (function PDFDocumentClosure() {
  var FINGERPRINT_FIRST_BYTES = 1024;
  var EMPTY_FINGERPRINT =
    "\x00\x00\x00\x00\x00\x00\x00" + "\x00\x00\x00\x00\x00\x00\x00\x00\x00";
  // Number of bytes inspected by each keyword search.
  var SEARCH_WINDOW = 1024;
  var ENDOBJ_KEYWORD = "endobj";
  var STARTXREF_KEYWORD = "startxref";

  /**
   * @param pdfManager - Stored on the instance and handed to the XRef.
   * @param arg - Either a stream or an ArrayBuffer (wrapped in a `Stream`);
   *   anything else is reported through `error`.
   * @param password - Handed to the XRef.
   */
  function PDFDocument(pdfManager, arg, password) {
    var stream;
    if (isStream(arg)) {
      stream = arg;
    } else if (isArrayBuffer(arg)) {
      stream = new Stream(arg);
    } else {
      error("PDFDocument: Unknown argument type");
      return;
    }
    assert(stream.length > 0, "stream must have data");
    this.pdfManager = pdfManager;
    this.stream = stream;
    this.xref = new XRef(stream, password, pdfManager);
  }

  /**
   * Looks for `needle` in at most `limit` bytes starting at `stream.pos`
   * (fewer when the stream ends earlier). With `backwards` set, the last
   * occurrence in that window is used instead of the first.
   *
   * @returns {boolean} `true` with `stream.pos` moved to the start of the
   *   match; `false` with `stream.pos` restored to where it was.
   */
  function find(stream, needle, limit, backwards) {
    var origin = stream.pos;
    var streamEnd = stream.end;
    if (origin + limit > streamEnd) {
      limit = streamEnd - origin;
    }
    var haystack = "";
    for (var read = 0; read < limit; ++read) {
      haystack += String.fromCharCode(stream.getByte());
    }
    stream.pos = origin;
    var at = backwards
      ? haystack.lastIndexOf(needle)
      : haystack.indexOf(needle);
    if (at === -1) {
      return false; /* not found */
    }
    stream.pos += at;
    return true; /* found */
  }

  // Used for documents with linearization data: the position right behind
  // the first "endobj" found near the start of the stream, or 0.
  function locateEndOfFirstObject(stream) {
    stream.reset();
    if (!find(stream, ENDOBJ_KEYWORD, SEARCH_WINDOW)) {
      return 0;
    }
    return stream.pos + ENDOBJ_KEYWORD.length;
  }

  // Used for all other documents: searches for "startxref" by jumping
  // backward from the end of the stream and parses the number that follows
  // it. Yields 0 when the keyword or a number cannot be found.
  function locateTrailingStartXRef(stream) {
    var found = false;
    var pos = stream.end;
    while (!found && pos > 0) {
      // Consecutive windows overlap by the keyword length so that a keyword
      // straddling a window boundary is still seen.
      pos = Math.max(pos - (SEARCH_WINDOW - STARTXREF_KEYWORD.length), 0);
      stream.pos = pos;
      found = find(stream, STARTXREF_KEYWORD, SEARCH_WINDOW, true);
    }
    if (!found) {
      return 0;
    }
    stream.skip(STARTXREF_KEYWORD.length);
    var ch = stream.getByte();
    while (Lexer.isSpace(ch)) {
      ch = stream.getByte();
    }
    // Collects bytes in the range 0x20..0x39 (space up to '9'); parseInt
    // then picks the leading integer out of what was collected.
    var collected = [];
    while (ch >= 0x20 && ch <= 0x39) {
      collected.push(String.fromCharCode(ch));
      ch = stream.getByte();
    }
    var offset = parseInt(collected.join(""), 10);
    return isNaN(offset) ? 0 : offset;
  }

  var DocumentInfoValidators = {
    get entries() {
      // Built on first use because the validation functions are not
      // defined until after this file loads.
      var validators = {};
      var stringKeys = [
        "Title",
        "Author",
        "Subject",
        "Keywords",
        "Creator",
        "Producer",
        "CreationDate",
        "ModDate"
      ];
      for (var i = 0; i < stringKeys.length; i++) {
        validators[stringKeys[i]] = isString;
      }
      validators.Trapped = isName;
      return shadow(this, "entries", validators);
    }
  };

  PDFDocument.prototype = {
    /**
     * Runs `setup`, then reads the catalog's Version and AcroForm entries.
     * `acroForm` ends up `null` when the form has neither fields nor XFA,
     * or when reading it fails.
     */
    parse: function PDFDocument_parse(recoveryMode) {
      this.setup(recoveryMode);
      var version = this.catalog.catDict.get("Version");
      if (isName(version)) {
        this.pdfFormatVersion = version.name;
      }
      try {
        // checking if AcroForm is present
        var acroForm = (this.acroForm = this.catalog.catDict.get("AcroForm"));
        if (acroForm) {
          this.xfa = acroForm.get("XFA");
          var fields = acroForm.get("Fields");
          var hasFields = fields && isArray(fields) && fields.length !== 0;
          if (!hasFields && !this.xfa) {
            // no fields and no XFA -- not a form (?)
            this.acroForm = null;
          }
        }
      } catch (ex) {
        info("Something wrong with AcroForm entry");
        this.acroForm = null;
      }
    },

    /**
     * Result of `Linearization.create` for the stream, or `null` when the
     * stream is empty or creation fails. Missing-data exceptions are
     * rethrown; other errors are only logged.
     */
    get linearization() {
      var result = null;
      if (this.stream.length) {
        try {
          result = Linearization.create(this.stream);
        } catch (err) {
          if (err instanceof MissingDataException) {
            throw err;
          }
          info(err);
        }
      }
      // shadow the prototype getter with a data property
      return shadow(this, "linearization", result);
    },

    /** Offset passed to the XRef as its starting point (0 if not found). */
    get startXRef() {
      var stream = this.stream;
      var offset = this.linearization
        ? locateEndOfFirstObject(stream)
        : locateTrailingStartXRef(stream);
      // shadow the prototype getter with a data property
      return shadow(this, "startXRef", offset);
    },

    /** `mainXRefEntriesOffset` of the linearization data, otherwise 0. */
    get mainXRefEntriesOffset() {
      var linearization = this.linearization;
      var offset = 0;
      if (linearization) {
        offset = linearization.mainXRefEntriesOffset;
      }
      // shadow the prototype getter with a data property
      return shadow(this, "mainXRefEntriesOffset", offset);
    },

    // Find the header, remove leading garbage and setup the stream
    // starting from the header.
    checkHeader: function PDFDocument_checkHeader() {
      var stream = this.stream;
      stream.reset();
      if (!find(stream, "%PDF-", SEARCH_WINDOW)) {
        // May not be a PDF file, continue anyway.
        return;
      }
      // Found the header, trim off any garbage before it.
      stream.moveStart();
      // Reading file format version: bytes up to the first one that is not
      // above SPACE (0x20), capped at MAX_VERSION_LENGTH characters.
      var MAX_VERSION_LENGTH = 12;
      var headerChars = [];
      for (var ch = stream.getByte(); ch > 0x20; ch = stream.getByte()) {
        if (headerChars.length >= MAX_VERSION_LENGTH) {
          break;
        }
        headerChars.push(String.fromCharCode(ch));
      }
      if (!this.pdfFormatVersion) {
        // removing "%PDF-"-prefix
        this.pdfFormatVersion = headerChars.join("").slice(5);
      }
    },

    parseStartXRef: function PDFDocument_parseStartXRef() {
      this.xref.setStartXRef(this.startXRef);
    },

    // Parses the XRef and creates the catalog.
    setup: function PDFDocument_setup(recoveryMode) {
      this.xref.parse(recoveryMode);
      this.catalog = new Catalog(this.pdfManager, this.xref, false);
    },

    /** Page count from the linearization data if any, else the catalog. */
    get numPages() {
      var linearization = this.linearization;
      var count;
      if (linearization) {
        count = linearization.numPages;
      } else {
        count = this.catalog.numPages;
      }
      // shadow the prototype getter
      return shadow(this, "numPages", count);
    },

    /**
     * Format version and form flags, plus those entries of the trailer's
     * Info dictionary that pass the validators above.
     */
    get documentInfo() {
      var docInfo = {
        PDFFormatVersion: this.pdfFormatVersion,
        IsAcroFormPresent: !!this.acroForm,
        IsXFAPresent: !!this.xfa
      };
      var infoDict;
      try {
        infoDict = this.xref.trailer.get("Info");
      } catch (err) {
        info("The document information dictionary is invalid.");
      }
      if (infoDict) {
        var validEntries = DocumentInfoValidators.entries;
        // Only fill the document info with valid entries from the spec.
        for (var key in validEntries) {
          if (!infoDict.has(key)) {
            continue;
          }
          var value = infoDict.get(key);
          // Make sure the value conforms to the spec.
          if (!validEntries[key](value)) {
            info('Bad value in document info for "' + key + '"');
            continue;
          }
          if (typeof value !== "string") {
            docInfo[key] = value;
          } else {
            docInfo[key] = stringToPDFString(value);
          }
        }
      }
      return shadow(this, "documentInfo", docInfo);
    },

    /**
     * Hex string built from the first trailer ID entry when that is a
     * usable string, otherwise from an MD5 over the start of the stream.
     */
    get fingerprint() {
      var idArray = this.xref.trailer.get("ID");
      var hasUsableId =
        idArray &&
        isArray(idArray) &&
        idArray[0] &&
        isString(idArray[0]) &&
        idArray[0] !== EMPTY_FINGERPRINT;
      var hash;
      if (hasUsableId) {
        hash = stringToBytes(idArray[0]);
      } else {
        if (this.stream.ensureRange) {
          this.stream.ensureRange(
            0,
            Math.min(FINGERPRINT_FIRST_BYTES, this.stream.end)
          );
        }
        hash = calculateMD5(
          this.stream.bytes.subarray(0, FINGERPRINT_FIRST_BYTES),
          0,
          FINGERPRINT_FIRST_BYTES
        );
      }
      var hexParts = [];
      for (var i = 0, n = hash.length; i < n; i++) {
        var hex = hash[i].toString(16);
        hexParts.push(hex.length === 1 ? "0" + hex : hex);
      }
      return shadow(this, "fingerprint", hexParts.join(""));
    },

    getPage: function PDFDocument_getPage(pageIndex) {
      return this.catalog.getPage(pageIndex);
    },

    cleanup: function PDFDocument_cleanup() {
      return this.catalog.cleanup();
    }
  };
  return PDFDocument;
})();
exports.PDFDocument = PDFDocument;
});