From 63f46a63a453b189b56e314504280248c47e0959 Mon Sep 17 00:00:00 2001 From: Zdravko Iliev <zdravko.iliev@vereign.com> Date: Tue, 26 Apr 2022 12:04:29 +0300 Subject: [PATCH] reduce parsing time for big pdfs --- dist/config.js | 10 +++++----- dist/pdfParser.js | 8 -------- src/config.ts | 10 +++++----- src/pdfParser.ts | 9 --------- 4 files changed, 10 insertions(+), 27 deletions(-) diff --git a/dist/config.js b/dist/config.js index 1164211..02ed450 100644 --- a/dist/config.js +++ b/dist/config.js @@ -7,11 +7,11 @@ exports.config = { get: { // enable or disable data extraction (all are optional and enabled by default) pages: true, - text: true, - fingerprint: true, - outline: true, - metadata: true, + text: false, + fingerprint: false, + outline: false, + metadata: false, info: true, - permissions: true, // get permissions + permissions: false, // get permissions }, }; diff --git a/dist/pdfParser.js b/dist/pdfParser.js index 733c867..501a841 100644 --- a/dist/pdfParser.js +++ b/dist/pdfParser.js @@ -26,16 +26,8 @@ class PDFparser { throw new errors_1.AppError("Only pdf file type is supported"); } try { - // await Promise.all([ - // verifyPDF(this.document), - // PdfData.extract(this.document, config), - // ]); - // console.time("verify"); const signaturesMeta = yield (0, lib_1.verifyPDF)(this.document); - // console.timeEnd("verify"); - // console.time("PdfData"); const pdfMeta = yield pdfdataextract_1.PdfData.extract(this.document, config_1.config); - // console.timeEnd("PdfData"); const result = { pages: pdfMeta.pages, title: pdfMeta.info.Title || "Unknown", diff --git a/src/config.ts b/src/config.ts index bc82917..6d59033 100644 --- a/src/config.ts +++ b/src/config.ts @@ -5,11 +5,11 @@ export const config = { get: { // enable or disable data extraction (all are optional and enabled by default) pages: true, // get number of pages - text: true, // get text of each page - fingerprint: true, // get fingerprint - outline: true, // get outline - metadata: true, // get metadata + text: false, // get text of each page + fingerprint: false, // get fingerprint + outline: false, // get outline + metadata: false, // get metadata info: true, // get info - permissions: true, // get permissions + permissions: false, // get permissions }, }; diff --git a/src/pdfParser.ts b/src/pdfParser.ts index 4791bc1..9f0ef8f 100644 --- a/src/pdfParser.ts +++ b/src/pdfParser.ts @@ -30,17 +30,8 @@ class PDFparser { } try { - // await Promise.all([ - // verifyPDF(this.document), - // PdfData.extract(this.document, config), - // ]); - // console.time("verify"); const signaturesMeta = await verifyPDF(this.document); - // console.timeEnd("verify"); - - // console.time("PdfData"); const pdfMeta = await PdfData.extract(this.document, config); - // console.timeEnd("PdfData"); const result = { pages: pdfMeta.pages, -- GitLab