diff --git a/.gitignore b/.gitignore
index 24ea57156b5d89bfebc461a6d85b1ea24ddcef28..f3c728e5b10403b0ed481358aad9b9b5301693d8 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,4 +1,3 @@
 node_modules
 .idea
 yarn-error.log
-dist
diff --git a/__tests__/example.pdf b/__tests__/example.pdf
new file mode 100644
index 0000000000000000000000000000000000000000..4b628d3443868d3ae88403ef355431dbfaef0e94
Binary files /dev/null and b/__tests__/example.pdf differ
diff --git a/__tests__/index.test.ts b/__tests__/index.test.ts
index 60b585e8ed08cc89c1db38196e196687fc6c7401..f35d0d6ad74859cb9a4105d5e4e6bcbf922acf1c 100644
--- a/__tests__/index.test.ts
+++ b/__tests__/index.test.ts
@@ -1,9 +1,17 @@
+import fs from "fs";
+import path from "path";
 import { describe, it, expect } from "@jest/globals";
-import { add } from "../src";
+import PDFparser from "../src/pdfParser";
 
-describe("index test", () => {
-  it("should calculate sum", () => {
-    const sum = add(2, 5);
-    expect(sum).toEqual(7);
+describe("PDF parser", () => {
+  it("should return pdf document metadata", async () => {
+    // The fixture PDF sits next to this test file.
+    const fixturePath = path.resolve(__dirname, "./example.pdf");
+    const pdfBytes = fs.readFileSync(fixturePath);
+
+    const { pages, title } = await new PDFparser(pdfBytes).getPDFMeta();
+
+    expect(pages).toEqual(1);
+    expect(title).toEqual("PDF Digital Signatures");
   });
 });
diff --git a/dist/config.d.ts b/dist/config.d.ts
new file mode 100644
index 0000000000000000000000000000000000000000..070fb3fd7b849ccb439b89b04937856a636d0de0
--- /dev/null
+++ b/dist/config.d.ts
@@ -0,0 +1,13 @@
+import { VerbosityLevel } from "pdfdataextract";
+export declare const config: {
+    verbosity: VerbosityLevel;
+    get: {
+        pages: boolean;
+        text: boolean;
+        fingerprint: boolean;
+        outline: boolean;
+        metadata: boolean;
+        info: boolean;
+        permissions: boolean;
+    };
+};
diff --git a/dist/config.js b/dist/config.js
new file mode 100644
index 0000000000000000000000000000000000000000..8dfad7cb8958a5dd9776190d4b31139c99c9244d
--- /dev/null
+++
b/dist/config.js
@@ -0,0 +1,17 @@
+"use strict";
+Object.defineProperty(exports, "__esModule", { value: true });
+exports.config = void 0;
+const pdfdataextract_1 = require("pdfdataextract");
+exports.config = {
+    verbosity: pdfdataextract_1.VerbosityLevel.ERRORS,
+    get: {
+        // enable or disable data extraction (all are optional and enabled by default)
+        pages: true,
+        text: false,
+        fingerprint: true,
+        outline: true,
+        metadata: true,
+        info: true,
+        permissions: true,
+    },
+};
diff --git a/dist/index.d.ts b/dist/index.d.ts
new file mode 100644
index 0000000000000000000000000000000000000000..98cc103cb18d16c837fb8b9d52065a8fbbb985e0
--- /dev/null
+++ b/dist/index.d.ts
@@ -0,0 +1,2 @@
+import PDFparser from "./pdfParser";
+export default PDFparser;
diff --git a/dist/index.js b/dist/index.js
new file mode 100644
index 0000000000000000000000000000000000000000..8ee3e72e2482671ef48d496d0c2c302a0d8a633a
--- /dev/null
+++ b/dist/index.js
@@ -0,0 +1,4 @@
+"use strict";
+Object.defineProperty(exports, "__esModule", { value: true });
+const pdfParser_1 = require("./pdfParser");
+exports.default = pdfParser_1.default;
diff --git a/dist/pdfParser.d.ts b/dist/pdfParser.d.ts
new file mode 100644
index 0000000000000000000000000000000000000000..7cf3fa1f704c09bf3aabdf59cfee08c219013112
--- /dev/null
+++ b/dist/pdfParser.d.ts
@@ -0,0 +1,9 @@
+/// <reference types="node" />
+import { IgetMetaResponse } from "./types";
+declare class PDFparser {
+    readonly document: any;
+    readonly config: any;
+    constructor(document: Buffer);
+    getPDFMeta: () => Promise<IgetMetaResponse>;
+}
+export default PDFparser;
diff --git a/dist/pdfParser.js b/dist/pdfParser.js
new file mode 100644
index 0000000000000000000000000000000000000000..45a9adc60eebb66b19dc0a9bbf58632d4d7f2af7
--- /dev/null
+++ b/dist/pdfParser.js
@@ -0,0 +1,48 @@
+"use strict";
+var __awaiter = (this && this.__awaiter) || function (thisArg, _arguments, P, generator) {
+    function adopt(value) { return value instanceof P ? value : new P(function (resolve) { resolve(value); }); }
+    return new (P || (P = Promise))(function (resolve, reject) {
+        function fulfilled(value) { try { step(generator.next(value)); } catch (e) { reject(e); } }
+        function rejected(value) { try { step(generator["throw"](value)); } catch (e) { reject(e); } }
+        function step(result) { result.done ? resolve(result.value) : adopt(result.value).then(fulfilled, rejected); }
+        step((generator = generator.apply(thisArg, _arguments || [])).next());
+    });
+};
+Object.defineProperty(exports, "__esModule", { value: true });
+const verify_pdf_1 = require("@ninja-labs/verify-pdf");
+const pdfdataextract_1 = require("pdfdataextract");
+const config_1 = require("./config");
+class PDFparser {
+    constructor(document) {
+        this.getPDFMeta = () => __awaiter(this, void 0, void 0, function* () {
+            try {
+                const pdfMeta = yield pdfdataextract_1.PdfData.extract(this.document, config_1.config);
+                const signaturesMeta = yield verify_pdf_1.default(this.document);
+                return {
+                    verified: signaturesMeta.verified,
+                    authenticity: signaturesMeta.authenticity,
+                    integrity: signaturesMeta.integrity,
+                    expired: signaturesMeta.expired,
+                    meta: {
+                        certs: signaturesMeta.certs,
+                    },
+                    pages: pdfMeta.pages,
+                    fingerpring: pdfMeta.fingerprint,
+                    creation_data: pdfMeta.info.CreationDate,
+                    creator: pdfMeta.info.Creator,
+                    author: pdfMeta.info.Author,
+                    title: pdfMeta.info.Title,
+                    description: pdfMeta.info.Keywords,
+                    mod_date: pdfMeta.info.ModDate,
+                };
+            }
+            catch (error) {
+                console.error(error);
+                throw new Error("Could not get pdf metadata");
+            }
+        });
+        this.document = document;
+        this.config = config_1.config;
+    }
+}
+exports.default = PDFparser;
diff --git a/dist/types.d.ts b/dist/types.d.ts
new file mode 100644
index 0000000000000000000000000000000000000000..eb3c76f28d28ac6b352a0626efee441924a1fcb3
--- /dev/null
+++ b/dist/types.d.ts
@@ -0,0 +1,38 @@
+export interface Icert {
+    clientCertificate: boolean;
+    issuedBy: {
+        countryName: string;
+        organizationName: string;
+        commonName: string;
+    };
+    issuedTo: {
+        countryName: string;
+        organizationalUnitName: string;
+        organizationName: string;
+        commonName: string;
+    };
+    validityPeriod: {
+        notBefore: string;
+        notAfter: string;
+    };
+    pemCertificate: string;
+}
+export interface IgetMetaResponse {
+    verified: boolean;
+    authenticity: boolean;
+    integrity: boolean;
+    expired: boolean;
+    meta: {
+        certs: Array<{
+            Icert: any;
+        }>;
+    };
+    pages: number;
+    fingerpring: string;
+    creation_data: string;
+    creator: string;
+    author: string;
+    title: string;
+    description: string;
+    mod_date: string;
+}
diff --git a/dist/types.js b/dist/types.js
new file mode 100644
index 0000000000000000000000000000000000000000..c8ad2e549bdc6801e0d1c80b0308d4b9bd4985ce
--- /dev/null
+++ b/dist/types.js
@@ -0,0 +1,2 @@
+"use strict";
+Object.defineProperty(exports, "__esModule", { value: true });
diff --git a/package.json b/package.json
index e6d39b9e86d34595391638ec7847619f1e800990..e84c325e6ebbcadd6c40f8bd08d2b9322a9481ce 100644
--- a/package.json
+++ b/package.json
@@ -35,5 +35,8 @@
       "pre-commit": "lint-staged"
     }
   },
-  "dependencies": {}
+  "dependencies": {
+    "@ninja-labs/verify-pdf": "^0.3.9",
+    "pdfdataextract": "^3.2.0"
+  }
 }
diff --git a/src/config.ts b/src/config.ts
new file mode 100644
index 0000000000000000000000000000000000000000..bc829171bb106d99a361e97207ac7bf22c685a7e
--- /dev/null
+++ b/src/config.ts
@@ -0,0 +1,22 @@
+import { VerbosityLevel } from "pdfdataextract";
+
+/**
+ * Options passed to `PdfData.extract` by `PDFparser`.
+ *
+ * `getPDFMeta` only reads the page count, the fingerprint and the info
+ * fields, so text extraction is switched off instead of collecting the text
+ * of every page on each call. The remaining flags are left as they were.
+ */
+export const config = {
+  verbosity: VerbosityLevel.ERRORS, // set the verbosity level for parsing
+  get: {
+    // enable or disable data extraction (all are optional and enabled by default)
+    pages: true, // get number of pages
+    text: false, // text of each page is never read by getPDFMeta
+    fingerprint: true, // get fingerprint
+    outline: true, // get outline
+    metadata: true, // get metadata
+    info: true, // get info
+    permissions: true, // get permissions
+  },
+};
diff --git a/src/index.ts b/src/index.ts
index
c68b57a0ac05eb50c699e634883700ec4398a6cd..bb506d9fd592b81455896a441677eafefcb600b0 100644
--- a/src/index.ts
+++ b/src/index.ts
@@ -1,3 +1,3 @@
-export const add = (a: number, b: number) => {
-  return a + b;
-};
+import PDFparser from "./pdfParser";
+
+export default PDFparser;
diff --git a/src/pdfParser.ts b/src/pdfParser.ts
new file mode 100644
index 0000000000000000000000000000000000000000..4a89503a8cc0ca1ee1fa69000b38c324e4f248c2
--- /dev/null
+++ b/src/pdfParser.ts
@@ -0,0 +1,70 @@
+import verifyPDF from "@ninja-labs/verify-pdf";
+import { PdfData } from "pdfdataextract";
+import { config } from "./config";
+import { IgetMetaResponse } from "./types";
+
+/**
+ * Holds a PDF file buffer and reports its document info together with the
+ * outcome of verifying its digital signature.
+ */
+class PDFparser {
+  /** PDF contents exactly as handed to the constructor. */
+  readonly document: Buffer;
+  /** Extraction options passed to `PdfData.extract`. */
+  readonly config: typeof config;
+
+  /**
+   * @param document - contents of the PDF file to inspect
+   */
+  constructor(document: Buffer) {
+    this.document = document;
+    this.config = config;
+  }
+
+  /**
+   * Extracts the document info and verifies the signature of the PDF.
+   *
+   * @returns verification flags, signing certificates and document info fields;
+   *   info fields are `undefined` when the PDF does not provide them
+   * @throws Error "Could not get pdf metadata" if extraction or verification
+   *   throws; the underlying error is attached as `cause`
+   */
+  getPDFMeta = async (): Promise<IgetMetaResponse> => {
+    try {
+      const pdfMeta = await PdfData.extract(this.document, this.config);
+      const signaturesMeta = await verifyPDF(this.document);
+      // NOTE(review): `info` is presumably undefined when the PDF has no info
+      // dictionary; guard it so a missing field does not abort the whole call.
+      const info = pdfMeta.info;
+      // NOTE(review): verify-pdf documents the certificate list under
+      // `meta.certs`; the top-level `certs` lookup is kept as a fallback.
+      const certs = signaturesMeta.meta?.certs ?? signaturesMeta.certs;
+
+      return {
+        verified: signaturesMeta.verified,
+        authenticity: signaturesMeta.authenticity,
+        integrity: signaturesMeta.integrity,
+        expired: signaturesMeta.expired,
+        meta: { certs },
+        pages: pdfMeta.pages,
+        fingerprint: pdfMeta.fingerprint,
+        fingerpring: pdfMeta.fingerprint,
+        creation_date: info?.CreationDate,
+        creation_data: info?.CreationDate,
+        creator: info?.Creator,
+        author: info?.Author,
+        title: info?.Title,
+        description: info?.Keywords,
+        mod_date: info?.ModDate,
+      };
+    } catch (error) {
+      console.error(error);
+      // Same message as before; keep the root cause reachable for callers.
+      throw Object.assign(new Error("Could not get pdf metadata"), {
+        cause: error,
+      });
+    }
+  };
+}
+
+export default PDFparser;
diff --git a/src/types.ts b/src/types.ts
new file mode 100644
index 0000000000000000000000000000000000000000..7a21941de422c200ff33a0db689ff0afcac5bad9
--- /dev/null
+++ b/src/types.ts
@@ -0,0 +1,43 @@
+/** Details of a single certificate attached to a PDF signature. */
+export interface Icert {
+  clientCertificate: boolean;
+  issuedBy: {
+    countryName: string;
+    organizationName: string;
+    commonName: string;
+  };
+  issuedTo: {
+    countryName: string;
+    organizationalUnitName: string;
+    organizationName: string;
+    commonName: string;
+  };
+  validityPeriod: {
+    notBefore: string;
+    notAfter: string;
+  };
+  pemCertificate: string;
+}
+
+/** Result of `PDFparser.getPDFMeta`: signature checks plus document info. */
+export interface IgetMetaResponse {
+  verified: boolean;
+  authenticity: boolean;
+  integrity: boolean;
+  expired: boolean;
+  meta: {
+    certs: Icert[];
+  };
+  pages: number;
+  fingerprint: string;
+  /** @deprecated misspelled alias of `fingerprint`, kept for existing consumers */
+  fingerpring: string;
+  creation_date: string;
+  /** @deprecated misspelled alias of `creation_date`, kept for existing consumers */
+  creation_data: string;
+  creator: string;
+  author: string;
+  title: string;
+  description: string;
+  mod_date: string;
+}
diff --git a/yarn.lock b/yarn.lock
index 1df3a5f87b55f45d06bac1c82c9136084ebb6031..f58a7813763ce0b3f07f9759e8fd2ef1a4dbdfa8 100644
--- a/yarn.lock
+++ b/yarn.lock
@@ -1070,6 +1070,13 @@
     "@types/yargs" "^15.0.0"
     chalk "^4.0.0"
 
+"@ninja-labs/verify-pdf@^0.3.9":
+  version "0.3.9"
+  resolved "https://registry.yarnpkg.com/@ninja-labs/verify-pdf/-/verify-pdf-0.3.9.tgz#46929f917b3452cb0cf28c833bce7dc9d1e7ae8f"
+  integrity sha512-TXuax5oaFoAICjEmDctxb28DhfeQkVe2vev7kw6GieHEliuCE5b3K5+puzSj7KcUr3EAYPtzQB740t7GCNrlxw==
+  dependencies:
+    node-forge "^0.10.0"
+
 "@sinonjs/commons@^1.7.0":
   version "1.8.1"
   resolved "https://registry.yarnpkg.com/@sinonjs/commons/-/commons-1.8.1.tgz#e7df00f98a203324f6dc7cc606cad9d4a8ab2217"
@@ -3662,6 +3669,11 @@ nice-try@^1.0.4:
   resolved "https://registry.yarnpkg.com/nice-try/-/nice-try-1.0.5.tgz#a3378a7696ce7d223e88fc9b764bd7ef1089e366"
   integrity sha512-1nh45deeb5olNY7eX82BkPO7SSxR5SSYJiPTrTdFUVYwAl8CKMA5N9PjTYkHiRjisVcxcQ1HXdLhx2qxxJzLNQ==
 
+node-forge@^0.10.0:
+  version "0.10.0"
+  resolved "https://registry.yarnpkg.com/node-forge/-/node-forge-0.10.0.tgz#32dea2afb3e9926f02ee5ce8794902691a676bf3"
+  integrity sha512-PPmu8eEeG9saEUvI97fm4OYxXVB6bFvyNTyiUOBichBpFG8A1Ljw3bY62+5oOjDEMHRnd0Y7HQ+x7uzxOzC6JA==
+
 node-int64@^0.4.0:
   version "0.4.0"
   resolved "https://registry.yarnpkg.com/node-int64/-/node-int64-0.4.0.tgz#87a9065cdb355d3182d8f94ce11188b825c68a3b"
@@ -3909,6 +3921,18 @@ path-type@^4.0.0:
   resolved
"https://registry.yarnpkg.com/path-type/-/path-type-4.0.0.tgz#84ed01c0a7ba380afe09d90a8c180dcd9d03043b" integrity sha512-gDKb8aZMDeD/tZWs9P6+q0J9Mwkdl6xMV8TjnGP3qJVJ06bdMgkbBlLU8IdfOsIsFz2BW1rNVT3XuNEl8zPAvw== +pdfdataextract@^3.2.0: + version "3.2.0" + resolved "https://registry.yarnpkg.com/pdfdataextract/-/pdfdataextract-3.2.0.tgz#c40d59fb0de4ed6e63a1da08f87d4f4550e841ec" + integrity sha512-t4W7h+cdr/aefdftzxmf+3w4ntVO70OlOFAGgH2zrbc+lDmYKIzAUXJhP+zpIqK6SnkRnZrQOC0fv/sejUJnrg== + dependencies: + pdfjs-dist "2.10.377" + +pdfjs-dist@2.10.377: + version "2.10.377" + resolved "https://registry.yarnpkg.com/pdfjs-dist/-/pdfjs-dist-2.10.377.tgz#feadc9f31bf1790795994e54b18930974cf4970a" + integrity sha512-i0jRShtvgfsVQUNCoFYH4SVhPO3U0yhtiFLfZ0RR0B+68N+Vnwq+8B3cjWjLEwWGh8wg1XQ/sYMYKUlHn/Qpsw== + performance-now@^2.1.0: version "2.1.0" resolved "https://registry.yarnpkg.com/performance-now/-/performance-now-2.1.0.tgz#6309f4e0e5fa913ec1c69307ae364b4b377c9e7b"