Skip to content
Snippets Groups Projects
Commit 6ef22966 authored by Zdravko Iliev
Browse files

pdf meta parser

parent 3bb7da1c
No related branches found
No related tags found
1 merge request: !1 Draft: Resolve "[Document Sealing] Implement PDF parser"
Pipeline #47759 passed
node_modules
.idea
yarn-error.log
dist
File added
import fs from "fs";
import path from "path";
import { describe, it, expect } from "@jest/globals";
import { add } from "../src";
import PDFparser from "../src/pdfParser";
describe("index test", () => {
it("should calculate sum", () => {
const sum = add(2, 5);
expect(sum).toEqual(7);
// Runs PDFparser against the example.pdf file that sits next to this test
// and checks the page count and title it reports.
describe("PDF parser", () => {
  it("should return pdf document metadata", async () => {
    const fixturePath = path.resolve(__dirname, "./example.pdf");
    const pdfBuffer = fs.readFileSync(fixturePath);

    const meta = await new PDFparser(pdfBuffer).getPDFMeta();

    expect(meta.pages).toEqual(1);
    expect(meta.title).toEqual("PDF Digital Signatures");
  });
});
import { VerbosityLevel } from "pdfdataextract";
/**
 * Type declaration for the extraction options object exported by the config module.
 * `verbosity` is a pdfdataextract `VerbosityLevel`; every entry under `get` is a
 * boolean switch for one category of data.
 */
export declare const config: {
verbosity: VerbosityLevel;
get: {
pages: boolean;
text: boolean;
fingerprint: boolean;
outline: boolean;
metadata: boolean;
info: boolean;
permissions: boolean;
};
};
"use strict";
Object.defineProperty(exports, "__esModule", { value: true });
exports.config = void 0;
const pdfdataextract_1 = require("pdfdataextract");
exports.config = {
verbosity: pdfdataextract_1.VerbosityLevel.ERRORS,
get: {
// enable or disable data extraction (all are optional and enabled by default)
pages: true,
text: true,
fingerprint: true,
outline: true,
metadata: true,
info: true,
permissions: true,
},
};
import PDFparser from "./pdfParser";
export default PDFparser;
"use strict";
Object.defineProperty(exports, "__esModule", { value: true });
const pdfParser_1 = require("./pdfParser");
exports.default = pdfParser_1.default;
/// <reference types="node" />
import { IgetMetaResponse } from "./types";
/**
 * Declaration of the PDF parser class: constructed from a Buffer and exposing
 * a single asynchronous metadata lookup.
 */
declare class PDFparser {
/** Declared as `any`; the implementation assigns the constructor argument here. */
readonly document: any;
/** Declared as `any`; the implementation assigns the shared config object here. */
readonly config: any;
/** @param document Buffer holding the PDF to inspect. */
constructor(document: Buffer);
/** Resolves with the combined metadata / signature result described by IgetMetaResponse. */
getPDFMeta: () => Promise<IgetMetaResponse>;
}
export default PDFparser;
"use strict";
// Helper that drives a generator function as asynchronous code (getPDFMeta uses it with `yield`).
// Reuses `this.__awaiter` when one already exists; otherwise defines the function here.
// `generator` is invoked with `thisArg` / `_arguments`; `P` is the promise constructor and
// falls back to the global Promise when falsy.
var __awaiter = (this && this.__awaiter) || function (thisArg, _arguments, P, generator) {
// Pass through values that are already instances of P; wrap anything else in a P resolving to it.
function adopt(value) { return value instanceof P ? value : new P(function (resolve) { resolve(value); }); }
return new (P || (P = Promise))(function (resolve, reject) {
// Resume the generator with a resolved value; an exception escaping the generator rejects the outer promise.
function fulfilled(value) { try { step(generator.next(value)); } catch (e) { reject(e); } }
// Throw a rejection reason into the generator; an exception escaping it rejects the outer promise.
function rejected(value) { try { step(generator["throw"](value)); } catch (e) { reject(e); } }
// Resolve with the final value once the generator is done; otherwise wait on the yielded value.
function step(result) { result.done ? resolve(result.value) : adopt(result.value).then(fulfilled, rejected); }
// Create the generator object and run it up to its first yield.
step((generator = generator.apply(thisArg, _arguments || [])).next());
});
};
Object.defineProperty(exports, "__esModule", { value: true });
const verify_pdf_1 = require("@ninja-labs/verify-pdf");
const pdfdataextract_1 = require("pdfdataextract");
const config_1 = require("./config");
// Compiled form of the PDF parser: keeps the supplied document and the shared config,
// and exposes getPDFMeta as an instance property created in the constructor.
class PDFparser {
constructor(document) {
// getPDFMeta: extracts document data with PdfData.extract, then runs the signature
// verifier on the same document, and merges both results into one flat object.
// Any failure is logged and replaced by a generic Error.
this.getPDFMeta = () => __awaiter(this, void 0, void 0, function* () {
try {
// Note: the module-level config is passed here, not this.config (same object, assigned below).
const pdfMeta = yield pdfdataextract_1.PdfData.extract(this.document, config_1.config);
const signaturesMeta = yield verify_pdf_1.default(this.document);
return {
verified: signaturesMeta.verified,
authenticity: signaturesMeta.authenticity,
integrity: signaturesMeta.integrity,
expired: signaturesMeta.expired,
meta: {
// NOTE(review): assumes the verifier result exposes `certs` at the top level — confirm
// against the installed @ninja-labs/verify-pdf version.
certs: signaturesMeta.certs,
},
pages: pdfMeta.pages,
// Key is spelled "fingerpring" (sic); it carries pdfMeta.fingerprint.
fingerpring: pdfMeta.fingerprint,
// Key is "creation_data" (sic); it carries the info CreationDate value.
creation_data: pdfMeta.info.CreationDate,
creator: pdfMeta.info.Creator,
author: pdfMeta.info.Author,
title: pdfMeta.info.Title,
// The description field is filled from the info Keywords entry.
description: pdfMeta.info.Keywords,
mod_date: pdfMeta.info.ModDate,
};
}
catch (error) {
// The original error is only written to the console; callers receive the generic message.
console.error(error);
throw new Error("Could not get pdf metadata");
}
});
this.document = document;
this.config = config_1.config;
}
}
exports.default = PDFparser;
/**
 * Shape of a single certificate entry: issuer and subject name fields,
 * a validity window, and the certificate text itself.
 */
export interface Icert {
clientCertificate: boolean;
/** Name fields of the issuer. */
issuedBy: {
countryName: string;
organizationName: string;
commonName: string;
};
/** Name fields of the subject the certificate was issued to. */
issuedTo: {
countryName: string;
organizationalUnitName: string;
organizationName: string;
commonName: string;
};
/** Validity window bounds, typed as strings. */
validityPeriod: {
notBefore: string;
notAfter: string;
};
/** Presumably the PEM-encoded certificate — confirm against the verifier output. */
pemCertificate: string;
}
/**
 * Result type of PDFparser.getPDFMeta: four signature-verification flags,
 * the certificate list, and descriptive document fields.
 */
export interface IgetMetaResponse {
verified: boolean;
authenticity: boolean;
integrity: boolean;
expired: boolean;
meta: {
// NOTE(review): each element is typed as an object with an `Icert` property of type `any`,
// not as an `Icert` value — this looks unintended; confirm against the source declaration.
certs: Array<{
Icert: any;
}>;
};
pages: number;
/** Spelled "fingerpring" (sic); part of the public shape. */
fingerpring: string;
/** Named "creation_data" (sic); the parser fills it from the info CreationDate entry. */
creation_data: string;
creator: string;
author: string;
title: string;
/** The parser fills this from the info Keywords entry. */
description: string;
mod_date: string;
}
"use strict";
Object.defineProperty(exports, "__esModule", { value: true });
......@@ -35,5 +35,8 @@
"pre-commit": "lint-staged"
}
},
"dependencies": {}
"dependencies": {
"@ninja-labs/verify-pdf": "^0.3.9",
"pdfdataextract": "^3.2.0"
}
}
import { VerbosityLevel } from "pdfdataextract";
/**
 * Switches that enable or disable each kind of data extraction
 * (all are optional and enabled by default). Every one is turned on here.
 */
const extractionTargets = {
  // number of pages
  pages: true,
  // text of each page
  text: true,
  // fingerprint
  fingerprint: true,
  // outline
  outline: true,
  // metadata
  metadata: true,
  // info
  info: true,
  // permissions
  permissions: true,
};

/** Options handed to pdfdataextract: parsing verbosity plus the extraction switches. */
export const config = {
  verbosity: VerbosityLevel.ERRORS,
  get: extractionTargets,
};
/** Returns the arithmetic sum of the two given numbers. */
export const add = (a: number, b: number) => a + b;
import PDFparser from "./pdfParser";
export default PDFparser;
import verifyPDF from "@ninja-labs/verify-pdf";
import { PdfData } from "pdfdataextract";
import { config } from "./config";
import { IgetMetaResponse } from "./types";
/**
 * Reads descriptive metadata and signature-verification results from a PDF
 * held in memory.
 */
class PDFparser {
  /** PDF bytes supplied to the constructor. */
  readonly document: Buffer;
  /** Extraction options forwarded to pdfdataextract. */
  readonly config: typeof config;

  /**
   * @param document Buffer containing the PDF file to inspect.
   */
  constructor(document: Buffer) {
    this.document = document;
    this.config = config;
  }

  /**
   * Extracts document data and runs signature verification on the same buffer,
   * then merges both results into a single flat response.
   *
   * @returns Verification flags, the certificate list and document info fields.
   * @throws Error with the message "Could not get pdf metadata" when extraction or
   *   verification fails; the underlying failure is attached as `cause`.
   */
  getPDFMeta = async (): Promise<IgetMetaResponse> => {
    try {
      // Use the options stored on the instance rather than reaching for the module-level object.
      const pdfMeta = await PdfData.extract(this.document, this.config);
      const signaturesMeta = await verifyPDF(this.document);
      return {
        verified: signaturesMeta.verified,
        authenticity: signaturesMeta.authenticity,
        integrity: signaturesMeta.integrity,
        expired: signaturesMeta.expired,
        meta: {
          // NOTE(review): assumes the verifier result exposes `certs` at the top level —
          // confirm against the installed @ninja-labs/verify-pdf version.
          certs: signaturesMeta.certs,
        },
        pages: pdfMeta.pages,
        // Response keys "fingerpring" and "creation_data" are kept as-is: they are
        // part of the public IgetMetaResponse shape.
        fingerpring: pdfMeta.fingerprint,
        creation_data: pdfMeta.info.CreationDate,
        creator: pdfMeta.info.Creator,
        author: pdfMeta.info.Author,
        title: pdfMeta.info.Title,
        // The description field is filled from the Keywords info entry.
        description: pdfMeta.info.Keywords,
        mod_date: pdfMeta.info.ModDate,
      };
    } catch (error) {
      console.error(error);
      // Same error type and message as before, but keep the original failure reachable
      // for callers instead of leaving it only in the console output.
      throw Object.assign(new Error("Could not get pdf metadata"), { cause: error });
    }
  };
}
export default PDFparser;
/**
 * Shape of a single certificate entry: issuer and subject name fields,
 * a validity window, and the certificate text itself.
 */
export interface Icert {
  clientCertificate: boolean;
  /** Name fields of the issuer. */
  issuedBy: {
    countryName: string;
    organizationName: string;
    commonName: string;
  };
  /** Name fields of the subject the certificate was issued to. */
  issuedTo: {
    countryName: string;
    organizationalUnitName: string;
    organizationName: string;
    commonName: string;
  };
  /** Validity window bounds, typed as strings. */
  validityPeriod: {
    notBefore: string;
    notAfter: string;
  };
  /** Presumably the PEM-encoded certificate — confirm against the verifier output. */
  pemCertificate: string;
}

/**
 * Result of PDFparser.getPDFMeta: signature-verification flags, the certificate
 * list, and descriptive document fields.
 */
export interface IgetMetaResponse {
  verified: boolean;
  authenticity: boolean;
  integrity: boolean;
  expired: boolean;
  meta: {
    // Each element is an Icert. The previous `Array<{ Icert }>` declared an untyped
    // property named "Icert" on every element instead of using the interface.
    certs: Icert[];
  };
  pages: number;
  /** Spelled "fingerpring" (sic); kept because it is part of the public shape. */
  fingerpring: string;
  /** Named "creation_data" (sic); the parser fills it from the info CreationDate entry. */
  creation_data: string;
  creator: string;
  author: string;
  title: string;
  /** The parser fills this from the info Keywords entry. */
  description: string;
  mod_date: string;
}
......@@ -1070,6 +1070,13 @@
"@types/yargs" "^15.0.0"
chalk "^4.0.0"
"@ninja-labs/verify-pdf@^0.3.9":
version "0.3.9"
resolved "https://registry.yarnpkg.com/@ninja-labs/verify-pdf/-/verify-pdf-0.3.9.tgz#46929f917b3452cb0cf28c833bce7dc9d1e7ae8f"
integrity sha512-TXuax5oaFoAICjEmDctxb28DhfeQkVe2vev7kw6GieHEliuCE5b3K5+puzSj7KcUr3EAYPtzQB740t7GCNrlxw==
dependencies:
node-forge "^0.10.0"
"@sinonjs/commons@^1.7.0":
version "1.8.1"
resolved "https://registry.yarnpkg.com/@sinonjs/commons/-/commons-1.8.1.tgz#e7df00f98a203324f6dc7cc606cad9d4a8ab2217"
......@@ -3662,6 +3669,11 @@ nice-try@^1.0.4:
resolved "https://registry.yarnpkg.com/nice-try/-/nice-try-1.0.5.tgz#a3378a7696ce7d223e88fc9b764bd7ef1089e366"
integrity sha512-1nh45deeb5olNY7eX82BkPO7SSxR5SSYJiPTrTdFUVYwAl8CKMA5N9PjTYkHiRjisVcxcQ1HXdLhx2qxxJzLNQ==
node-forge@^0.10.0:
version "0.10.0"
resolved "https://registry.yarnpkg.com/node-forge/-/node-forge-0.10.0.tgz#32dea2afb3e9926f02ee5ce8794902691a676bf3"
integrity sha512-PPmu8eEeG9saEUvI97fm4OYxXVB6bFvyNTyiUOBichBpFG8A1Ljw3bY62+5oOjDEMHRnd0Y7HQ+x7uzxOzC6JA==
node-int64@^0.4.0:
version "0.4.0"
resolved "https://registry.yarnpkg.com/node-int64/-/node-int64-0.4.0.tgz#87a9065cdb355d3182d8f94ce11188b825c68a3b"
......@@ -3909,6 +3921,18 @@ path-type@^4.0.0:
resolved "https://registry.yarnpkg.com/path-type/-/path-type-4.0.0.tgz#84ed01c0a7ba380afe09d90a8c180dcd9d03043b"
integrity sha512-gDKb8aZMDeD/tZWs9P6+q0J9Mwkdl6xMV8TjnGP3qJVJ06bdMgkbBlLU8IdfOsIsFz2BW1rNVT3XuNEl8zPAvw==
pdfdataextract@^3.2.0:
version "3.2.0"
resolved "https://registry.yarnpkg.com/pdfdataextract/-/pdfdataextract-3.2.0.tgz#c40d59fb0de4ed6e63a1da08f87d4f4550e841ec"
integrity sha512-t4W7h+cdr/aefdftzxmf+3w4ntVO70OlOFAGgH2zrbc+lDmYKIzAUXJhP+zpIqK6SnkRnZrQOC0fv/sejUJnrg==
dependencies:
pdfjs-dist "2.10.377"
pdfjs-dist@2.10.377:
version "2.10.377"
resolved "https://registry.yarnpkg.com/pdfjs-dist/-/pdfjs-dist-2.10.377.tgz#feadc9f31bf1790795994e54b18930974cf4970a"
integrity sha512-i0jRShtvgfsVQUNCoFYH4SVhPO3U0yhtiFLfZ0RR0B+68N+Vnwq+8B3cjWjLEwWGh8wg1XQ/sYMYKUlHn/Qpsw==
performance-now@^2.1.0:
version "2.1.0"
resolved "https://registry.yarnpkg.com/performance-now/-/performance-now-2.1.0.tgz#6309f4e0e5fa913ec1c69307ae364b4b377c9e7b"
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment