`_
documentation for details.
.. toctree::
:maxdepth: 2
:caption: Contents:
modules
endesive-2.19.1/docs/make.bat 0000664 0000000 0000000 00000001375 15042366745 0015765 0 ustar 00root root 0000000 0000000 @ECHO OFF
REM Work from the directory that contains this script (%~dp0).
pushd %~dp0
REM Command file for Sphinx documentation
REM Default to the "sphinx-build" command unless the caller set SPHINXBUILD.
if "%SPHINXBUILD%" == "" (
set SPHINXBUILD=sphinx-build
)
set SOURCEDIR=.
set BUILDDIR=_build
REM Probe run with all output discarded; errorlevel 9009 is treated below as
REM "sphinx-build was not found".
%SPHINXBUILD% >NUL 2>NUL
if errorlevel 9009 (
echo.
echo.The 'sphinx-build' command was not found. Make sure you have Sphinx
echo.installed, then set the SPHINXBUILD environment variable to point
echo.to the full path of the 'sphinx-build' executable. Alternatively you
echo.may add the Sphinx directory to PATH.
echo.
echo.If you don't have Sphinx installed, grab it from
echo.https://www.sphinx-doc.org/
exit /b 1
)
REM Without a target argument show the Sphinx help, otherwise build target %1.
if "%1" == "" goto help
%SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O%
goto end
:help
%SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O%
:end
REM Restore the directory saved by pushd.
popd
endesive-2.19.1/docs/modules.rst 0000664 0000000 0000000 00000000075 15042366745 0016556 0 ustar 00root root 0000000 0000000 endesive
========
.. toctree::
:maxdepth: 4
endesive
endesive-2.19.1/endesive/ 0000775 0000000 0000000 00000000000 15042366745 0015224 5 ustar 00root root 0000000 0000000 endesive-2.19.1/endesive/__init__.py 0000664 0000000 0000000 00000000240 15042366745 0017331 0 ustar 00root root 0000000 0000000 """Python ENDESIVE library."""
# Package metadata.
__author__ = 'Grzegorz Makarewicz'
__license__ = 'MIT'
__version__ = '2.19.0'
# __all__ has to list attribute *names*.  Listing the values made
# ``from endesive import *`` fail with AttributeError because the module has
# no attribute called e.g. 'MIT'.
__all__ = ['__author__', '__license__', '__version__']
endesive-2.19.1/endesive/email/ 0000775 0000000 0000000 00000000000 15042366745 0016313 5 ustar 00root root 0000000 0000000 endesive-2.19.1/endesive/email/__init__.py 0000664 0000000 0000000 00000000204 15042366745 0020420 0 ustar 00root root 0000000 0000000 # *-* coding: utf-8 *-*
from .decrypt import decrypt
from .encrypt import encrypt
from .sign import sign
from .verify import verify
endesive-2.19.1/endesive/email/decrypt.py 0000664 0000000 0000000 00000007372 15042366745 0020350 0 ustar 00root root 0000000 0000000 # *-* coding: utf-8 *-*
import sys
from email import message_from_string
from asn1crypto import cms
from cryptography.hazmat.backends import default_backend
from cryptography.hazmat.primitives import hashes
from cryptography.hazmat.primitives.ciphers import Cipher, algorithms, modes
from cryptography.hazmat.primitives.asymmetric import padding
from cryptography.hazmat.primitives.asymmetric.types import PrivateKeyTypes
class DecryptedData(object):
    """Decrypt the pkcs7-mime (CMS enveloped-data) part of an e-mail."""

    def decrypt(self, data: str, key: PrivateKeyTypes) -> bytes:
        """
        Locate the first pkcs7-mime part of the message and decrypt it.

        Only the first recipient entry of the enveloped data is used to
        recover the content-encryption key.

        :param data: The complete e-mail message as a string.
        :param key: The private key used to unwrap the content-encryption key.
        :return: The decrypted content with the trailing padding removed.
        :raises ValueError: If the message has no encrypted part, a key or
            content algorithm is not supported, or the padding of the
            decrypted content is invalid.
        """
        msg = message_from_string(data)
        data = None
        for part in msg.walk():
            # multipart/* are just containers
            if part.get_content_maintype() == 'multipart':
                continue
            if part.get_content_type() not in (
                'application/x-pkcs7-mime',
                'application/pkcs7-mime',
            ):
                continue
            data = part.get_payload(decode=True)
            break
        if data is None:
            raise ValueError('No encrypted part found in the message')

        enveloped = cms.ContentInfo.load(data)['content']
        content_info = enveloped['encrypted_content_info']
        algo = content_info['content_encryption_algorithm']['algorithm'].native
        param = content_info['content_encryption_algorithm']['parameters'].native
        edata = content_info['encrypted_content'].native

        # Only the first recipient is considered.
        recipient = enveloped['recipient_infos'].native[0]
        pkey = recipient['encrypted_key']
        keyalgo = recipient['key_encryption_algorithm']

        # Unwrap the content-encryption key with the private key.
        if keyalgo['algorithm'] == 'rsaes_oaep':
            keyparam = keyalgo['parameters']
            mga = keyparam['mask_gen_algorithm']
            # Hash / mask-generation classes are looked up by their upper-cased
            # names (e.g. 'sha512' -> hashes.SHA512, 'mgf1' -> padding.MGF1).
            mgh = getattr(hashes, mga['parameters']['algorithm'].upper())()
            pad = padding.OAEP(
                mgf=getattr(padding, mga['algorithm'].upper())(algorithm=mgh),
                algorithm=getattr(hashes, keyparam['hash_algorithm']['algorithm'].upper())(),
                label=keyparam['p_source_algorithm']['parameters']
            )
            udata = key.decrypt(pkey, pad)
        elif keyalgo['algorithm'] == 'rsaes_pkcs1v15':
            udata = key.decrypt(pkey, padding.PKCS1v15())
        else:
            raise ValueError('Unknown key algorithm', keyalgo['algorithm'])

        # The algorithm name has the form '<cipher>_<mode>', e.g. 'aes256_cbc'.
        algorithm, mode = algo.split('_', 1)
        algorithm = algorithm.upper()
        if algorithm in (
            'AES128',
            'AES192',
            'AES256',
        ):
            cipher = Cipher(
                algorithms.AES(udata),
                getattr(modes, mode.upper())(param),
                default_backend()
            )
        elif algorithm == 'TRIPLEDES':
            # XXX will be removed in version 48.0.0
            from cryptography.hazmat.decrepit.ciphers.algorithms import TripleDES
            # XXX howto decode parameters to CBC mode ?
            mode = 'cbc'
            cipher = Cipher(
                TripleDES(udata),
                getattr(modes, mode.upper())(param),
                default_backend()
            )
        else:
            raise ValueError('Unknown algorithm', algo)

        decryptor = cipher.decryptor()
        udata = decryptor.update(edata) + decryptor.finalize()

        # The last byte gives the number of padding bytes to strip.  Reject
        # impossible values instead of silently returning an empty result.
        if not udata:
            raise ValueError('Decrypted content is empty')
        nb = udata[-1]
        if nb == 0 or nb > len(udata):
            raise ValueError('Invalid padding in decrypted content')
        return udata[:-nb]
def decrypt(data: str, key: PrivateKeyTypes) -> bytes:
    """
    Decrypt an encrypted e-mail message with the given private key.

    Thin convenience wrapper around ``DecryptedData.decrypt``.

    :param data: The encrypted data as a string.
    :param key: The private key used for decryption.
    :return: The decrypted data as bytes.
    """
    return DecryptedData().decrypt(data, key)
endesive-2.19.1/endesive/email/encrypt.py 0000664 0000000 0000000 00000012021 15042366745 0020345 0 ustar 00root root 0000000 0000000 # *-* coding: utf-8 *-*
import sys
import os
from email.mime.application import MIMEApplication
from asn1crypto import cms, core, algos
from cryptography import x509
from cryptography.hazmat.backends import default_backend
from cryptography.hazmat.primitives import hashes
from cryptography.hazmat.primitives.ciphers import Cipher, algorithms, modes
from cryptography.hazmat.primitives.asymmetric import padding
from endesive import signer
class EncryptedData(object):
    """Build an AES encrypted CMS enveloped-data structure wrapped as e-mail."""

    def email(self, data: bytes, oaep: bool) -> str:
        """
        Wrap the binary structure into a MIME application part.

        :param data: Serialized enveloped data.
        :param oaep: Selects the content type: ``application/pkcs7-mime`` when
            true, ``application/x-pkcs7-mime`` otherwise.
        :return: The MIME part rendered as a string.
        """
        subtype_prefix = ('x-', '')[oaep]
        part = MIMEApplication(data)
        # Replace the default content type set by MIMEApplication.
        del part['Content-Type']
        part['Content-Disposition'] = 'attachment; filename="smime.p7m"'
        part['Content-Type'] = 'application/%spkcs7-mime; smime-type=enveloped-data; name="smime.p7m"' % subtype_prefix
        return part.as_string()

    def pad(self, s, block_size):
        """
        Pad *s* up to a multiple of *block_size*.

        Between 1 and *block_size* bytes are always appended, each holding
        the number of appended bytes.
        """
        count = block_size - len(s) % block_size
        return s + bytes([count]) * count

    def recipient_info(self, cert, session_key, oaep):
        """
        Encrypt the session key for one recipient.

        :param cert: Recipient certificate; its public key wraps the key.
        :param session_key: The content-encryption key.
        :param oaep: Use RSAES-OAEP (SHA-512 / MGF1) when true, PKCS#1 v1.5
            otherwise.
        :return: ``cms.RecipientInfo`` identifying the recipient by issuer
            and serial number.
        """
        public_key = cert.public_key()
        tbs_cert = signer.cert2asn(cert)['tbs_certificate']
        # TODO: use subject_key_identifier when available
        if oaep:
            oaep_padding = padding.OAEP(
                mgf=padding.MGF1(hashes.SHA512()),
                algorithm=hashes.SHA512(),
                label=None,
            )
            encrypted_key = public_key.encrypt(session_key, oaep_padding)
            # Describe the same parameters in the ASN.1 structure.
            oaep_params = algos.RSAESOAEPParams({
                'hash_algorithm': algos.DigestAlgorithm({'algorithm': 'sha512'}),
                'mask_gen_algorithm': algos.MaskGenAlgorithm({
                    'algorithm': algos.MaskGenAlgorithmId('mgf1'),
                    'parameters': {
                        'algorithm': algos.DigestAlgorithmId('sha512'),
                    },
                }),
                'p_source_algorithm': algos.PSourceAlgorithm({
                    'algorithm': algos.PSourceAlgorithmId('p_specified'),
                    'parameters': b'',
                }),
            })
            kea = cms.KeyEncryptionAlgorithm({
                'algorithm': cms.KeyEncryptionAlgorithmId('rsaes_oaep'),
                'parameters': oaep_params,
            })
        else:
            kea = {'algorithm': 'rsa'}
            encrypted_key = public_key.encrypt(session_key, padding.PKCS1v15())

        recipient_id = cms.RecipientIdentifier(
            name='issuer_and_serial_number',
            value={
                'issuer': tbs_cert['issuer'],
                'serial_number': tbs_cert['serial_number'],
            },
        )
        return cms.RecipientInfo(
            name='ktri',
            value={
                'version': 'v0',
                'rid': recipient_id,
                'key_encryption_algorithm': kea,
                'encrypted_key': core.OctetString(encrypted_key),
            },
        )

    def build(self, data, certs, algo, oaep):
        """
        Encrypt *data* with a random session key and envelope it.

        :param data: Content to encrypt.
        :param certs: Certificates of the recipients.
        :param algo: ``'<cipher>_<mode>'`` with cipher aes128, aes192 or
            aes256; the mode names a class of the ``modes`` module.
        :param oaep: Passed on to :meth:`recipient_info` and :meth:`email`.
        :return: The e-mail part as a string.
        """
        key_sizes = {
            'aes128': 16,
            'aes192': 24,
            'aes256': 32,
        }
        parts = algo.split('_', 1)
        key_size = key_sizes[parts[0]]
        block_size = 16

        session_key = os.urandom(key_size)
        iv = os.urandom(block_size)
        cipher = Cipher(
            algorithms.AES(session_key),
            getattr(modes, parts[1].upper())(iv),
            default_backend(),
        )

        padded = self.pad(data, block_size)
        encryptor = cipher.encryptor()
        encrypted = encryptor.update(padded) + encryptor.finalize()

        recipient_infos = [
            self.recipient_info(cert, session_key, oaep) for cert in certs
        ]

        enveloped_data = cms.ContentInfo({
            'content_type': 'enveloped_data',
            'content': {
                'version': 'v0',
                'recipient_infos': recipient_infos,
                'encrypted_content_info': {
                    'content_type': 'data',
                    'content_encryption_algorithm': {
                        'algorithm': algo,
                        'parameters': iv,
                    },
                    'encrypted_content': encrypted,
                },
            },
        })
        return self.email(enveloped_data.dump(), oaep)
def encrypt(data:bytes, certs:list[x509.Certificate], algo:str='aes256_cbc', oaep:bool=False) -> str:
    """
    Encrypt the given data bytes using the provided certificates and algorithm.

    :param data: The data to encrypt.
    :param certs: A list of x509.Certificate objects to use for encryption.
    :param algo: The encryption algorithm to use, ``'<cipher>_<mode>'`` with
        cipher aes128, aes192 or aes256 and mode cbc or ofb.
    :param oaep: Whether to use OAEP padding (default is False).
    :return: The encrypted data as an S/MIME message string.
    :raises ValueError: If *algo* is not one of the supported combinations.
    """
    # Explicit validation: an ``assert`` is stripped under ``python -O`` and a
    # name without '_' used to surface as an unrelated IndexError.
    parts = algo.split('_', 1)
    if (
        len(parts) != 2
        or parts[0] not in ('aes128', 'aes192', 'aes256')
        or parts[1] not in ('cbc', 'ofb')
    ):
        raise ValueError('Unknown algorithm', algo)
    cls = EncryptedData()
    return cls.build(data, certs, algo, oaep)
endesive-2.19.1/endesive/email/sign.py 0000664 0000000 0000000 00000004230 15042366745 0017624 0 ustar 00root root 0000000 0000000 # *-* coding: utf-8 *-*
import base64
from cryptography import x509
from cryptography.hazmat.primitives.asymmetric.types import PrivateKeyTypes
from endesive import signer
class SignedData(object):
    """Build a multipart/signed S/MIME message with a detached signature."""

    # ``micalg`` parameter value for each supported digest name.
    _MICALG = {
        'sha1': b'sha1',
        'sha256': b'sha-256',
        'sha384': b'sha-384',
        'sha512': b'sha-512',
    }

    def email(self, hashalgo, datau, datas, prefix):
        """
        Fill the message template.

        All arguments must be bytes because the template is a bytes literal.

        :param hashalgo: Value of the ``micalg`` parameter.
        :param datau: The signed content.
        :param datas: The base64 encoded signature.
        :param prefix: ``b'x-'`` or ``b''``, inserted before
            ``pkcs7-signature`` in both content types.
        :return: The complete message as bytes.
        """
        s = b'''\
MIME-Version: 1.0
Content-Type: multipart/signed; protocol="application/%spkcs7-signature"; micalg="%s"; boundary="----46F1AAD10BE922477643C0A33C40D389"
This is an S/MIME signed message
------46F1AAD10BE922477643C0A33C40D389
%s
------46F1AAD10BE922477643C0A33C40D389
Content-Type: application/%spkcs7-signature; name="smime.p7s"
Content-Transfer-Encoding: base64
Content-Disposition: attachment; filename="smime.p7s"
%s
------46F1AAD10BE922477643C0A33C40D389--
''' % (prefix, hashalgo, datau, prefix, datas)
        return s

    def build(self, datau, key, cert, othercerts, hashalgo, attrs, pss=False):
        """
        Sign *datau* and return the S/MIME message.

        :param datau: Content to sign (bytes); line feeds are replaced by CRLF
            before signing.
        :param key: Private key passed to ``signer.sign``.
        :param cert: Signing certificate.
        :param othercerts: Additional certificates passed to ``signer.sign``.
        :param hashalgo: Digest name, e.g. 'sha1', 'sha256', 'sha384', 'sha512'.
        :param attrs: Passed to ``signer.sign``.
        :param pss: Passed to ``signer.sign``; also selects the content type
            prefix (``b''`` when true, ``b'x-'`` otherwise).
        :return: The signed message as bytes.
        """
        datau = datau.replace(b'\n', b'\r\n')
        datas = signer.sign(datau, key, cert, othercerts, hashalgo, attrs, pss=pss)
        datas = base64.encodebytes(datas)
        micalg = self._MICALG.get(hashalgo)
        if micalg is None:
            # Unmapped digest names are passed through.  The template is bytes,
            # so a str name must be encoded or the formatting raises TypeError.
            micalg = hashalgo.encode('ascii') if isinstance(hashalgo, str) else hashalgo
        prefix = [b'x-', b''][pss]
        return self.email(micalg, datau, datas, prefix)
def sign(datau:bytes, key: PrivateKeyTypes, cert: x509.Certificate, certs: list[x509.Certificate], hashalgo='sha1', attrs=True, pss=False)->bytes:
    """
    Sign data and return it, together with the signature, as S/MIME message.

    Thin convenience wrapper around ``SignedData.build``.

    :param datau: Data to sign (bytes).
    :param key: Private key to sign with (PrivateKeyTypes).
    :param cert: Certificate to sign with (x509.Certificate).
    :param certs: List of additional certificates (list of x509.Certificate).
    :param hashalgo: Hash algorithm to use (str, default 'sha1').
    :param attrs: Whether to include attributes (bool, default True).
    :param pss: Whether to use PSS padding (bool, default False).
    :return: Signed data as bytes.
    """
    builder = SignedData()
    return builder.build(datau, key, cert, certs, hashalgo, attrs, pss)
endesive-2.19.1/endesive/email/verify.py 0000664 0000000 0000000 00000002545 15042366745 0020177 0 ustar 00root root 0000000 0000000 # *-* coding: utf-8 *-*
from email import message_from_string
from cryptography import x509
from endesive import verifier
def verify(data:bytes, certs:list[x509.Certificate]=None) -> tuple[bool, bool, bool]:
    """
    Verify S/MIME signed email.

    :param data: Email data, either as bytes (decoded as UTF-8) or as str.
    :param certs: List of additional certificates used to verify signature (system independent).
    :return:
        hashok, signatureok, certok

        hashok: bool
            True if the hash matches.
        signatureok: bool
            True if the signature is valid.
        certok: bool
            True if the certificate used for signing is trusted and valid.
    :raises ValueError: If the message has no signature part or no
        text/plain part.
    """
    # The parser below needs text; accept the documented bytes input as well.
    if isinstance(data, (bytes, bytearray)):
        data = data.decode('utf-8')
    msg = message_from_string(data)
    sig = None
    plain = None
    for part in msg.walk():
        ct = part.get_content_type()
        # multipart/* are just containers
        if ct.split('/')[0] == 'multipart':
            continue
        if ct in ('application/x-pkcs7-signature', 'application/pkcs7-signature'):
            sig = part.get_payload(decode=True)
        elif ct == 'text/plain':
            plain = part.get_payload(decode=False)
    if sig is None:
        raise ValueError('not signed email')
    if plain is None:
        # Previously this surfaced as AttributeError on None.
        raise ValueError('no text/plain part in signed email')
    plain = plain.encode('utf-8')
    # The content is checked with CRLF line endings.
    plain = plain.replace(b'\n', b'\r\n')
    return verifier.verify(sig, plain, certs)
endesive-2.19.1/endesive/hsm.py 0000664 0000000 0000000 00000050354 15042366745 0016374 0 ustar 00root root 0000000 0000000 #!/usr/bin/env vpython3
# coding: utf-8
import os
import sys
import binascii
import datetime
import PyKCS11
import base64
import hashlib
from cryptography import x509
from cryptography.hazmat.primitives import hashes
from cryptography.hazmat.primitives import serialization
from asn1crypto import x509 as asn1x509
from asn1crypto import keys as asn1keys
from asn1crypto import pem as asn1pem
from asn1crypto import util as asn1util
import paramiko.agent
import cryptography
class BaseHSM:
    """Interface of a signing device; both methods must be overridden."""

    def certificate(self):
        """
        Callback for HSM.

        Used to identify the ssh agent's exported key via fingerprint.

        :return: public-key-fingerprint, certificate-in-pem
        """
        raise NotImplementedError

    def sign(self, keyid, data, mech):
        """
        Sign data.

        :param keyid: the keyid as returned by certificate()
        :param data: data to sign
        :param mech: hash algo
        :return: PKCS7 signature blob
        """
        raise NotImplementedError
class HSM(BaseHSM):
def __init__(self, dllpath):
    """
    Load the PKCS#11 library; no session is opened yet.

    :param dllpath: dynamic library used to communicate with HSM
    """
    library = self.pkcs11 = PyKCS11.PyKCS11Lib()
    library.load(dllpath)
    # Set by login(), cleared by logout().
    self.session = None
def getSlot(self, label):
    """
    Find the slot whose token carries the given label.

    :param label: searched slot name
    :return: the first matching slot, or None when there is none
    """
    for candidate in self.pkcs11.getSlotList(tokenPresent=True):
        token = self.pkcs11.getTokenInfo(candidate)
        try:
            # Compare only the text before the first NUL, without
            # surrounding whitespace.
            matches = token.label.split("\0")[0].strip() == label
        except AttributeError:
            # Label cannot be processed as a string; skip this token.
            continue
        if matches:
            return candidate
    return None
def create(self, label, pin, sopin):
    """
    Initialize a token unless one with this label already exists.

    The last slot of the slot list is initialized and its user pin is set.

    :param label: searched slot name
    :param pin: pin for slot
    :param sopin: administrative pin
    """
    if self.getSlot(label) is not None:
        # A token with this label is already present.
        return
    target = self.pkcs11.getSlotList(tokenPresent=True)[-1]
    self.pkcs11.initToken(target, sopin, label)
    so_session = self.pkcs11.openSession(
        target, PyKCS11.CKF_SERIAL_SESSION | PyKCS11.CKF_RW_SESSION
    )
    # The user pin is set while logged in with the administrative pin.
    so_session.login(sopin, user_type=PyKCS11.CKU_SO)
    so_session.initPin(pin)
    so_session.logout()
    so_session.closeSession()
def login(self, label, pin):
    """
    Start a read/write session on the token with the given label.

    Nothing happens when no such token is found.

    :param label: slot name
    :param pin: pin for slot
    """
    slot = self.getSlot(label)
    if slot is not None:
        self.session = self.pkcs11.openSession(
            slot, PyKCS11.CKF_SERIAL_SESSION | PyKCS11.CKF_RW_SESSION
        )
        self.session.login(pin)
def logout(self):
    """
    End the current session, if there is one.
    """
    active = self.session
    if active is None:
        return
    active.logout()
    active.closeSession()
    self.session = None
def gen_privkey(self, label, key_id, key_length=2048):
    """
    Create a key pair in the current session.

    :param label: key label, just a label for identifying objects
    :param key_id: key ID; has to be the same for both objects, it will also
        be necessary when importing the certificate, to ensure it is linked
        with these keys
    :param key_length: key length in bits
    """
    # Both key objects end with the same label / ID attributes.
    identity = [
        (PyKCS11.CKA_LABEL, label),
        (PyKCS11.CKA_ID, key_id),
    ]
    # CKA_PUBLIC_EXPONENT, CKA_KEY_TYPE and CKA_SENSITIVE are not set.
    public_template = [
        (PyKCS11.CKA_CLASS, PyKCS11.CKO_PUBLIC_KEY),
        (PyKCS11.CKA_TOKEN, PyKCS11.CK_TRUE),
        (PyKCS11.CKA_PRIVATE, PyKCS11.CK_FALSE),
        (PyKCS11.CKA_MODULUS_BITS, key_length),
        (PyKCS11.CKA_ENCRYPT, PyKCS11.CK_TRUE),
        (PyKCS11.CKA_VERIFY, PyKCS11.CK_TRUE),
        (PyKCS11.CKA_VERIFY_RECOVER, PyKCS11.CK_TRUE),
        (PyKCS11.CKA_WRAP, PyKCS11.CK_TRUE),
    ] + identity
    private_template = [
        (PyKCS11.CKA_CLASS, PyKCS11.CKO_PRIVATE_KEY),
        (PyKCS11.CKA_TOKEN, PyKCS11.CK_TRUE),
        (PyKCS11.CKA_PRIVATE, PyKCS11.CK_TRUE),
        (PyKCS11.CKA_DECRYPT, PyKCS11.CK_TRUE),
        (PyKCS11.CKA_SIGN, PyKCS11.CK_TRUE),
        (PyKCS11.CKA_SIGN_RECOVER, PyKCS11.CK_TRUE),
        (PyKCS11.CKA_UNWRAP, PyKCS11.CK_TRUE),
    ] + identity
    self.session.generateKeyPair(public_template, private_template)
def cert_save(self, cert, label, subject, key_id):
    """
    Store a certificate object in the current session.

    :param cert: certificate
    :param label: certificate label
    :param subject: certificate subject
    :param key_id: key ID
    """
    encoded_label = label.encode("utf-8")
    encoded_subject = subject.encode("utf-8")
    # See Table 24, X.509 Certificate Object Attributes: CKA_ID and
    # CKA_SUBJECT must be set (and DER), CKA_VALUE must be BER-encoded.
    # NOTE(review): the subject is stored as the UTF-8 encoded text here, not
    # as a DER encoded name - confirm this is intended.
    attributes = [
        (PyKCS11.CKA_CLASS, PyKCS11.CKO_CERTIFICATE),
        (PyKCS11.CKA_CERTIFICATE_TYPE, PyKCS11.CKC_X_509),
        (PyKCS11.CKA_TOKEN, PyKCS11.CK_TRUE),
        (PyKCS11.CKA_LABEL, encoded_label),
        (PyKCS11.CKA_ID, key_id),
        (PyKCS11.CKA_SUBJECT, encoded_subject),
        (PyKCS11.CKA_VALUE, cert),
    ]
    self.session.createObject(attributes)
def cert_load(self, keyID):
    """
    Load the certificate stored under the given key ID.

    :param keyID: key ID
    :return: the CKA_VALUE of the first matching certificate object as
        bytes, or None when no certificate is found
    """
    query = [
        (PyKCS11.CKA_CLASS, PyKCS11.CKO_CERTIFICATE),
        (PyKCS11.CKA_ID, keyID),
    ]
    found = self.session.findObjects(query)
    if len(found) == 0:
        return None
    return bytes(found[0].to_dict()["CKA_VALUE"])
def certsign(self, sn, pubKey, subject, until, caprivKey, ca):
    """
    Build a certificate and sign it with a key held in the HSM.

    The certificate is valid from one day before now (UTC) up to *until*
    and is signed with SHA-256 / RSA PKCS#1.

    :param sn: serial number
    :param pubKey: public key
    :param subject: common name for certificate subject
    :param until: until when is the certificate valid
    :param caprivKey: signing key
    :param ca:
        None: create self signed root CA certificate
        True: create indirect CA certificate
        False: create user certificate
    :return: the serialized certificate
    """
    def asn1_time(moment):
        # UTCTime carries a two-digit year that is only defined for
        # 1950-2049; RFC 5280 requires GeneralizedTime for later dates.
        # Encoding e.g. 2065 as UTCTime would be read back as 1965.
        if 1950 <= moment.year <= 2049:
            return asn1x509.Time({
                "utc_time": asn1x509.UTCTime(moment.strftime('%y%m%d%H%M%SZ')),
            })
        return asn1x509.Time({
            "general_time": asn1x509.GeneralizedTime(moment.strftime('%Y%m%d%H%M%SZ')),
        })

    def authority_key_identifier(serial):
        # Refers to the issuer certificate by its serial number only.
        return {
            'extn_id': 'authority_key_identifier',
            'critical': False,
            'extn_value': {
                'key_identifier': None,
                'authority_cert_issuer': None,
                'authority_cert_serial_number': serial,
            },
        }

    def basic_constraints(value):
        return {
            "extn_id": "basic_constraints",
            "critical": True,
            "extn_value": value,
        }

    def key_usage(usages):
        return {
            "extn_id": "key_usage",
            "critical": True,
            "extn_value": set(usages),
        }

    ca_usages = ("crl_sign", "digital_signature", "key_cert_sign")
    # datetime.utcnow() is deprecated; an aware UTC timestamp formats the same.
    not_before = datetime.datetime.now(datetime.timezone.utc) - datetime.timedelta(days=1)

    args = {
        "version": "v3",
        "serial_number": sn,
        "issuer": asn1x509.Name.build({
            "common_name": "hsm Root CA",
        }),
        "subject": asn1x509.Name.build({
            "common_name": subject,
        }),
        "signature": {
            "algorithm": "sha256_rsa",
            "parameters": None,
        },
        "validity": {
            "not_before": asn1_time(not_before),
            "not_after": asn1_time(until),
        },
        "subject_public_key_info": {
            "algorithm": {
                "algorithm": "rsa",
                "parameters": None,
            },
            "public_key": pubKey,
        },
    }

    # CRL distribution point, authority information access, subject key
    # identifier and subject alternative name extensions are not emitted.
    if ca is None:
        # Self signed root CA: issuer equals subject, no path length limit.
        args["issuer"] = asn1x509.Name.build({
            "common_name": subject,
        })
        args["extensions"] = [
            basic_constraints({"ca": True, "path_len_constraint": None}),
            key_usage(ca_usages),
        ]
    elif ca:
        # Indirect CA issued by "hsm Root CA" (serial number 1).
        args["extensions"] = [
            authority_key_identifier(1),
            basic_constraints({"ca": True, "path_len_constraint": 0}),
            key_usage(ca_usages),
        ]
    else:
        # User certificate issued by "hsm Indirect CA" (serial number 2).
        args["issuer"] = asn1x509.Name.build({
            "common_name": "hsm Indirect CA",
        })
        args["extensions"] = [
            authority_key_identifier(2),
            basic_constraints({"ca": False}),
            key_usage((
                "digital_signature",
                "key_agreement",
                "key_encipherment",
                "non_repudiation",
            )),
        ]

    tbs = asn1x509.TbsCertificate(args)
    # Sign the TBS Certificate
    data = tbs.dump()
    value = self.session.sign(
        caprivKey, data, PyKCS11.Mechanism(PyKCS11.CKM_SHA256_RSA_PKCS, None)
    )
    value = bytes(bytearray(value))
    cert = asn1x509.Certificate(
        {
            "tbs_certificate": tbs,
            "signature_algorithm": {
                "algorithm": "sha256_rsa",
                "parameters": None,
            },
            "signature_value": value,
        }
    )
    return cert.dump()
def ca_gen(self, label, keyID, subject):
    """
    Initiate root CA certificate.

    Creates a self signed certificate for the key pair stored under *keyID*,
    valid for 365 * 40 days, and saves it in the HSM.

    :param label: HSM key label
    :param keyID: HSM key ID
    :param subject: common name in certificate subject
    """
    privKey = self.session.findObjects(
        [(PyKCS11.CKA_CLASS, PyKCS11.CKO_PRIVATE_KEY), (PyKCS11.CKA_ID, keyID)]
    )[0]
    pubKey = self.session.findObjects(
        [(PyKCS11.CKA_CLASS, PyKCS11.CKO_PUBLIC_KEY), (PyKCS11.CKA_ID, keyID)]
    )[0]
    # Modulus and exponent are read from the HSM and converted to integers
    # through their hexadecimal representation.
    modulus = self.session.getAttributeValue(pubKey, [PyKCS11.CKA_MODULUS])[0]
    exponent = self.session.getAttributeValue(
        pubKey, [PyKCS11.CKA_PUBLIC_EXPONENT]
    )[0]
    pubKey = asn1keys.RSAPublicKey(
        {
            "modulus": int(binascii.hexlify(bytearray(modulus)), 16),
            "public_exponent": int(binascii.hexlify(bytearray(exponent)), 16),
        }
    )
    # datetime.utcnow() is deprecated; use an aware UTC timestamp instead.
    until = datetime.datetime.now(datetime.timezone.utc) + datetime.timedelta(
        days=365 * 40
    )
    # Serial number 1, ca=None: self signed root CA.
    der_bytes = self.certsign(1, pubKey, subject, until, privKey, None)
    self.cert_save(der_bytes, label, subject, keyID)
def ca_sign(self, keyID, label, sn, subject, days, cakeyID):
    """
    Sign certificate.

    Creates a certificate for the public key stored under *keyID*, signed
    with the private key stored under *cakeyID*, and saves it in the HSM.

    :param keyID: HSM key identifier of the signed certificate
    :param label: HSM label
    :param sn: certificate serial number
    :param subject: HSM subject, common name stored in subject field of the signed certificate
    :param days: validity days of certificate
    :param cakeyID: HSM key identifier of the signing key
    """
    caprivKey = self.session.findObjects(
        [(PyKCS11.CKA_CLASS, PyKCS11.CKO_PRIVATE_KEY), (PyKCS11.CKA_ID, cakeyID)]
    )[0]
    pubKey = self.session.findObjects(
        [(PyKCS11.CKA_CLASS, PyKCS11.CKO_PUBLIC_KEY), (PyKCS11.CKA_ID, keyID)]
    )[0]
    # Modulus and exponent are read from the HSM and converted to integers
    # through their hexadecimal representation.
    modulus = self.session.getAttributeValue(pubKey, [PyKCS11.CKA_MODULUS])[0]
    exponent = self.session.getAttributeValue(
        pubKey, [PyKCS11.CKA_PUBLIC_EXPONENT]
    )[0]
    pubKey = asn1keys.RSAPublicKey(
        {
            "modulus": int(binascii.hexlify(bytearray(modulus)), 16),
            "public_exponent": int(binascii.hexlify(bytearray(exponent)), 16),
        }
    )
    # datetime.utcnow() is deprecated; use an aware UTC timestamp instead.
    until = datetime.datetime.now(datetime.timezone.utc) + datetime.timedelta(
        days=days
    )
    # The certificate becomes an indirect CA only for key ID b'\x02',
    # every other key ID yields a user certificate.
    der_bytes = self.certsign(sn, pubKey, subject, until, caprivKey, keyID == b'\x02')
    self.cert_save(der_bytes, label, subject, keyID)
def cert_export(self, fname, keyID):
    """
    Write the certificate stored under *keyID* to two files.

    :param fname: file name without extension; ``.der`` and ``.pem`` are
        appended for the binary and the PEM armored form
    :param keyID: key ID
    """
    der_bytes = self.cert_load(keyID)
    outputs = (
        (".der", der_bytes),
        (".pem", asn1pem.armor("CERTIFICATE", der_bytes)),
    )
    for suffix, payload in outputs:
        with open(fname + suffix, "wb") as fp:
            fp.write(payload)
class SSHAgentHSM(BaseHSM):
    """Signing back-end that lets a running ssh-agent create the signature."""

    def __init__(self, cert):
        """
        Connect to the ssh-agent.

        :param cert: certificate (cryptography.x509.Certificate) whose
            public key identifies the agent key to use
        """
        assert isinstance(cert, cryptography.x509.Certificate)
        self._a = paramiko.agent.Agent()
        self._cert = cert

    def close(self):
        """Close the connection to the ssh-agent."""
        self._a.close()

    def certificate(self):
        """
        callback for HSM
        used to identify the ssh agent's exported key via fingerprint

        :return: public-key-fingerprint, certificate-in-pem
        """
        # https://superuser.com/questions/421997/what-is-a-ssh-key-fingerprint-and-how-is-it-generated
        # convert RSA Key to SSH Fingerprint
        serialization = cryptography.hazmat.primitives.serialization
        openssh_line = self._cert.public_key().public_bytes(
            encoding=serialization.Encoding.OpenSSH,
            format=serialization.PublicFormat.OpenSSH,
        )
        # "<algorithm> <base64 key blob>"
        _alg, blob = openssh_line.split(b" ")
        digest = hashlib.sha256(base64.b64decode(blob)).digest()
        fingerprint = b"SHA256:" + base64.b64encode(digest)
        pem = self._cert.public_bytes(serialization.Encoding.PEM)
        return fingerprint, pem

    @staticmethod
    def _decode_fp(keyfp):
        """
        decode a fingerprint

        :param keyfp: key fingerprint in OpenSSH Format (str or bytes)
        :return: alg, fingerprint-binary
        :raises ValueError: for algorithms other than SHA256 and MD5
        """
        text = keyfp if isinstance(keyfp, str) else keyfp.decode()
        alg, other = text.split(":", 1)
        if alg == "SHA256":
            # pad base64 data
            padded = other.encode() + b"=" * (-len(other) % 4)
            return alg.lower(), base64.b64decode(padded)
        if alg == "MD5":
            # colon separated hex pairs
            return alg.lower(), bytes.fromhex(other.replace(":", " "))
        raise ValueError(alg)

    def key(self, fp):
        """
        lookup a ssh-agent-exported key using fingerprint

        :param fp: the fingerprint
        :return: the key on success
        :raises ValueError: when the agent holds no key with this fingerprint
        """
        alg, wanted = self._decode_fp(fp)
        digest = getattr(hashlib, alg)
        for candidate in self._a.get_keys():
            if digest(candidate.asbytes()).digest() == wanted:
                return candidate
        raise ValueError("Key not found")

    def sign(self, keyid, data, hashalgo):
        """
        sign using an ssh-agent sign request

        :param keyid: the keyid as returned by certificate()
        :param data: data to sign; a str is encoded to bytes first
        :param hashalgo: has to be sha1, sha256 or sha512
        :return: the signature blob returned by the agent
        """
        assert hashalgo in ("sha1", "sha256", "sha512")
        if not isinstance(data, bytes):
            data = data.encode()
        # defined in
        # SSH Agent Protocol draft-miller-ssh-agent-00 5.3. Signature flags
        # https://tools.ietf.org/html/draft-miller-ssh-agent-00#section-5.3
        flags = {
            "sha1": 0,
            "sha256": 2,  # SSH_AGENT_RSA_SHA2_256
            "sha512": 4,  # SSH_AGENT_RSA_SHA2_512
        }[hashalgo]
        key = self.key(keyid)
        # AgentKey.sign_ssh_data is padding=PKCS1v15 alg=SHA1
        # paramiko does not expose the ssh-agent sign flags to use sha2-256/512
        # so the sign request is assembled here.
        request = paramiko.message.Message()
        request.add_byte(paramiko.agent.cSSH2_AGENTC_SIGN_REQUEST)
        request.add_string(key.blob)
        request.add_string(data)
        request.add_int(flags)
        ptype, result = self._a._send_message(request)
        if ptype != paramiko.agent.SSH2_AGENT_SIGN_RESPONSE:
            raise paramiko.SSHException("key cannot be used for signing")
        # The reply holds the algorithm name followed by the signature.
        reply = paramiko.message.Message(result.get_binary())
        alg = reply.get_text()
        if alg not in ("ssh-rsa", "rsa-sha2-256", "rsa-sha2-512"):
            raise ValueError(alg)
        return reply.get_binary()
endesive-2.19.1/endesive/pdf/ 0000775 0000000 0000000 00000000000 15042366745 0015775 5 ustar 00root root 0000000 0000000 endesive-2.19.1/endesive/pdf/PyPDF2/ 0000775 0000000 0000000 00000000000 15042366745 0017001 5 ustar 00root root 0000000 0000000 endesive-2.19.1/endesive/pdf/PyPDF2/__init__.py 0000664 0000000 0000000 00000000322 15042366745 0021107 0 ustar 00root root 0000000 0000000 from .pdf import PdfFileReader, PdfFileWriter
from .merger import PdfFileMerger
from .pagerange import PageRange, parse_filename_page_ranges
from ._version import __version__
__all__ = ["pdf", "PdfFileMerger"]
endesive-2.19.1/endesive/pdf/PyPDF2/_version.py 0000664 0000000 0000000 00000000027 15042366745 0021176 0 ustar 00root root 0000000 0000000 __version__ = '1.26.0'
endesive-2.19.1/endesive/pdf/PyPDF2/filters.py 0000664 0000000 0000000 00000037326 15042366745 0021036 0 ustar 00root root 0000000 0000000 # vim: sw=4:expandtab:foldmethod=marker
#
# Copyright (c) 2006, Mathieu Fenniak
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are
# met:
#
# * Redistributions of source code must retain the above copyright notice,
# this list of conditions and the following disclaimer.
# * Redistributions in binary form must reproduce the above copyright notice,
# this list of conditions and the following disclaimer in the documentation
# and/or other materials provided with the distribution.
# * The name of the author may not be used to endorse or promote products
# derived from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
# POSSIBILITY OF SUCH DAMAGE.
"""
Implementation of stream filters for PDF.
"""
__author__ = "Mathieu Fenniak"
__author_email__ = "biziqe@mathieu.fenniak.net"
import math
from .utils import PdfReadError, ord_, chr_, paethPredictor
from sys import version_info
if version_info < ( 3, 0 ):
from cStringIO import StringIO
else:
from io import StringIO
import struct
try:
    import zlib

    def decompress(data):
        """Return *data* decompressed with zlib."""
        return zlib.decompress(data)

    def compress(data):
        """Return *data* compressed with zlib."""
        return zlib.compress(data)

except ImportError:
    # Unable to import zlib.  Attempt to use the System.IO.Compression
    # library from the .NET framework. (IronPython only)
    import System
    from System import IO, Collections, Array

    def _string_to_bytearr(buf):
        """Copy the characters of *buf* into a .NET byte array."""
        arr = Array.CreateInstance(System.Byte, len(buf))
        for index, char in enumerate(buf):
            arr[index] = ord(char)
        return arr

    def _bytearr_to_string(raw):
        """Turn a .NET byte array into a string, one character per byte."""
        return "".join(chr(raw[index]) for index in range(raw.Length))

    def _read_bytes(stream):
        """Read *stream* to its end in 2048 byte chunks."""
        collected = IO.MemoryStream()
        chunk = Array.CreateInstance(System.Byte, 2048)
        count = stream.Read(chunk, 0, chunk.Length)
        while count != 0:
            collected.Write(chunk, 0, count)
            count = stream.Read(chunk, 0, chunk.Length)
        result = collected.ToArray()
        collected.Close()
        return result

    def decompress(data):
        """Inflate *data* with a .NET DeflateStream."""
        source = _string_to_bytearr(data)
        ms = IO.MemoryStream()
        ms.Write(source, 0, source.Length)
        ms.Position = 0  # fseek 0
        gz = IO.Compression.DeflateStream(ms, IO.Compression.CompressionMode.Decompress)
        inflated = _read_bytes(gz)
        retval = _bytearr_to_string(inflated)
        gz.Close()
        return retval

    def compress(data):
        """Deflate *data* with a .NET DeflateStream."""
        source = _string_to_bytearr(data)
        ms = IO.MemoryStream()
        # The third argument (True) is passed so the memory stream can still
        # be read after the deflate stream is closed.
        gz = IO.Compression.DeflateStream(ms, IO.Compression.CompressionMode.Compress, True)
        gz.Write(source, 0, source.Length)
        gz.Close()
        ms.Position = 0  # fseek 0
        deflated = ms.ToArray()
        retval = _bytearr_to_string(deflated)
        ms.Close()
        return retval
class FlateDecode(object):
    """Flate stream filter with support for the PNG row predictors."""

    @staticmethod
    def decode(data, decodeParms):
        """
        Decompress *data* and undo the predictor named in *decodeParms*.

        :param data: compressed stream data
        :param decodeParms: decode parameters; "/Predictor" (default 1, no
            predictor) and "/Columns" are read from it
        :return: the decompressed data; with a PNG predictor (10-15) a
            string built from the reconstructed rows
        :raises PdfReadError: for unsupported predictors or PNG filters
        """
        data = decompress(data)
        predictor = 1
        if decodeParms:
            try:
                predictor = decodeParms.get("/Predictor", 1)
            except AttributeError:
                pass  # usually an array with a null object was read
        # predictor 1 == no predictor
        if predictor == 1:
            return data

        columns = decodeParms["/Columns"]
        if not (10 <= predictor <= 15):
            # unsupported predictor
            raise PdfReadError("Unsupported flatedecode predictor %r" % predictor)

        # PNG prediction can vary from row to row: every row starts with a
        # byte naming the filter used for that row.
        stride = columns + 1
        assert len(data) % stride == 0
        previous = (0,) * stride
        pieces = []
        for row in range(len(data) // stride):
            start = row * stride
            current = [ord_(x) for x in data[start:start + stride]]
            filter_type = current[0]
            if filter_type == 1:
                # add the value to the left
                for i in range(2, stride):
                    current[i] = (current[i] + current[i - 1]) % 256
            elif filter_type == 2:
                # add the value above
                for i in range(1, stride):
                    current[i] = (current[i] + previous[i]) % 256
            elif filter_type == 3:
                # add the rounded down mean of left and above
                for i in range(1, stride):
                    left = current[i - 1] if i > 1 else 0
                    current[i] = (current[i] + (left + previous[i]) // 2) % 256
            elif filter_type == 4:
                # add the Paeth predictor of left, above and upper left
                for i in range(1, stride):
                    left = current[i - 1] if i > 1 else 0
                    up = previous[i]
                    up_left = previous[i - 1] if i > 1 else 0
                    current[i] = (current[i] + paethPredictor(left, up, up_left)) % 256
            elif filter_type != 0:
                # unsupported PNG filter
                raise PdfReadError("Unsupported PNG filter %r" % filter_type)
            previous = current
            pieces.append(''.join([chr(x) for x in current[1:]]))
        return ''.join(pieces)

    @staticmethod
    def encode(data):
        """Return *data* compressed; no predictor is applied."""
        return compress(data)
class ASCIIHexDecode(object):
    """Decoder for the PDF ``/ASCIIHexDecode`` filter.

    The encoded form is a sequence of hexadecimal digit pairs, optionally
    separated by whitespace, terminated by the ``>`` end-of-data marker.
    """
    def decode(data, decodeParms=None):
        """Decode hexadecimal ``data`` up to the ``>`` end-of-data marker.

        :param data: the encoded text, either ``str`` or ``bytes``.
        :param decodeParms: accepted for interface compatibility; unused.
        :return: the decoded data, one character per digit pair.  ``str``
            input yields ``str``; ``bytes`` input (Python 3) yields ``bytes``.
        :raises PdfReadError: if the data ends without the ``>`` marker.
        :raises ValueError: if a non-hexadecimal character is encountered.
        """
        # On Python 3 iterating bytes yields ints, which have no isspace();
        # work on text and convert back at the end.  (On Python 2 ``bytes``
        # is ``str``, so this stays False there.)
        as_bytes = isinstance(data, bytes) and not isinstance(data, str)
        if as_bytes:
            data = data.decode("latin-1")
        decoded = []
        pair = ""
        for c in data:
            if c == ">":
                break
            if c.isspace():
                continue
            pair += c
            if len(pair) == 2:
                decoded.append(chr(int(pair, base=16)))
                pair = ""
        else:
            # Ran off the end of the data without seeing the EOD marker.
            raise PdfReadError(
                "Missing '>' end-of-data marker in ASCIIHexDecode stream")
        if pair:
            # An odd number of digits before EOD is decoded as if a trailing
            # 0 followed the last digit.
            decoded.append(chr(int(pair + "0", base=16)))
        retval = "".join(decoded)
        if as_bytes:
            retval = retval.encode("latin-1")
        return retval
    decode = staticmethod(decode)
class LZWDecode(object):
    """LZW decoder for stream data.

    Taken from:
    http://www.java2s.com/Open-Source/Java-Document/PDF/PDF-Renderer/com/sun/pdfview/decode/LZWDecode.java.htm
    """
    class decoder(object):
        """Stateful bit reader plus code dictionary for one LZW-encoded buffer."""
        def __init__(self, data):
            """Store ``data`` and build the initial 256-entry dictionary."""
            # Reserved codes: 256 clears the dictionary, 257 ends the data.
            self.STOP=257
            self.CLEARDICT=256
            self.data=data
            # Read cursor: current byte index and bit offset inside that byte.
            self.bytepos=0
            self.bitpos=0
            self.dict=[""]*4096
            # Codes 0-255 map to the single character with that code.
            for i in range(256):
                self.dict[i]=chr(i)
            self.resetDict()
        def resetDict(self):
            """Reset the dictionary length to 258 and the code width to 9 bits."""
            self.dictlen=258
            self.bitspercode=9
        def nextCode(self):
            """Read the next ``bitspercode``-bit code, most significant bits first.

            :return: the code value, or -1 when the data runs out mid-code.
            """
            fillbits=self.bitspercode
            value=0
            while fillbits>0 :
                if self.bytepos >= len(self.data):
                    return -1
                nextbits=ord_(self.data[self.bytepos])
                # take whatever is left in the current byte, capped at what
                # the code still needs
                bitsfromhere=8-self.bitpos
                if bitsfromhere>fillbits:
                    bitsfromhere=fillbits
                # shift the wanted bits down, mask them, then place them at
                # their position within the code being assembled
                value |= (((nextbits >> (8-self.bitpos-bitsfromhere)) &
                           (0xff >> (8-bitsfromhere))) <<
                          (fillbits-bitsfromhere))
                fillbits -= bitsfromhere
                self.bitpos += bitsfromhere
                if self.bitpos >=8:
                    self.bitpos=0
                    self.bytepos = self.bytepos+1
            return value
        def decode(self):
            """ algorithm derived from:
            http://www.rasip.fer.hr/research/compress/algorithms/fund/lz/lzw.html
            and the PDFReference

            :return: the decoded data accumulated as a string.
            :raises PdfReadError: if the data ends before the stop code.
            """
            cW = self.CLEARDICT;
            baos=""
            while True:
                # pW is the previous code, cW the current one
                pW = cW;
                cW = self.nextCode();
                if cW == -1:
                    raise PdfReadError("Missed the stop code in LZWDecode!")
                if cW == self.STOP:
                    break;
                elif cW == self.CLEARDICT:
                    self.resetDict();
                elif pW == self.CLEARDICT:
                    # first code after a clear: emit it, nothing to add yet
                    baos+=self.dict[cW]
                else:
                    if cW < self.dictlen:
                        # known code: emit it and register previous entry plus
                        # the first character of the current entry
                        baos += self.dict[cW]
                        p=self.dict[pW]+self.dict[cW][0]
                        self.dict[self.dictlen]=p
                        self.dictlen+=1
                    else:
                        # code not in the dictionary yet: it must be the
                        # previous entry plus its own first character
                        p=self.dict[pW]+self.dict[pW][0]
                        baos+=p
                        self.dict[self.dictlen] = p;
                        self.dictlen+=1
                    # widen the code size (up to 12 bits) as the dictionary fills
                    if (self.dictlen >= (1 << self.bitspercode) - 1 and
                        self.bitspercode < 12):
                        self.bitspercode+=1
            return baos
    @staticmethod
    def decode(data,decodeParams=None):
        """Decode ``data`` with a fresh ``decoder``; ``decodeParams`` is unused."""
        return LZWDecode.decoder(data).decode()
class ASCII85Decode(object):
    """Decoder for the PDF ``/ASCII85Decode`` filter."""
    def decode(data, decodeParms=None):
        """Decode ASCII base-85 ``data`` terminated by ``~>``.

        An optional leading ``<~`` marker is skipped on both interpreter
        branches; previously only the Python 2 branch skipped it and the
        Python 3 branch returned empty output for such input.

        :param data: encoded data; ``str`` on Python 2, ``str`` or ``bytes``
            on Python 3.
        :param decodeParms: accepted for interface compatibility; unused.
        :return: the decoded data (``str`` on Python 2, ``bytes`` on Python 3).
        """
        if version_info < ( 3, 0 ):
            retval = ""
            group = []
            x = 0
            hitEod = False
            # remove all whitespace from data
            data = [y for y in data if not (y in ' \n\r\t')]
            while not hitEod:
                c = data[x]
                if len(retval) == 0 and c == "<" and data[x+1] == "~":
                    # optional start marker
                    x += 2
                    continue
                elif c == 'z':
                    # 'z' abbreviates a group of four zero bytes
                    assert len(group) == 0
                    retval += '\x00\x00\x00\x00'
                    x += 1
                    continue
                elif c == "~" and data[x+1] == ">":
                    if len(group) != 0:
                        # cannot have a final group of just 1 char
                        assert len(group) > 1
                        cnt = len(group) - 1
                        # pad the partial group; hitEod remembers how many
                        # decoded bytes of it are real
                        group += [ 85, 85, 85 ]
                        hitEod = cnt
                    else:
                        break
                else:
                    c = ord(c) - 33
                    assert c >= 0 and c < 85
                    group += [ c ]
                if len(group) >= 5:
                    b = group[0] * (85**4) + \
                        group[1] * (85**3) + \
                        group[2] * (85**2) + \
                        group[3] * 85 + \
                        group[4]
                    assert b < (2**32 - 1)
                    c4 = chr((b >> 0) % 256)
                    c3 = chr((b >> 8) % 256)
                    c2 = chr((b >> 16) % 256)
                    c1 = chr(b >> 24)
                    retval += (c1 + c2 + c3 + c4)
                    if hitEod:
                        retval = retval[:-4+hitEod]
                    group = []
                x += 1
            return retval
        else:
            if isinstance(data, str):
                data = data.encode('ascii')
            # '<' is itself a valid base-85 digit, so a leading "<~" marker
            # must be removed up front or the loop below would stop at its
            # '~' and return nothing.
            data = data.lstrip()
            if data[:2] == b'<~':
                data = data[2:]
            n = b = 0
            out = bytearray()
            for c in data:
                if ord('!') <= c and c <= ord('u'):
                    n += 1
                    b = b*85+(c-33)
                    if n == 5:
                        out += struct.pack(b'>L',b)
                        n = b = 0
                elif c == ord('z'):
                    # 'z' abbreviates a group of four zero bytes
                    assert n == 0
                    out += b'\0\0\0\0'
                elif c == ord('~'):
                    if n:
                        # pad the final partial group with the highest digit
                        # and keep only the bytes it really encodes
                        for _ in range(5-n):
                            b = b*85+84
                        out += struct.pack(b'>L',b)[:n-1]
                    break
            return bytes(out)
    decode = staticmethod(decode)
class DCTDecode(object):
    """Pass-through handler for ``/DCTDecode`` streams; the data is not decoded."""

    @staticmethod
    def decode(data, decodeParms=None):
        """Return ``data`` unchanged; ``decodeParms`` is accepted but unused."""
        return data
class JPXDecode(object):
    """Pass-through handler for ``/JPXDecode`` streams; the data is not decoded."""

    @staticmethod
    def decode(data, decodeParms=None):
        """Return ``data`` unchanged; ``decodeParms`` is accepted but unused."""
        return data
class CCITTFaxDecode(object):
    """Wraps ``/CCITTFaxDecode`` stream data in a minimal TIFF header.

    The fax data itself is not decoded; the result is a TIFF file image that
    an external TIFF reader can decode.
    """
    def decode(data, decodeParms=None, height=0):
        """Prefix ``data`` with a TIFF header describing the fax image.

        :param data: the raw CCITT-encoded bytes.
        :param decodeParms: decode parameters; ``/K`` and ``/Columns`` are
            read from it.  When absent (or a key is missing) the PDF defaults
            are used: ``/K`` 0 and ``/Columns`` 1728.
        :param height: image height in rows, written to the ImageLength and
            RowsPerStrip tags.
        :return: the TIFF header followed by ``data``.
        """
        # Defaults per the CCITTFaxDecode parameter table of the PDF spec.
        k = 0
        width = 1728
        if decodeParms:
            k = decodeParms.get("/K", 0)
            width = decodeParms.get("/Columns", 1728)
        # Any negative K selects Group 4; zero or positive K is Group 3.
        # The values 4 and 3 are the TIFF Compression tag codes.
        if k < 0:
            CCITTgroup = 4
        else:
            CCITTgroup = 3
        imgSize = len(data)
        tiff_header_struct = '<' + '2s' + 'h' + 'l' + 'h' + 'hhll' * 8 + 'h'
        tiffHeader = struct.pack(tiff_header_struct,
                           b'II',  # Byte order indication: Little endian
                           42,  # Version number (always 42)
                           8,  # Offset to first IFD
                           8,  # Number of tags in IFD
                           256, 4, 1, width,  # ImageWidth, LONG, 1, width
                           257, 4, 1, height,  # ImageLength, LONG, 1, length
                           258, 3, 1, 1,  # BitsPerSample, SHORT, 1, 1
                           259, 3, 1, CCITTgroup,  # Compression, SHORT, 1, 4 = CCITT Group 4 fax encoding
                           262, 3, 1, 0,  # Thresholding, SHORT, 1, 0 = WhiteIsZero
                           273, 4, 1, struct.calcsize(tiff_header_struct),  # StripOffsets, LONG, 1, length of header
                           278, 4, 1, height,  # RowsPerStrip, LONG, 1, length
                           279, 4, 1, imgSize,  # StripByteCounts, LONG, 1, size of image
                           0  # last IFD
                           )
        return tiffHeader + data
    decode = staticmethod(decode)
def decodeStreamData(stream):
    """Run a stream's raw data through every filter listed in its ``/Filter``.

    :param stream: a stream object exposing ``get`` and ``_data``.
    :return: the data after all filters were applied in order; the data is
        returned untouched when it is empty.
    :raises NotImplementedError: for an unsupported filter, or a ``/Crypt``
        filter whose parameters carry ``/Name`` or ``/Type``.
    """
    from .generic import NameObject
    filters = stream.get("/Filter", ())
    if len(filters) and not isinstance(filters[0], NameObject):
        # we have a single filter instance
        filters = (filters,)
    data = stream._data
    # If there is not data to decode we should not try to decode the data.
    if data:
        for filterType in filters:
            if filterType == "/FlateDecode" or filterType == "/Fl":
                data = FlateDecode.decode(data, stream.get("/DecodeParms"))
            elif filterType == "/ASCIIHexDecode" or filterType == "/AHx":
                data = ASCIIHexDecode.decode(data)
            elif filterType == "/LZWDecode" or filterType == "/LZW":
                data = LZWDecode.decode(data, stream.get("/DecodeParms"))
            elif filterType == "/ASCII85Decode" or filterType == "/A85":
                data = ASCII85Decode.decode(data)
            elif filterType == "/DCTDecode":
                data = DCTDecode.decode(data)
            elif filterType == "/JPXDecode":
                data = JPXDecode.decode(data)
            elif filterType == "/CCITTFaxDecode":
                # NOTE(review): the () fallback is not an integer, so a stream
                # without /Height will fail inside struct.pack - confirm
                # whether a numeric default is wanted here.
                height = stream.get("/Height", ())
                data = CCITTFaxDecode.decode(data, stream.get("/DecodeParms"), height)
            elif filterType == "/Crypt":
                # The parameter dictionary key is "/DecodeParms", as used by
                # every other branch; the former "/DecodeParams" spelling
                # never matched, so this check could not trigger.
                decodeParams = stream.get("/DecodeParms", {})
                # Only a real dictionary can carry /Name or /Type; anything
                # else (array, null object) is treated as "no parameters".
                if isinstance(decodeParams, dict) and (
                        "/Name" in decodeParams or "/Type" in decodeParams):
                    raise NotImplementedError("/Crypt filter with /Name or /Type not supported yet")
            else:
                # unsupported filter
                raise NotImplementedError("unsupported filter %s" % filterType)
    return data
endesive-2.19.1/endesive/pdf/PyPDF2/generic.py 0000664 0000000 0000000 00000131125 15042366745 0020772 0 ustar 00root root 0000000 0000000 # vim: sw=4:expandtab:foldmethod=marker
#
# Copyright (c) 2006, Mathieu Fenniak
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are
# met:
#
# * Redistributions of source code must retain the above copyright notice,
# this list of conditions and the following disclaimer.
# * Redistributions in binary form must reproduce the above copyright notice,
# this list of conditions and the following disclaimer in the documentation
# and/or other materials provided with the distribution.
# * The name of the author may not be used to endorse or promote products
# derived from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
# POSSIBILITY OF SUCH DAMAGE.
"""
Implementation of generic PDF objects (dictionary, number, string, and so on)
"""
__author__ = "Mathieu Fenniak"
__author_email__ = "biziqe@mathieu.fenniak.net"
import re
from .utils import readNonWhitespace, RC4_encrypt, skipOverComment
from .utils import b_, u_, chr_, ord_
from .utils import PdfStreamError
import warnings
from . import filters
from . import utils
import decimal
import codecs
import sys
#import debugging
# First bytes that identify an object type; readObject dispatches on the
# index of the stream's next byte within this string.
ObjectPrefix = b_('/<[tf(n%')
NumberSigns = b_('+-')
# Matches "<digits> <digits> R" followed by a non-letter, optionally signed;
# readObject uses it to tell an indirect reference from a plain number.
IndirectPattern = re.compile(b_(r"[+-]?(\d+)\s+(\d+)\s+R[^a-zA-Z]"))
def readObject(stream, pdf):
    """Read the next PDF object from ``stream``, dispatching on its first byte.

    The first byte is peeked (read, then un-read) and looked up in
    ``ObjectPrefix``; the matching reader is then called with the stream
    positioned at the start of the object.

    :param stream: a seekable binary stream.
    :param pdf: the owning reader, passed through to the object readers.
    :return: the parsed object.
    :raises PdfStreamError: if the stream ends inside a comment.
    """
    first = stream.read(1)
    stream.seek(-1, 1)  # un-read the peeked byte
    kind = ObjectPrefix.find(first)
    if kind == 0:
        # "/" - name object
        return NameObject.readFromStream(stream, pdf)
    if kind == 1:
        # "<" - either a dictionary ("<<") or a hexadecimal string
        opener = stream.read(2)
        stream.seek(-2, 1)  # un-read the peeked bytes
        if opener == b_('<<'):
            return DictionaryObject.readFromStream(stream, pdf)
        return readHexStringFromStream(stream)
    if kind == 2:
        # "[" - array object
        return ArrayObject.readFromStream(stream, pdf)
    if kind == 3 or kind == 4:
        # "t" / "f" - boolean object
        return BooleanObject.readFromStream(stream)
    if kind == 5:
        # "(" - literal string object
        return readStringFromStream(stream)
    if kind == 6:
        # "n" - null object
        return NullObject.readFromStream(stream)
    if kind == 7:
        # "%" - comment: skip to the end of the line, then over whitespace,
        # and parse whatever object follows
        while first not in (b_('\r'), b_('\n')):
            first = stream.read(1)
            # Hitting EOF here would otherwise loop forever.
            if len(first) <= 0:
                raise PdfStreamError("File ended unexpectedly.")
        first = readNonWhitespace(stream)
        stream.seek(-1, 1)
        return readObject(stream, pdf)
    # Anything else: a number, or an indirect reference ("12 0 R").
    lookahead = stream.read(20)
    stream.seek(-len(lookahead), 1)  # un-read the look-ahead window
    if IndirectPattern.match(lookahead) is not None:
        return IndirectObject.readFromStream(stream, pdf)
    return NumberObject.readFromStream(stream)
class PdfObject(object):
    """Common base class of the PDF object types defined in this module."""

    def getObject(self):
        """Return the object itself.

        Subclasses that stand in for another object (indirect references)
        override this to return the object they point at.
        """
        return self
class NullObject(PdfObject):
    """The PDF ``null`` object."""

    def writeToStream(self, stream, encryption_key):
        """Write the keyword ``null`` to ``stream``; ``encryption_key`` is unused."""
        stream.write(b_("null"))

    @staticmethod
    def readFromStream(stream):
        """Consume four bytes from ``stream`` and return a ``NullObject``.

        :raises utils.PdfReadError: if those bytes are not ``null``.
        """
        keyword = stream.read(4)
        if keyword == b_("null"):
            return NullObject()
        raise utils.PdfReadError("Could not read Null object")
class BooleanObject(PdfObject):
    """A PDF boolean; the Python truth value is kept in ``value``."""

    def __init__(self, value):
        self.value = value

    def writeToStream(self, stream, encryption_key):
        """Write ``true`` or ``false`` to ``stream``; ``encryption_key`` is unused."""
        keyword = b_("true") if self.value else b_("false")
        stream.write(keyword)

    @staticmethod
    def readFromStream(stream):
        """Parse ``true`` or ``false`` from ``stream``.

        :raises utils.PdfReadError: if neither keyword starts at the cursor.
        """
        word = stream.read(4)
        if word == b_("true"):
            return BooleanObject(True)
        if word == b_("fals"):
            # consume the fifth byte of "false" (it is not inspected)
            stream.read(1)
            return BooleanObject(False)
        raise utils.PdfReadError('Could not read Boolean object')
class ArrayObject(list, PdfObject):
    """A PDF array, stored as a Python list of PDF objects."""

    def writeToStream(self, stream, encryption_key):
        """Write the array as ``[ item item ... ]``, delegating to each item."""
        stream.write(b_("["))
        for item in self:
            stream.write(b_(" "))
            item.writeToStream(stream, encryption_key)
        stream.write(b_(" ]"))

    @staticmethod
    def readFromStream(stream, pdf):
        """Parse an array starting at ``[`` and return it.

        :raises utils.PdfReadError: if the cursor is not on ``[``.
        """
        items = ArrayObject()
        if stream.read(1) != b_("["):
            raise utils.PdfReadError("Could not read array")
        while True:
            # skip whitespace in front of the next element
            ch = stream.read(1)
            while ch.isspace():
                ch = stream.read(1)
            stream.seek(-1, 1)
            # a closing bracket ends the array; anything else is un-read and
            # parsed as the next element
            if stream.read(1) == b_("]"):
                break
            stream.seek(-1, 1)
            items.append(readObject(stream, pdf))
        return items
class IndirectObject(PdfObject):
    """A reference (``<idnum> <generation> R``) to an object owned by ``pdf``."""

    def __init__(self, idnum, generation, pdf):
        self.idnum = idnum
        self.generation = generation
        self.pdf = pdf

    def getObject(self):
        """Resolve the reference through ``pdf.getObject`` (following chains)."""
        return self.pdf.getObject(self).getObject()

    def __repr__(self):
        return "IndirectObject(%r, %r)" % (self.idnum, self.generation)

    def __eq__(self, other):
        """Equal when both refer to the same id/generation of the same ``pdf``."""
        return (
            isinstance(other, IndirectObject) and
            self.idnum == other.idnum and
            self.generation == other.generation and
            self.pdf is other.pdf
        )

    def __ne__(self, other):
        return not self.__eq__(other)

    def __hash__(self):
        # Defining __eq__ alone makes instances unhashable on Python 3, so
        # provide a hash built from exactly the fields __eq__ compares
        # (``pdf`` by identity).
        return hash((self.idnum, self.generation, id(self.pdf)))

    def writeToStream(self, stream, encryption_key):
        """Write ``<idnum> <generation> R``; ``encryption_key`` is unused."""
        stream.write(b_("%s %s R" % (self.idnum, self.generation)))

    def readFromStream(stream, pdf):
        """Parse ``<idnum> <generation> R`` from ``stream``.

        :raises PdfStreamError: if the stream ends inside either number.
        :raises utils.PdfReadError: if the ``R`` keyword does not follow.
        """
        idnum = b_("")
        while True:
            tok = stream.read(1)
            if not tok:
                # stream has truncated prematurely
                raise PdfStreamError("Stream has ended unexpectedly")
            if tok.isspace():
                break
            idnum += tok
        generation = b_("")
        while True:
            tok = stream.read(1)
            if not tok:
                # stream has truncated prematurely
                raise PdfStreamError("Stream has ended unexpectedly")
            if tok.isspace():
                # tolerate extra whitespace before the generation number
                if not generation:
                    continue
                break
            generation += tok
        r = readNonWhitespace(stream)
        if r != b_("R"):
            raise utils.PdfReadError("Error reading indirect object reference at byte %s" % utils.hexStr(stream.tell()))
        return IndirectObject(int(idnum), int(generation), pdf)
    readFromStream = staticmethod(readFromStream)
class FloatObject(decimal.Decimal, PdfObject):
    """A PDF real number backed by ``decimal.Decimal``."""

    def __new__(cls, value="0", context=None):
        # First attempt goes through utils.str_; on any failure fall back to
        # the plain str() of the value.
        try:
            return decimal.Decimal.__new__(cls, utils.str_(value), context)
        except:
            return decimal.Decimal.__new__(cls, str(value))

    def __repr__(self):
        """Format the number without an exponent and without trailing zeros."""
        if self == self.to_integral():
            return str(self.quantize(decimal.Decimal(1)))
        # "%.5f" pads with zeros that carry no information; drop them.
        text = "%.5f" % self
        return text.rstrip('0')

    def as_numeric(self):
        """Return the value as a Python ``float`` parsed from ``repr(self)``."""
        return float(b_(repr(self)))

    def writeToStream(self, stream, encryption_key):
        """Write ``repr(self)`` to ``stream``; ``encryption_key`` is unused."""
        stream.write(b_(repr(self)))
class NumberObject(int, PdfObject):
    """A PDF integer number."""

    # Used with utils.readUntilRegex to find the end of a numeric token.
    NumberPattern = re.compile(b_('[^+-.0-9]'))
    ByteDot = b_(".")

    def __new__(cls, value):
        number = int(value)
        try:
            return int.__new__(cls, number)
        except OverflowError:
            return int.__new__(cls, 0)

    def as_numeric(self):
        """Return the value as a plain ``int`` parsed from ``repr(self)``."""
        return int(b_(repr(self)))

    def writeToStream(self, stream, encryption_key):
        """Write ``repr(self)`` to ``stream``; ``encryption_key`` is unused."""
        stream.write(b_(repr(self)))

    @staticmethod
    def readFromStream(stream):
        """Read a numeric token; one containing ``.`` becomes a ``FloatObject``."""
        token = utils.readUntilRegex(stream, NumberObject.NumberPattern)
        if token.find(NumberObject.ByteDot) == -1:
            return NumberObject(token)
        return FloatObject(token)
##
# Given a string (either a "str" or "unicode"), create a ByteStringObject or a
# TextStringObject to represent the string.
def createStringObject(string):
    """Wrap ``string`` in a ``TextStringObject`` or ``ByteStringObject``.

    Text input becomes a ``TextStringObject`` directly.  Byte input is
    decoded as UTF-16 when it starts with the UTF-16BE byte order mark and
    via ``decode_pdfdocencoding`` otherwise; the matching ``autodetect_*``
    flag is set on the result.  Bytes that fail to decode are returned as a
    ``ByteStringObject``.

    :raises TypeError: if ``string`` is neither text nor bytes.
    """
    if isinstance(string, utils.string_type):
        return TextStringObject(string)
    if not isinstance(string, utils.bytes_type):
        raise TypeError("createStringObject should have str or unicode arg")
    try:
        if string.startswith(codecs.BOM_UTF16_BE):
            text = TextStringObject(string.decode("utf-16"))
            text.autodetect_utf16 = True
        else:
            # Attempting the decode is the only way to find out whether the
            # bytes are text at all; some strings are just byte arrays.
            text = TextStringObject(decode_pdfdocencoding(string))
            text.autodetect_pdfdocencoding = True
        return text
    except UnicodeDecodeError:
        return ByteStringObject(string)
def readHexStringFromStream(stream):
    """Parse a hexadecimal string (``<...>``) and return a string object.

    Whitespace between digits is skipped; a trailing single digit is padded
    with ``0``.

    :raises PdfStreamError: if the stream ends before the closing ``>``.
    """
    stream.read(1)  # consume the opening "<"
    chars = []
    digits = b_("")
    while True:
        tok = readNonWhitespace(stream)
        if not tok:
            # stream has truncated prematurely
            raise PdfStreamError("Stream has ended unexpectedly")
        if tok == b_(">"):
            break
        digits += tok
        if len(digits) == 2:
            chars.append(chr(int(digits, base=16)))
            digits = b_("")
    # an odd digit count is completed with a zero
    if len(digits) == 1:
        digits += b_("0")
    if len(digits) == 2:
        chars.append(chr(int(digits, base=16)))
    return createStringObject(b_("".join(chars)))
def readStringFromStream(stream):
    """Parse a literal string (``(...)``) and return a string object.

    Handles balanced nested parentheses, backslash escapes, one-to-three
    digit octal escapes and backslash-newline line continuations.

    :param stream: a seekable binary stream positioned on the opening ``(``.
    :return: the result of ``createStringObject`` on the decoded bytes.
    :raises PdfStreamError: if the stream ends before the string is closed.
    :raises utils.PdfReadError: for an unrecognised escape sequence.
    """
    tok = stream.read(1)  # consume the opening "("
    parens = 1
    txt = b_("")
    # Built once per call rather than once per byte read.
    escape_dict = {b_("n") : b_("\n"),
                   b_("r") : b_("\r"),
                   b_("t") : b_("\t"),
                   b_("b") : b_("\b"),
                   b_("f") : b_("\f"),
                   b_("c") : b_(r"\c"),
                   b_("(") : b_("("),
                   b_(")") : b_(")"),
                   b_("/") : b_("/"),
                   b_("\\") : b_("\\"),
                   b_(" ") : b_(" "),
                   b_("%") : b_("%"),
                   b_("<") : b_("<"),
                   b_(">") : b_(">"),
                   b_("[") : b_("["),
                   b_("]") : b_("]"),
                   b_("#") : b_("#"),
                   b_("_") : b_("_"),
                   b_("&") : b_("&"),
                   b_('$') : b_('$'),
                   }
    while True:
        tok = stream.read(1)
        if not tok:
            # stream has truncated prematurely
            raise PdfStreamError("Stream has ended unexpectedly")
        if tok == b_("("):
            parens += 1
        elif tok == b_(")"):
            parens -= 1
            if parens == 0:
                break
        elif tok == b_("\\"):
            tok = stream.read(1)
            try:
                tok = escape_dict[tok]
            except KeyError:
                if tok.isdigit():
                    # "The number ddd may consist of one, two, or three
                    # octal digits; high-order overflow shall be ignored.
                    # Three octal digits shall be used, with leading zeros
                    # as needed, if the next character of the string is also
                    # a digit." (PDF reference 7.3.4.2, p 16)
                    for _ in range(2):
                        ntok = stream.read(1)
                        if ntok.isdigit():
                            tok += ntok
                        else:
                            # The byte that ended the escape belongs to the
                            # string (it may even be the closing paren), so
                            # put it back instead of dropping it.
                            if ntok:
                                stream.seek(-1, 1)
                            break
                    # Mask to one byte: high-order overflow is ignored.
                    tok = b_(chr(int(tok, base=8) & 0xFF))
                elif tok in b_("\n\r"):
                    # This case is hit when a backslash followed by a line
                    # break occurs.  If it's a multi-char EOL, consume the
                    # second character:
                    tok = stream.read(1)
                    if not tok in b_("\n\r"):
                        stream.seek(-1, 1)
                    # Then don't add anything to the actual string, since this
                    # line break was escaped:
                    tok = b_('')
                else:
                    raise utils.PdfReadError(r"Unexpected escaped string: %s" % tok)
        txt += tok
    return createStringObject(txt)
##
# Represents a string object where the text encoding could not be determined.
# This occurs quite often, as the PDF spec doesn't provide an alternate way to
# represent strings -- for example, the encryption data stored in files (like
# /O) is clearly not text, but is still stored in a "String" object.
class ByteStringObject(utils.bytes_type, PdfObject):
    """A string object kept as raw bytes because no text encoding applies."""

    # Mirrors TextStringObject.original_bytes; for a byte string the original
    # bytes are simply the object itself.
    original_bytes = property(lambda self: self)

    def writeToStream(self, stream, encryption_key):
        """Write the bytes as a hexadecimal string (``<...>``).

        When ``encryption_key`` is truthy the bytes are first passed through
        ``RC4_encrypt`` with that key.
        """
        payload = self
        if encryption_key:
            payload = RC4_encrypt(encryption_key, payload)
        stream.write(b_("<"))
        stream.write(utils.hexencode(payload))
        stream.write(b_(">"))
##
# Represents a string object that has been decoded into a real unicode string.
# If read from a PDF document, this string appeared to match the
# PDFDocEncoding, or contained a UTF-16BE BOM mark to cause UTF-16 decoding to
# occur.
class TextStringObject(utils.string_type, PdfObject):
    """A string object that was decoded into text.

    The ``autodetect_*`` flags record which decoding produced the text so the
    original bytes can be reconstructed on request.
    """
    autodetect_pdfdocencoding = False
    autodetect_utf16 = False

    # The autodetection may produce a text string where raw bytes were
    # expected; this property recovers the bytes the text was decoded from.
    original_bytes = property(lambda self: self.get_original_bytes())

    def get_original_bytes(self):
        """Re-encode the text with the encoding it was autodetected from.

        :raises Exception: if neither autodetect flag is set, since the
            original encoding is then unknown.
        """
        if self.autodetect_utf16:
            return codecs.BOM_UTF16_BE + self.encode("utf-16be")
        if self.autodetect_pdfdocencoding:
            return encode_pdfdocencoding(self)
        raise Exception("no information about original bytes")

    def writeToStream(self, stream, encryption_key):
        """Write the string, preferring PDFDocEncoding over UTF-16BE.

        With a truthy ``encryption_key`` the encoded bytes are RC4-encrypted
        and written as a hexadecimal byte string; otherwise they are written
        as a literal string with non-alphanumeric bytes octal-escaped.
        """
        # PDFDocEncoding is nicer to read in the file; fall back to UTF-16BE
        # with a byte order mark when the text cannot be represented.
        try:
            encoded = encode_pdfdocencoding(self)
        except UnicodeEncodeError:
            encoded = codecs.BOM_UTF16_BE + self.encode("utf-16be")
        if encryption_key:
            encoded = RC4_encrypt(encryption_key, encoded)
            ByteStringObject(encoded).writeToStream(stream, None)
            return
        stream.write(b_("("))
        for c in encoded:
            if chr_(c).isalnum() or c == b_(' '):
                stream.write(b_(chr_(c)))
            else:
                stream.write(b_("\\%03o" % ord_(c)))
        stream.write(b_(")"))
class NameObject(str, PdfObject):
    """A PDF name such as ``/Type``; the leading slash is part of the value."""

    # Whitespace or any of these delimiter characters ends a name token.
    delimiterPattern = re.compile(b_(r"\s+|[\(\)<>\[\]{}/%]"))
    surfix = b_("/")

    def writeToStream(self, stream, encryption_key):
        """Write the name to ``stream``; ``encryption_key`` is unused."""
        stream.write(b_(self))

    @staticmethod
    def readFromStream(stream, pdf):
        """Parse a name starting at ``/`` and return it as a ``NameObject``.

        The raw bytes are decoded as UTF-8.  If that fails and ``pdf.strict``
        is false, a warning is issued and the undecoded bytes are wrapped
        instead.

        :raises utils.PdfReadError: if the cursor is not on ``/``, or the
            bytes cannot be decoded while ``pdf.strict`` is true.
        """
        name = stream.read(1)
        if name != NameObject.surfix:
            raise utils.PdfReadError("name read error")
        name += utils.readUntilRegex(stream, NameObject.delimiterPattern,
                                     ignore_eof=True)
        try:
            return NameObject(name.decode('utf-8'))
        except (UnicodeEncodeError, UnicodeDecodeError):
            # Name objects should represent irregular characters
            # with a '#' followed by the symbol's hex number
            if pdf.strict:
                raise utils.PdfReadError("Illegal character in Name Object")
            warnings.warn("Illegal character in Name Object", utils.PdfReadWarning)
            return NameObject(name)
class DictionaryObject(dict, PdfObject):
    """A PDF dictionary whose keys and values must be ``PdfObject`` instances.

    Item access through ``[]`` resolves indirect references by calling
    ``getObject()`` on the stored value; ``raw_get`` returns it unresolved.
    """
    def raw_get(self, key):
        """Return the stored value for ``key`` without resolving references."""
        return dict.__getitem__(self, key)
    def __setitem__(self, key, value):
        """Store ``value`` under ``key``; both must be ``PdfObject`` instances.

        :raises ValueError: if either is not a ``PdfObject``.
        """
        if not isinstance(key, PdfObject):
            raise ValueError("key must be PdfObject")
        if not isinstance(value, PdfObject):
            raise ValueError("value must be PdfObject")
        return dict.__setitem__(self, key, value)
    def setdefault(self, key, value=None):
        """Like ``dict.setdefault`` with the same type checks as ``__setitem__``.

        Note that the default ``value=None`` is itself rejected by the check.

        :raises ValueError: if ``key`` or ``value`` is not a ``PdfObject``.
        """
        if not isinstance(key, PdfObject):
            raise ValueError("key must be PdfObject")
        if not isinstance(value, PdfObject):
            raise ValueError("value must be PdfObject")
        return dict.setdefault(self, key, value)
    def __getitem__(self, key):
        """Return the value for ``key`` after calling ``getObject()`` on it."""
        return dict.__getitem__(self, key).getObject()
    ##
    # Retrieves XMP (Extensible Metadata Platform) data relevant to the
    # this object, if available.
    #
    # Stability: Added in v1.12, will exist for all future v1.x releases.
    # @return Returns a {@link #xmp.XmpInformation XmlInformation} instance
    # that can be used to access XMP metadata from the document.  Can also
    # return None if no metadata was found on the document root.
    def getXmpMetadata(self):
        metadata = self.get("/Metadata", None)
        if metadata == None:
            return None
        metadata = metadata.getObject()
        # imported here rather than at module level
        from . import xmp
        if not isinstance(metadata, xmp.XmpInformation):
            # wrap once and store the wrapper back so later calls reuse it
            metadata = xmp.XmpInformation(metadata)
            self[NameObject("/Metadata")] = metadata
        return metadata
    ##
    # Read-only property that accesses the {@link
    # #DictionaryObject.getXmpData getXmpData} function.
    #
    # Stability: Added in v1.12, will exist for all future v1.x releases.
    xmpMetadata = property(lambda self: self.getXmpMetadata(), None, None)
    def writeToStream(self, stream, encryption_key):
        """Write the dictionary as ``<< key value ... >>``, one pair per line."""
        stream.write(b_("<<\n"))
        for key, value in list(self.items()):
            key.writeToStream(stream, encryption_key)
            stream.write(b_(" "))
            value.writeToStream(stream, encryption_key)
            stream.write(b_("\n"))
        stream.write(b_(">>"))
    def readFromStream(stream, pdf):
        """Parse a dictionary, and the stream body that may follow it.

        :param stream: a seekable binary stream positioned on ``<<``.
        :param pdf: the owning reader; its ``strict`` flag decides whether a
            repeated key raises or only warns, and ``getObject`` resolves an
            indirect ``/Length``.
        :return: a ``DictionaryObject``, or the result of
            ``StreamObject.initializeFromDictionary`` when a ``stream``
            keyword follows the dictionary.
        :raises utils.PdfReadError: on a missing ``<<``, on a repeated key in
            strict mode, or when ``endstream`` cannot be located.
        :raises PdfStreamError: if the stream ends inside the dictionary.
        """
        debug = False
        tmp = stream.read(2)
        if tmp != b_("<<"):
            raise utils.PdfReadError("Dictionary read error at byte %s: stream must begin with '<<'" % utils.hexStr(stream.tell()))
        data = {}
        while True:
            tok = readNonWhitespace(stream)
            if tok == b_('\x00'):
                # stray NUL bytes between entries are skipped
                continue
            elif tok == b_('%'):
                stream.seek(-1, 1)
                skipOverComment(stream)
                continue
            if not tok:
                # stream has truncated prematurely
                raise PdfStreamError("Stream has ended unexpectedly")
            if debug: print(("Tok:", tok))
            if tok == b_(">"):
                # first ">" seen; consume the second one of ">>"
                stream.read(1)
                break
            stream.seek(-1, 1)
            key = readObject(stream, pdf)
            tok = readNonWhitespace(stream)
            stream.seek(-1, 1)
            value = readObject(stream, pdf)
            # NOTE(review): this is a truthiness test, so an existing falsy
            # value is overwritten without the duplicate-key handling below.
            if not data.get(key):
                data[key] = value
            elif pdf.strict:
                # multiple definitions of key not permitted
                raise utils.PdfReadError("Multiple definitions in dictionary at byte %s for key %s" \
                                         % (utils.hexStr(stream.tell()), key))
            else:
                warnings.warn("Multiple definitions in dictionary at byte %s for key %s" \
                              % (utils.hexStr(stream.tell()), key), utils.PdfReadWarning)
        # remember where the dictionary ended so the cursor can be restored
        # when no stream body follows
        pos = stream.tell()
        s = readNonWhitespace(stream)
        if s == b_('s') and stream.read(5) == b_('tream'):
            eol = stream.read(1)
            # odd PDF file output has spaces after 'stream' keyword but before EOL.
            # patch provided by Danial Sandler
            while eol == b_(' '):
                eol = stream.read(1)
            assert eol in (b_("\n"), b_("\r"))
            if eol == b_("\r"):
                # read \n after
                if stream.read(1) != b_('\n'):
                    stream.seek(-1, 1)
            # this is a stream object, not a dictionary
            assert "/Length" in data
            length = data["/Length"]
            if debug: print(data)
            if isinstance(length, IndirectObject):
                # resolving the reference may move the cursor; save/restore it
                t = stream.tell()
                length = pdf.getObject(length)
                stream.seek(t, 0)
            data["__streamdata__"] = stream.read(length)
            if debug: print("here")
            #if debug: print(binascii.hexlify(data["__streamdata__"]))
            e = readNonWhitespace(stream)
            ndstream = stream.read(8)
            if (e + ndstream) != b_("endstream"):
                # (sigh) - the odd PDF file has a length that is too long, so
                # we need to read backwards to find the "endstream" ending.
                # ReportLab (unknown version) generates files with this bug,
                # and Python users into PDF files tend to be our audience.
                # we need to do this to correct the streamdata and chop off
                # an extra character.
                pos = stream.tell()
                stream.seek(-10, 1)
                end = stream.read(9)
                if end == b_("endstream"):
                    # we found it by looking back one character further.
                    data["__streamdata__"] = data["__streamdata__"][:-1]
                else:
                    if debug: print(("E", e, ndstream, debugging.toHex(end)))
                    stream.seek(pos, 0)
                    raise utils.PdfReadError("Unable to find 'endstream' marker after stream at byte %s." % utils.hexStr(stream.tell()))
        else:
            # no stream body: rewind to just after the dictionary
            stream.seek(pos, 0)
        if "__streamdata__" in data:
            return StreamObject.initializeFromDictionary(data)
        else:
            retval = DictionaryObject()
            retval.update(data)
            return retval
    readFromStream = staticmethod(readFromStream)
class TreeObject(DictionaryObject):
    """A dictionary that heads a linked list of child dictionaries.

    The head stores ``/First``, ``/Last`` and ``/Count``; each child stores
    ``/Parent`` and, where applicable, ``/Prev`` and ``/Next``.
    """

    def __init__(self):
        DictionaryObject.__init__(self)

    def hasChildren(self):
        """Return True when the tree has a ``/First`` entry."""
        return '/First' in self

    def __iter__(self):
        return self.children()

    def children(self):
        """Yield each child, following ``/Next`` from ``/First`` to ``/Last``."""
        # A plain ``return`` ends the generator.  Raising StopIteration from
        # inside a generator is turned into RuntimeError by PEP 479.
        if not self.hasChildren():
            return
        child = self['/First']
        while True:
            yield child
            if child == self['/Last']:
                return
            child = child['/Next']

    def addChild(self, child, pdf):
        """Append ``child`` to the end of the list and link it into the tree.

        :param child: the object to add; resolved with ``getObject()``.
        :param pdf: the owner used to obtain references via ``getReference``.
        """
        childObj = child.getObject()
        child = pdf.getReference(childObj)
        assert isinstance(child, IndirectObject)
        if '/First' not in self:
            self[NameObject('/First')] = child
            self[NameObject('/Count')] = NumberObject(0)
            prev = None
        else:
            prev = self['/Last']
        self[NameObject('/Last')] = child
        self[NameObject('/Count')] = NumberObject(self[NameObject('/Count')] + 1)
        if prev:
            prevRef = pdf.getReference(prev)
            assert isinstance(prevRef, IndirectObject)
            childObj[NameObject('/Prev')] = prevRef
            prev[NameObject('/Next')] = child
        parentRef = pdf.getReference(self)
        assert isinstance(parentRef, IndirectObject)
        childObj[NameObject('/Parent')] = parentRef

    def removeChild(self, child):
        """Unlink ``child`` from the list and clear its tree entries.

        :raises ValueError: if ``child`` has no ``/Parent``, belongs to a
            different tree, or cannot be found while walking the list.
        """
        childObj = child.getObject()
        if NameObject('/Parent') not in childObj:
            raise ValueError("Removed child does not appear to be a tree item")
        elif childObj[NameObject('/Parent')] != self:
            raise ValueError("Removed child is not a member of this tree")
        found = False
        prevRef = None
        prev = None
        curRef = self[NameObject('/First')]
        cur = curRef.getObject()
        lastRef = self[NameObject('/Last')]
        last = lastRef.getObject()
        while cur != None:
            if cur == childObj:
                if prev == None:
                    if NameObject('/Next') in cur:
                        # Removing first tree node
                        nextRef = cur[NameObject('/Next')]
                        nextObj = nextRef.getObject()
                        del nextObj[NameObject('/Prev')]
                        self[NameObject('/First')] = nextRef
                        # Arithmetic on a NumberObject yields a plain int,
                        # which __setitem__ rejects; wrap the result again.
                        self[NameObject('/Count')] = NumberObject(self[NameObject('/Count')] - 1)
                    else:
                        # Removing only tree node
                        assert self[NameObject('/Count')] == 1
                        del self[NameObject('/Count')]
                        del self[NameObject('/First')]
                        if NameObject('/Last') in self:
                            del self[NameObject('/Last')]
                else:
                    if NameObject('/Next') in cur:
                        # Removing middle tree node
                        nextRef = cur[NameObject('/Next')]
                        nextObj = nextRef.getObject()
                        nextObj[NameObject('/Prev')] = prevRef
                        prev[NameObject('/Next')] = nextRef
                        self[NameObject('/Count')] = NumberObject(self[NameObject('/Count')] - 1)
                    else:
                        # Removing last tree node
                        assert cur == last
                        del prev[NameObject('/Next')]
                        self[NameObject('/Last')] = prevRef
                        self[NameObject('/Count')] = NumberObject(self[NameObject('/Count')] - 1)
                found = True
                break
            prevRef = curRef
            prev = cur
            if NameObject('/Next') in cur:
                curRef = cur[NameObject('/Next')]
                cur = curRef.getObject()
            else:
                curRef = None
                cur = None
        if not found:
            raise ValueError("Removal couldn't find item in tree")
        del childObj[NameObject('/Parent')]
        if NameObject('/Next') in childObj:
            del childObj[NameObject('/Next')]
        if NameObject('/Prev') in childObj:
            del childObj[NameObject('/Prev')]

    def emptyTree(self):
        """Detach every child and remove the tree's bookkeeping entries."""
        # Collect the children before touching them: the generator follows
        # each child's /Next link, which the loop body deletes.
        for child in list(self.children()):
            childObj = child.getObject()
            del childObj[NameObject('/Parent')]
            if NameObject('/Next') in childObj:
                del childObj[NameObject('/Next')]
            if NameObject('/Prev') in childObj:
                del childObj[NameObject('/Prev')]
        if NameObject('/Count') in self:
            del self[NameObject('/Count')]
        if NameObject('/First') in self:
            del self[NameObject('/First')]
        if NameObject('/Last') in self:
            del self[NameObject('/Last')]
class StreamObject(DictionaryObject):
    """A PDF stream: a dictionary plus a payload kept in ``_data``."""

    def __init__(self):
        self._data = None
        self.decodedSelf = None

    def writeToStream(self, stream, encryption_key):
        """Write the dictionary followed by the ``stream`` ... ``endstream`` body.

        ``/Length`` is set from the payload only while the dictionary is being
        written and is removed again afterwards.  With a truthy
        ``encryption_key`` the payload is passed through ``RC4_encrypt``.
        """
        self[NameObject("/Length")] = NumberObject(len(self._data))
        DictionaryObject.writeToStream(self, stream, encryption_key)
        del self["/Length"]
        stream.write(b_("\nstream\n"))
        payload = self._data
        if encryption_key:
            payload = RC4_encrypt(encryption_key, payload)
        stream.write(payload)
        stream.write(b_("\nendstream"))

    @staticmethod
    def initializeFromDictionary(data):
        """Build a stream object from a parsed dictionary.

        ``data`` must hold the payload under ``"__streamdata__"`` and a
        ``"/Length"`` entry; both are removed from ``data``.  The result is an
        ``EncodedStreamObject`` when ``"/Filter"`` is present and a
        ``DecodedStreamObject`` otherwise.
        """
        stream_cls = EncodedStreamObject if "/Filter" in data else DecodedStreamObject
        retval = stream_cls()
        retval._data = data["__streamdata__"]
        del data["__streamdata__"]
        del data["/Length"]
        retval.update(data)
        return retval

    def flateEncode(self):
        """Return a new ``EncodedStreamObject`` holding the Flate-compressed payload.

        ``/FlateDecode`` is placed in front of any filter already present.
        When the existing ``/Filter`` is an array, that array is extended in
        place and shared with the result.
        """
        flate = NameObject("/FlateDecode")
        if "/Filter" not in self:
            chain = flate
        else:
            chain = self["/Filter"]
            if isinstance(chain, ArrayObject):
                chain.insert(0, flate)
            else:
                wrapped = ArrayObject()
                wrapped.append(flate)
                wrapped.append(chain)
                chain = wrapped
        retval = EncodedStreamObject()
        retval[NameObject("/Filter")] = chain
        retval._data = filters.FlateDecode.encode(self._data)
        return retval
class DecodedStreamObject(StreamObject):
    """A stream whose payload is stored without any filter applied."""

    def getData(self):
        """Return the payload."""
        return self._data

    def setData(self, data):
        """Replace the payload with ``data``."""
        self._data = data
class EncodedStreamObject(StreamObject):
    """A stream whose payload is still encoded by the filters in ``/Filter``."""

    def __init__(self):
        # Initialise both ``_data`` and ``decodedSelf`` like the base class,
        # so a freshly built instance never lacks the ``_data`` attribute.
        StreamObject.__init__(self)

    def getData(self):
        """Return the decoded payload, decoding it on first use.

        The decoded form is cached in ``decodedSelf`` as a
        ``DecodedStreamObject`` that also carries every dictionary entry
        except ``/Length``, ``/Filter`` and ``/DecodeParms``.
        """
        # Compare against None explicitly: the cached object is a dict and is
        # falsy when the stream has no entries besides the filter keys, which
        # previously caused the data to be decoded again on every call.
        if self.decodedSelf is not None:
            # cached version of decoded object
            return self.decodedSelf.getData()
        else:
            # create decoded object
            decoded = DecodedStreamObject()
            decoded._data = filters.decodeStreamData(self)
            for key, value in list(self.items()):
                if not key in ("/Length", "/Filter", "/DecodeParms"):
                    decoded[key] = value
            self.decodedSelf = decoded
            return decoded._data

    def setData(self, data):
        """Not supported for encoded streams.

        :raises utils.PdfReadError: always.
        """
        raise utils.PdfReadError("Creating EncodedStreamObject is not currently supported")
class RectangleObject(ArrayObject):
    """
    A four-number array ``[llx, lly, urx, ury]`` describing a rectangle, used
    to represent page boxes (art, bleed, crop, media and trim box).
    """

    def __init__(self, arr):
        # a rectangle is exactly four coordinates
        assert len(arr) == 4
        # coerce every coordinate to a number object where necessary
        coords = [self.ensureIsNumber(x) for x in arr]
        ArrayObject.__init__(self, coords)

    def ensureIsNumber(self, value):
        """Return ``value`` as is if it is a number object, else as ``FloatObject``."""
        if isinstance(value, (NumberObject, FloatObject)):
            return value
        return FloatObject(value)

    def __repr__(self):
        return "RectangleObject(%s)" % repr(list(self))

    # Coordinate accessors: indices 0/1 hold the lower-left corner and
    # indices 2/3 the upper-right corner.
    def getLowerLeft_x(self):
        return self[0]

    def getLowerLeft_y(self):
        return self[1]

    def getUpperRight_x(self):
        return self[2]

    def getUpperRight_y(self):
        return self[3]

    # The remaining corners are derived from the two stored ones.
    def getUpperLeft_x(self):
        return self.getLowerLeft_x()

    def getUpperLeft_y(self):
        return self.getUpperRight_y()

    def getLowerRight_x(self):
        return self.getUpperRight_x()

    def getLowerRight_y(self):
        return self.getLowerLeft_y()

    def getLowerLeft(self):
        return self.getLowerLeft_x(), self.getLowerLeft_y()

    def getLowerRight(self):
        return self.getLowerRight_x(), self.getLowerRight_y()

    def getUpperLeft(self):
        return self.getUpperLeft_x(), self.getUpperLeft_y()

    def getUpperRight(self):
        return self.getUpperRight_x(), self.getUpperRight_y()

    # Each setter takes an (x, y) pair and writes the two matching slots.
    def setLowerLeft(self, value):
        x, y = [self.ensureIsNumber(v) for v in value]
        self[0], self[1] = x, y

    def setLowerRight(self, value):
        x, y = [self.ensureIsNumber(v) for v in value]
        self[2], self[1] = x, y

    def setUpperLeft(self, value):
        x, y = [self.ensureIsNumber(v) for v in value]
        self[0], self[3] = x, y

    def setUpperRight(self, value):
        x, y = [self.ensureIsNumber(v) for v in value]
        self[2], self[3] = x, y

    def getWidth(self):
        """Return the horizontal extent (upper-right x minus lower-left x)."""
        return self.getUpperRight_x() - self.getLowerLeft_x()

    def getHeight(self):
        """Return the vertical extent (upper-right y minus lower-left y)."""
        return self.getUpperRight_y() - self.getLowerLeft_y()

    lowerLeft = property(getLowerLeft, setLowerLeft, None, None)
    """
    Property to read and modify the lower left coordinate of this box
    in (x,y) form.
    """
    lowerRight = property(getLowerRight, setLowerRight, None, None)
    """
    Property to read and modify the lower right coordinate of this box
    in (x,y) form.
    """
    upperLeft = property(getUpperLeft, setUpperLeft, None, None)
    """
    Property to read and modify the upper left coordinate of this box
    in (x,y) form.
    """
    upperRight = property(getUpperRight, setUpperRight, None, None)
    """
    Property to read and modify the upper right coordinate of this box
    in (x,y) form.
    """
class Field(TreeObject):
    """
    A class representing a field dictionary. This class is accessed through
    :meth:`getFields()`
    """

    def __init__(self, data):
        """
        Copy the recognised field entries from ``data`` into this object.

        :param data: mapping holding the field entries; any of the known
            keys whose lookup raises ``KeyError`` is simply skipped.
        """
        DictionaryObject.__init__(self)
        for key in ("/FT", "/Parent", "/Kids", "/T", "/TU", "/TM", "/Ff",
                    "/V", "/DV", "/AA"):
            try:
                self[NameObject(key)] = data[key]
            except KeyError:
                continue

    @property
    def fieldType(self):
        """Read-only property accessing the type of this field."""
        return self.get("/FT")

    @property
    def parent(self):
        """Read-only property accessing the parent of this field."""
        return self.get("/Parent")

    @property
    def kids(self):
        """Read-only property accessing the kids of this field."""
        return self.get("/Kids")

    @property
    def name(self):
        """Read-only property accessing the name of this field."""
        return self.get("/T")

    @property
    def altName(self):
        """Read-only property accessing the alternate name of this field."""
        return self.get("/TU")

    @property
    def mappingName(self):
        """
        Read-only property accessing the mapping name of this field. This
        name is used by PyPDF2 as a key in the dictionary returned by
        :meth:`getFields()`
        """
        return self.get("/TM")

    @property
    def flags(self):
        """
        Read-only property accessing the field flags, specifying various
        characteristics of the field (see Table 8.70 of the PDF 1.7 reference).
        """
        return self.get("/Ff")

    @property
    def value(self):
        """
        Read-only property accessing the value of this field. Format
        varies based on field type.
        """
        return self.get("/V")

    @property
    def defaultValue(self):
        """Read-only property accessing the default value of this field."""
        return self.get("/DV")

    @property
    def additionalActions(self):
        """
        Read-only property accessing the additional actions dictionary.
        This dictionary defines the field's behavior in response to trigger events.
        See Section 8.5.2 of the PDF 1.7 reference.
        """
        return self.get("/AA")
class Destination(TreeObject):
    """
    A class representing a destination within a PDF file.
    See section 8.2.1 of the PDF 1.6 reference.

    :param str title: Title of this destination.
    :param int page: Page number of this destination.
    :param str typ: How the destination is displayed.
    :param args: Additional arguments may be necessary depending on the type.
    :raises PdfReadError: If destination type is invalid.

    Valid ``typ`` arguments (see PDF spec for details):
             /Fit       No additional arguments
             /XYZ       [left] [top] [zoomFactor]
             /FitH      [top]
             /FitV      [left]
             /FitR      [left] [bottom] [right] [top]
             /FitB      No additional arguments
             /FitBH     [top]
             /FitBV     [left]
    """

    def __init__(self, title, page, typ, *args):
        DictionaryObject.__init__(self)
        self[NameObject("/Title")] = title
        self[NameObject("/Page")] = page
        self[NameObject("/Type")] = typ

        # from table 8.2 of the PDF 1.7 reference.
        # ``args`` is unpacked completely before anything is stored, so a
        # wrong argument count raises ValueError without a partial update.
        if typ == "/XYZ":
            left, top, zoom = args
            self[NameObject("/Left")] = left
            self[NameObject("/Top")] = top
            self[NameObject("/Zoom")] = zoom
        elif typ == "/FitR":
            left, bottom, right, top = args
            self[NameObject("/Left")] = left
            self[NameObject("/Bottom")] = bottom
            self[NameObject("/Right")] = right
            self[NameObject("/Top")] = top
        elif typ in ("/FitH", "/FitBH"):
            top, = args
            self[NameObject("/Top")] = top
        elif typ in ("/FitV", "/FitBV"):
            left, = args
            self[NameObject("/Left")] = left
        elif typ in ("/Fit", "/FitB"):
            # These types take no coordinates; extra ``args`` are ignored.
            pass
        else:
            raise utils.PdfReadError("Unknown Destination Type: %r" % typ)

    def getDestArray(self):
        """
        Build the destination array: the raw ``/Page`` entry, the ``/Type``,
        then whichever of /Left, /Bottom, /Right, /Top, /Zoom are present,
        in that order.
        """
        head = [self.raw_get('/Page'), self['/Type']]
        coords = [self[name]
                  for name in ('/Left', '/Bottom', '/Right', '/Top', '/Zoom')
                  if name in self]
        return ArrayObject(head + coords)

    def writeToStream(self, stream, encryption_key):
        """
        Write this destination to ``stream`` as a dictionary holding a
        ``/D`` destination array and an ``/S /GoTo`` entry.
        """
        stream.write(b_("<<\n"))
        NameObject('/D').writeToStream(stream, encryption_key)
        stream.write(b_(" "))
        self.getDestArray().writeToStream(stream, encryption_key)
        NameObject("/S").writeToStream(stream, encryption_key)
        stream.write(b_(" "))
        NameObject("/GoTo").writeToStream(stream, encryption_key)
        stream.write(b_("\n"))
        stream.write(b_(">>"))

    @property
    def title(self):
        """
        Read-only property accessing the destination title.

        :rtype: str
        """
        return self.get("/Title")

    @property
    def page(self):
        """
        Read-only property accessing the destination page number.

        :rtype: int
        """
        return self.get("/Page")

    @property
    def typ(self):
        """
        Read-only property accessing the destination type.

        :rtype: str
        """
        return self.get("/Type")

    @property
    def zoom(self):
        """
        Read-only property accessing the zoom factor.

        :rtype: int, or ``None`` if not available.
        """
        return self.get("/Zoom", None)

    @property
    def left(self):
        """
        Read-only property accessing the left horizontal coordinate.

        :rtype: int, or ``None`` if not available.
        """
        return self.get("/Left", None)

    @property
    def right(self):
        """
        Read-only property accessing the right horizontal coordinate.

        :rtype: int, or ``None`` if not available.
        """
        return self.get("/Right", None)

    @property
    def top(self):
        """
        Read-only property accessing the top vertical coordinate.

        :rtype: int, or ``None`` if not available.
        """
        return self.get("/Top", None)

    @property
    def bottom(self):
        """
        Read-only property accessing the bottom vertical coordinate.

        :rtype: int, or ``None`` if not available.
        """
        return self.get("/Bottom", None)
class Bookmark(Destination):
    """A :class:`Destination` that is serialised as an outline item."""

    def writeToStream(self, stream, encryption_key):
        """
        Write the outline-item dictionary to ``stream``: every present entry
        among /Title, /Parent, /First, /Last, /Next and /Prev (raw, one per
        line), followed by a ``/Dest`` destination array.
        """
        stream.write(b_("<<\n"))
        for name in ('/Title', '/Parent', '/First', '/Last', '/Next', '/Prev'):
            if name not in self:
                continue
            key = NameObject(name)
            key.writeToStream(stream, encryption_key)
            stream.write(b_(" "))
            self.raw_get(key).writeToStream(stream, encryption_key)
            stream.write(b_("\n"))
        NameObject('/Dest').writeToStream(stream, encryption_key)
        stream.write(b_(" "))
        self.getDestArray().writeToStream(stream, encryption_key)
        stream.write(b_("\n"))
        stream.write(b_(">>"))
def encode_pdfdocencoding(unicode_string):
    """
    Encode ``unicode_string`` with the PDFDocEncoding reverse table.

    :param unicode_string: text whose characters are looked up one by one
        in ``_pdfDocEncoding_rev``.
    :return: the encoded byte string.
    :raises UnicodeEncodeError: for the first character that has no entry
        in the translation table.
    """
    pieces = []
    for ch in unicode_string:
        try:
            code = _pdfDocEncoding_rev[ch]
        except KeyError:
            raise UnicodeEncodeError("pdfdocencoding", ch, -1, -1,
                                     "does not exist in translation table")
        pieces.append(b_(chr(code)))
    return b_('').join(pieces)
def decode_pdfdocencoding(byte_array):
    """
    Decode ``byte_array`` with the ``_pdfDocEncoding`` table.

    :param byte_array: iterable of bytes; each item is turned into a table
        index with ``ord_``.
    :return: the decoded text.
    :raises UnicodeDecodeError: for the first byte whose table entry is the
        NUL placeholder, i.e. a byte the table leaves unmapped.
    """
    chars = []
    for raw in byte_array:
        mapped = _pdfDocEncoding[ord_(raw)]
        if mapped == u_('\u0000'):
            raise UnicodeDecodeError("pdfdocencoding", utils.barray(raw), -1, -1,
                                     "does not exist in translation table")
        chars.append(mapped)
    return u_('').join(chars)
_pdfDocEncoding = (
u_('\u0000'), u_('\u0000'), u_('\u0000'), u_('\u0000'), u_('\u0000'), u_('\u0000'), u_('\u0000'), u_('\u0000'),
u_('\u0000'), u_('\u0000'), u_('\u0000'), u_('\u0000'), u_('\u0000'), u_('\u0000'), u_('\u0000'), u_('\u0000'),
u_('\u0000'), u_('\u0000'), u_('\u0000'), u_('\u0000'), u_('\u0000'), u_('\u0000'), u_('\u0000'), u_('\u0000'),
u_('\u02d8'), u_('\u02c7'), u_('\u02c6'), u_('\u02d9'), u_('\u02dd'), u_('\u02db'), u_('\u02da'), u_('\u02dc'),
u_('\u0020'), u_('\u0021'), u_('\u0022'), u_('\u0023'), u_('\u0024'), u_('\u0025'), u_('\u0026'), u_('\u0027'),
u_('\u0028'), u_('\u0029'), u_('\u002a'), u_('\u002b'), u_('\u002c'), u_('\u002d'), u_('\u002e'), u_('\u002f'),
u_('\u0030'), u_('\u0031'), u_('\u0032'), u_('\u0033'), u_('\u0034'), u_('\u0035'), u_('\u0036'), u_('\u0037'),
u_('\u0038'), u_('\u0039'), u_('\u003a'), u_('\u003b'), u_('\u003c'), u_('\u003d'), u_('\u003e'), u_('\u003f'),
u_('\u0040'), u_('\u0041'), u_('\u0042'), u_('\u0043'), u_('\u0044'), u_('\u0045'), u_('\u0046'), u_('\u0047'),
u_('\u0048'), u_('\u0049'), u_('\u004a'), u_('\u004b'), u_('\u004c'), u_('\u004d'), u_('\u004e'), u_('\u004f'),
u_('\u0050'), u_('\u0051'), u_('\u0052'), u_('\u0053'), u_('\u0054'), u_('\u0055'), u_('\u0056'), u_('\u0057'),
u_('\u0058'), u_('\u0059'), u_('\u005a'), u_('\u005b'), u_('\u005c'), u_('\u005d'), u_('\u005e'), u_('\u005f'),
u_('\u0060'), u_('\u0061'), u_('\u0062'), u_('\u0063'), u_('\u0064'), u_('\u0065'), u_('\u0066'), u_('\u0067'),
u_('\u0068'), u_('\u0069'), u_('\u006a'), u_('\u006b'), u_('\u006c'), u_('\u006d'), u_('\u006e'), u_('\u006f'),
u_('\u0070'), u_('\u0071'), u_('\u0072'), u_('\u0073'), u_('\u0074'), u_('\u0075'), u_('\u0076'), u_('\u0077'),
u_('\u0078'), u_('\u0079'), u_('\u007a'), u_('\u007b'), u_('\u007c'), u_('\u007d'), u_('\u007e'), u_('\u0000'),
u_('\u2022'), u_('\u2020'), u_('\u2021'), u_('\u2026'), u_('\u2014'), u_('\u2013'), u_('\u0192'), u_('\u2044'),
u_('\u2039'), u_('\u203a'), u_('\u2212'), u_('\u2030'), u_('\u201e'), u_('\u201c'), u_('\u201d'), u_('\u2018'),
u_('\u2019'), u_('\u201a'), u_('\u2122'), u_('\ufb01'), u_('\ufb02'), u_('\u0141'), u_('\u0152'), u_('\u0160'),
u_('\u0178'), u_('\u017d'), u_('\u0131'), u_('\u0142'), u_('\u0153'), u_('\u0161'), u_('\u017e'), u_('\u0000'),
u_('\u20ac'), u_('\u00a1'), u_('\u00a2'), u_('\u00a3'), u_('\u00a4'), u_('\u00a5'), u_('\u00a6'), u_('\u00a7'),
u_('\u00a8'), u_('\u00a9'), u_('\u00aa'), u_('\u00ab'), u_('\u00ac'), u_('\u0000'), u_('\u00ae'), u_('\u00af'),
u_('\u00b0'), u_('\u00b1'), u_('\u00b2'), u_('\u00b3'), u_('\u00b4'), u_('\u00b5'), u_('\u00b6'), u_('\u00b7'),
u_('\u00b8'), u_('\u00b9'), u_('\u00ba'), u_('\u00bb'), u_('\u00bc'), u_('\u00bd'), u_('\u00be'), u_('\u00bf'),
u_('\u00c0'), u_('\u00c1'), u_('\u00c2'), u_('\u00c3'), u_('\u00c4'), u_('\u00c5'), u_('\u00c6'), u_('\u00c7'),
u_('\u00c8'), u_('\u00c9'), u_('\u00ca'), u_('\u00cb'), u_('\u00cc'), u_('\u00cd'), u_('\u00ce'), u_('\u00cf'),
u_('\u00d0'), u_('\u00d1'), u_('\u00d2'), u_('\u00d3'), u_('\u00d4'), u_('\u00d5'), u_('\u00d6'), u_('\u00d7'),
u_('\u00d8'), u_('\u00d9'), u_('\u00da'), u_('\u00db'), u_('\u00dc'), u_('\u00dd'), u_('\u00de'), u_('\u00df'),
u_('\u00e0'), u_('\u00e1'), u_('\u00e2'), u_('\u00e3'), u_('\u00e4'), u_('\u00e5'), u_('\u00e6'), u_('\u00e7'),
u_('\u00e8'), u_('\u00e9'), u_('\u00ea'), u_('\u00eb'), u_('\u00ec'), u_('\u00ed'), u_('\u00ee'), u_('\u00ef'),
u_('\u00f0'), u_('\u00f1'), u_('\u00f2'), u_('\u00f3'), u_('\u00f4'), u_('\u00f5'), u_('\u00f6'), u_('\u00f7'),
u_('\u00f8'), u_('\u00f9'), u_('\u00fa'), u_('\u00fb'), u_('\u00fc'), u_('\u00fd'), u_('\u00fe'), u_('\u00ff')
)
# The table must provide one entry per byte value.
assert len(_pdfDocEncoding) == 256

# Reverse lookup (character -> byte value) used by encode_pdfdocencoding().
# NUL entries are placeholders for unmapped bytes and get no reverse entry.
# The loop names ``i`` and ``char`` are kept because they are module globals.
_pdfDocEncoding_rev = {}
for i, char in enumerate(_pdfDocEncoding):
    if char != u_("\u0000"):
        # A character mapped from two bytes would make encoding ambiguous.
        assert char not in _pdfDocEncoding_rev
        _pdfDocEncoding_rev[char] = i
endesive-2.19.1/endesive/pdf/PyPDF2/merger.py 0000664 0000000 0000000 00000051647 15042366745 0020651 0 ustar 00root root 0000000 0000000 # vim: sw=4:expandtab:foldmethod=marker
#
# Copyright (c) 2006, Mathieu Fenniak
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are
# met:
#
# * Redistributions of source code must retain the above copyright notice,
# this list of conditions and the following disclaimer.
# * Redistributions in binary form must reproduce the above copyright notice,
# this list of conditions and the following disclaimer in the documentation
# and/or other materials provided with the distribution.
# * The name of the author may not be used to endorse or promote products
# derived from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
# POSSIBILITY OF SUCH DAMAGE.
from .generic import *
from .utils import isString, str_
from .pdf import PdfFileReader, PdfFileWriter
from .pagerange import PageRange
from sys import version_info
if version_info < ( 3, 0 ):
from cStringIO import StringIO
StreamIO = StringIO
else:
from io import BytesIO
from io import FileIO as file
StreamIO = BytesIO
class _MergedPage(object):
    """
    Internal record PdfFileMerger keeps for every page being merged.

    ``pagedata`` is the source page, ``src`` the reader it came from and
    ``id`` the merger-wide page id; ``out_pagedata`` starts as ``None`` and
    is filled in by the merger when the output is written.
    """

    def __init__(self, pagedata, src, id):
        self.id = id
        self.pagedata = pagedata
        self.src = src
        self.out_pagedata = None
class PdfFileMerger(object):
"""
Initializes a PdfFileMerger object. PdfFileMerger merges multiple PDFs
into a single PDF. It can concatenate, slice, insert, or any combination
of the above.
See the functions :meth:`merge()` (or :meth:`append()`)
and :meth:`write()` for usage information.
:param bool strict: Determines whether user should be warned of all
problems and also causes some correctable problems to be fatal.
Defaults to ``True``.
"""
def __init__(self, strict=True):
    """
    Create an empty merger.

    :param bool strict: stored and later passed to every
        ``PdfFileReader`` created by :meth:`merge`.
    """
    self.strict = strict
    self.id_count = 0                # next page id handed out by merge()
    self.inputs = []                 # (stream, reader, owned) tuples
    self.pages = []                  # _MergedPage records, in output order
    self.bookmarks = []
    self.named_dests = []
    self.output = PdfFileWriter()
def merge(self, position, fileobj, bookmark=None, pages=None, import_bookmarks=True):
    """
    Merges the pages from the given file into the output file at the
    specified page number.

    :param int position: The *page number* to insert this file. File will
        be inserted after the given number.

    :param fileobj: A File Object or an object that supports the standard read
        and seek methods similar to a File Object. Could also be a
        string representing a path to a PDF file, or a ``PdfFileReader``.

    :param str bookmark: Optionally, you may specify a bookmark to be applied at
        the beginning of the included file by supplying the text of the bookmark.

    :param pages: can be a :ref:`Page Range <page-range>` or a ``(start, stop[, step])`` tuple
        to merge only the specified range of pages from the source
        document into the output document.

    :param bool import_bookmarks: You may prevent the source document's bookmarks
        from being imported by specifying this as ``False``.

    :raises TypeError: if ``pages`` is neither ``None``, a ``PageRange``
        nor a tuple.
    """
    # Recorded in self.inputs: True when the stream handed to the reader
    # was created in this method (and so is closed by close()).
    my_file = False
    decryption_key = None

    # A string is treated as a path and opened; a seekable/readable object
    # or a PdfFileReader has its content copied into an in-memory stream.
    # Anything else is passed to PdfFileReader unchanged.
    if isString(fileobj):
        fileobj = file(fileobj, 'rb')
        my_file = True
    elif hasattr(fileobj, "seek") and hasattr(fileobj, "read"):
        fileobj.seek(0)
        filecontent = fileobj.read()
        fileobj = StreamIO(filecontent)
        my_file = True
    elif isinstance(fileobj, PdfFileReader):
        # Take the key from the reader BEFORE ``fileobj`` is rebound to
        # the stream copy; looking it up afterwards inspects the stream,
        # which never has the attribute, so the key would be lost.
        if hasattr(fileobj, '_decryption_key'):
            decryption_key = fileobj._decryption_key
        orig_tell = fileobj.stream.tell()
        fileobj.stream.seek(0)
        filecontent = StreamIO(fileobj.stream.read())
        fileobj.stream.seek(orig_tell)  # reset the stream to its original location
        fileobj = filecontent
        my_file = True

    # Create a new PdfFileReader instance using the stream created above.
    pdfr = PdfFileReader(fileobj, strict=self.strict)
    if decryption_key is not None:
        pdfr._decryption_key = decryption_key

    # Find the range of pages to merge.
    if pages is None:
        pages = (0, pdfr.getNumPages())
    elif isinstance(pages, PageRange):
        pages = pages.indices(pdfr.getNumPages())
    elif not isinstance(pages, tuple):
        raise TypeError('"pages" must be a tuple of (start, stop[, step])')

    srcpages = []
    if bookmark:
        # Points at the id the first merged page is about to receive.
        bookmark = Bookmark(TextStringObject(bookmark), NumberObject(self.id_count), NameObject('/Fit'))

    outline = []
    if import_bookmarks:
        outline = pdfr.getOutlines()
        outline = self._trim_outline(pdfr, outline, pages)

    if bookmark:
        # The imported outline is nested under the new bookmark.
        self.bookmarks += [bookmark, outline]
    else:
        self.bookmarks += outline

    dests = pdfr.namedDestinations
    dests = self._trim_dests(pdfr, dests, pages)
    self.named_dests += dests

    # Gather all the pages that are going to be merged
    for i in range(*pages):
        pg = pdfr.getPage(i)

        page_id = self.id_count
        self.id_count += 1

        srcpages.append(_MergedPage(pg, pdfr, page_id))

    self._associate_dests_to_pages(srcpages)
    self._associate_bookmarks_to_pages(srcpages)

    # Slice to insert the pages at the specified position
    self.pages[position:position] = srcpages

    # Keep track of our input files so we can close them later
    self.inputs.append((fileobj, pdfr, my_file))
def append(self, fileobj, bookmark=None, pages=None, import_bookmarks=True):
    """
    Identical to the :meth:`merge()` method, but concatenates the pages
    onto the end of the output instead of taking a position.

    :param fileobj: A File Object or an object that supports the standard read
        and seek methods similar to a File Object. Could also be a
        string representing a path to a PDF file.

    :param str bookmark: Optional text of a bookmark to apply at the
        beginning of the included file.

    :param pages: a Page Range or a ``(start, stop[, step])`` tuple
        selecting the pages of the source document to merge.

    :param bool import_bookmarks: pass ``False`` to keep the source
        document's bookmarks from being imported.
    """
    end_position = len(self.pages)
    self.merge(end_position, fileobj, bookmark, pages, import_bookmarks)
def write(self, fileobj):
    """
    Writes all data that has been merged to the given output file.

    :param fileobj: Output file. Can be a filename or any kind of
        file-like object. A filename is opened in ``'wb'`` mode here and
        closed again before returning, even when writing fails.
    """
    my_file = False
    if isString(fileobj):
        fileobj = file(fileobj, 'wb')
        my_file = True

    try:
        # Add pages to the PdfFileWriter and remember, for each one, the
        # reference to the page object the writer just appended to /Kids.
        for page in self.pages:
            self.output.addPage(page.pagedata)
            page.out_pagedata = self.output.getReference(self.output._pages.getObject()["/Kids"][-1].getObject())

        # Once all pages are added, create named destinations and
        # bookmarks that point at those pages.
        self._write_dests()
        self._write_bookmarks()

        # Write the output to the file
        self.output.write(fileobj)
    finally:
        # Only close what this method opened itself.
        if my_file:
            fileobj.close()
def close(self):
    """
    Drop all merged pages, close every input stream this merger created
    itself and release the output writer.
    """
    self.pages = []
    for stream, _reader, owned in self.inputs:
        # Streams flagged as not owned were not created by the merger
        # and are left open.
        if owned:
            stream.close()
    self.inputs = []
    self.output = None
def addMetadata(self, infos):
    """
    Add custom metadata to the output by forwarding ``infos`` to the
    output writer.

    :param dict infos: a Python dictionary where each key is a field
        and each value is your new metadata.
        Example: ``{u'/Title': u'My title'}``
    """
    writer = self.output
    writer.addMetadata(infos)
def setPageLayout(self, layout):
    """
    Set the page layout by forwarding ``layout`` to the output writer.

    :param str layout: The page layout to be used

    Valid layouts are:
         /NoLayout        Layout explicitly not specified
         /SinglePage      Show one page at a time
         /OneColumn       Show one column at a time
         /TwoColumnLeft   Show pages in two columns, odd-numbered pages on the left
         /TwoColumnRight  Show pages in two columns, odd-numbered pages on the right
         /TwoPageLeft     Show two pages at a time, odd-numbered pages on the left
         /TwoPageRight    Show two pages at a time, odd-numbered pages on the right
    """
    writer = self.output
    writer.setPageLayout(layout)
def setPageMode(self, mode):
    """
    Set the page mode by forwarding ``mode`` to the output writer.

    :param str mode: The page mode to use.

    Valid modes are:
        /UseNone         Do not show outlines or thumbnails panels
        /UseOutlines     Show outlines (aka bookmarks) panel
        /UseThumbs       Show page thumbnails panel
        /FullScreen      Fullscreen view
        /UseOC           Show Optional Content Group (OCG) panel
        /UseAttachments  Show attachments panel
    """
    writer = self.output
    writer.setPageMode(mode)
def _trim_dests(self, pdf, dests, pages):
    """
    Removes any named destinations that are not a part of the specified
    page set.

    :param pdf: reader the destinations come from.
    :param dests: mapping of destination name to destination object.
    :param pages: ``range()`` arguments selecting the pages being merged.
    :return: list of the destinations whose page is in the range; each
        kept destination has its ``/Page`` entry replaced by the resolved
        page object.
    """
    kept = []
    for name, dest in list(dests.items()):
        for pageno in range(*pages):
            if pdf.getPage(pageno).getObject() == dest['/Page'].getObject():
                dest[NameObject('/Page')] = dest['/Page'].getObject()
                assert str_(name) == str_(dest['/Title'])
                kept.append(dest)
                break
    return kept
def _trim_outline(self, pdf, outline, pages):
    """
    Removes any outline/bookmark entries that are not a part of the
    specified page set.

    Nested lists are trimmed recursively. When a nested list keeps
    entries but the entry right before it was dropped, that preceding
    entry is added back ahead of the sub-list.

    :return: the trimmed outline as a new list.
    """
    kept = []
    header_kept = True
    for index, entry in enumerate(outline):
        if not isinstance(entry, list):
            header_kept = False
            for pageno in range(*pages):
                if pdf.getPage(pageno).getObject() == entry['/Page'].getObject():
                    entry[NameObject('/Page')] = entry['/Page'].getObject()
                    kept.append(entry)
                    header_kept = True
                    break
            continue

        children = self._trim_outline(pdf, entry, pages)
        if not children:
            continue
        if not header_kept:
            kept.append(outline[index - 1])
        kept.append(children)
    return kept
def _write_dests(self):
    """
    Add every named destination whose ``/Page`` id matches a merged page
    to the output, after replacing that id with the page's
    ``out_pagedata``. Destinations without a matching page are skipped.
    """
    for dest in self.named_dests:
        resolved = False
        if '/Page' in dest:
            for merged in self.pages:
                if merged.id == dest['/Page']:
                    dest[NameObject('/Page')] = merged.out_pagedata
                    resolved = True
                    break
        if resolved:
            self.output.addNamedDestinationObject(dest)
def _write_bookmarks(self, bookmarks=None, parent=None):
    """
    Add the collected bookmarks to the output writer.

    Each bookmark whose ``/Page`` id matches a merged page gets a
    ``/A`` GoTo action built from its type and coordinates, loses its
    ``/Page``, ``/Type`` and coordinate entries, and is passed to
    ``self.output.addBookmarkDict``. Bookmarks without a matching page
    are skipped. Nested lists are written recursively with the most
    recently added bookmark as their parent.

    :param bookmarks: list to write; defaults to ``self.bookmarks``.
    :param parent: passed through to ``addBookmarkDict`` for every
        bookmark of this level.
    """
    if bookmarks is None:
        bookmarks = self.bookmarks

    # Coordinate entries carried by each destination type, in the order
    # they appear in the /D array. /Fit and /FitB carry none.
    coord_keys = {
        '/FitH': ('/Top',),
        '/FitBH': ('/Top',),
        '/FitV': ('/Left',),
        '/FitBV': ('/Left',),
        '/XYZ': ('/Left', '/Top', '/Zoom'),
        '/FitR': ('/Left', '/Bottom', '/Right', '/Top'),
    }

    last_added = None
    for b in bookmarks:
        if isinstance(b, list):
            self._write_bookmarks(b, last_added)
            continue

        resolved = False
        if '/Page' in b:
            for p in self.pages:
                if p.id == b['/Page']:
                    args = [NumberObject(p.id), NameObject(b['/Type'])]
                    for key in coord_keys.get(b['/Type'], ()):
                        # A missing or null coordinate is written as 0.
                        if key in b and not isinstance(b[key], NullObject):
                            args.append(FloatObject(b[key]))
                        else:
                            args.append(FloatObject(0))
                        # pop() rather than del: the entry may be absent,
                        # which the branch above explicitly allows for.
                        b.pop(key, None)
                    b[NameObject('/A')] = DictionaryObject({NameObject('/S'): NameObject('/GoTo'), NameObject('/D'): ArrayObject(args)})
                    resolved = True
                    break
        if resolved:
            del b['/Page'], b['/Type']
            last_added = self.output.addBookmarkDict(b, parent)
def _associate_dests_to_pages(self, pages):
    """
    Replace the ``/Page`` object of every named destination with the id
    of the matching entry in ``pages``.

    Destinations whose ``/Page`` already is a ``NumberObject`` are left
    alone. When several pages match, the last one wins.

    :raises ValueError: if a destination matches none of ``pages``.
    """
    for nd in self.named_dests:
        target = nd['/Page']
        if isinstance(target, NumberObject):
            continue

        pageno = None
        for p in pages:
            if target.getObject() == p.pagedata.getObject():
                pageno = p.id

        if pageno is None:
            raise ValueError("Unresolved named destination '%s'" % (nd['/Title'],))
        nd[NameObject('/Page')] = NumberObject(pageno)
def _associate_bookmarks_to_pages(self, pages, bookmarks=None):
    """
    Replace the ``/Page`` object of every bookmark with the id of the
    matching entry in ``pages``, recursing into nested lists.

    Bookmarks whose ``/Page`` already is a ``NumberObject`` are left
    alone. When several pages match, the last one wins.

    :param bookmarks: list to process; defaults to ``self.bookmarks``.
    :raises ValueError: if a bookmark matches none of ``pages``.
    """
    if bookmarks is None:
        bookmarks = self.bookmarks

    for b in bookmarks:
        if isinstance(b, list):
            self._associate_bookmarks_to_pages(pages, b)
            continue

        target = b['/Page']
        if isinstance(target, NumberObject):
            continue

        pageno = None
        for p in pages:
            if target.getObject() == p.pagedata.getObject():
                pageno = p.id

        if pageno is None:
            raise ValueError("Unresolved bookmark '%s'" % (b['/Title'],))
        b[NameObject('/Page')] = NumberObject(pageno)
def findBookmark(self, bookmark, root=None):
    """
    Locate ``bookmark`` in the (possibly nested) bookmark list.

    :param bookmark: a bookmark object, or a value compared against each
        entry's ``/Title``.
    :param root: list to search; defaults to ``self.bookmarks``.
    :return: list of indices leading to the first match, or ``None``
        when nothing matches.
    """
    if root is None:
        root = self.bookmarks

    for index, entry in enumerate(root):
        if isinstance(entry, list):
            path = self.findBookmark(bookmark, entry)
            if path:
                return [index] + path
            continue
        if entry == bookmark or entry['/Title'] == bookmark:
            return [index]

    return None
def addBookmark(self, title, pagenum, parent=None):
    """
    Add a bookmark to this PDF file.

    :param str title: Title to use for this bookmark.
    :param int pagenum: Page number this bookmark will point to.
    :param parent: A reference to a parent bookmark to create nested
        bookmarks. May also be an index path (list) as returned by
        :meth:`findBookmark`.
    :return: the newly created bookmark.
    """
    if parent is None:
        iloc = None
    elif isinstance(parent, list):
        iloc = parent
    else:
        iloc = self.findBookmark(parent)

    dest = Bookmark(TextStringObject(title), NumberObject(pagenum), NameObject('/FitH'), NumberObject(826))

    if parent is None:
        # Top-level bookmark: goes at the end of the list.
        self.bookmarks.append(dest)
        return dest

    # Walk down to the list that contains the parent bookmark.
    siblings = self.bookmarks
    for step in iloc[:-1]:
        siblings = siblings[step]

    # Children live in the list right after their parent; create that
    # list when the parent has none yet.
    child_pos = iloc[-1] + 1
    if child_pos < len(siblings) and isinstance(siblings[child_pos], list):
        siblings[child_pos].append(dest)
    else:
        siblings.insert(child_pos, [dest])
    return dest
def addNamedDestination(self, title, pagenum):
    """
    Add a destination to the output.

    The destination is created with type ``/FitH`` and a top value
    of 826.

    :param str title: Title to use
    :param int pagenum: Page number this destination points at.
    """
    dest_title = TextStringObject(title)
    dest_page = NumberObject(pagenum)
    dest = Destination(dest_title, dest_page, NameObject('/FitH'), NumberObject(826))
    self.named_dests.append(dest)
class OutlinesObject(list):
    """
    A list of outline entries tied to the tree object that stores them.

    ``pdf`` is the owning document, ``tree`` the object whose children
    mirror this list and ``parent`` an optional parent reference.
    """

    def __init__(self, pdf, tree, parent=None):
        list.__init__(self)
        self.pdf = pdf
        self.tree = tree
        self.parent = parent

    def remove(self, index):
        """Delete the entry at ``index`` and remove it from the tree."""
        entry = self[index]
        del self[index]
        self.tree.removeChild(entry)

    def add(self, title, pagenum):
        """
        Append a bookmark titled ``title`` to the tree, pointing at page
        ``pagenum`` through a ``/GoTo`` action with a ``/FitH`` view.
        """
        pageRef = self.pdf.getObject(self.pdf._pages)['/Kids'][pagenum]

        action = DictionaryObject()
        action.update({
            NameObject('/D'): ArrayObject([pageRef, NameObject('/FitH'), NumberObject(826)]),
            NameObject('/S'): NameObject('/GoTo'),
        })
        actionRef = self.pdf._addObject(action)

        bookmark = TreeObject()
        bookmark.update({
            NameObject('/A'): actionRef,
            NameObject('/Title'): createStringObject(title),
        })
        self.pdf._addObject(bookmark)
        self.tree.addChild(bookmark)

    def removeAll(self):
        """
        Remove every child from the tree, popping one entry off this
        list per removed child.
        """
        # Snapshot the children first: the tree is modified while looping.
        for child in list(self.tree.children()):
            self.tree.removeChild(child)
            self.pop()
endesive-2.19.1/endesive/pdf/PyPDF2/pagerange.py 0000664 0000000 0000000 00000012636 15042366745 0021314 0 ustar 00root root 0000000 0000000 #!/usr/bin/env python
"""
Representation and utils for ranges of PDF file pages.
Copyright (c) 2014, Steve Witham .
All rights reserved. This software is available under a BSD license;
see https://github.com/mstamy2/PyPDF2/blob/master/LICENSE
"""
import re
from .utils import isString
_INT_RE = r"(0|-?[1-9]\d*)" # A decimal int, don't allow "-0".
PAGE_RANGE_RE = "^({int}|({int}?(:{int}?(:{int}?)?)))$".format(int=_INT_RE)
# groups:         12     34     5 6     7 8
class ParseError(Exception):
    """Raised by PageRange when its argument is not a valid page range."""
PAGE_RANGE_HELP = """Remember, page indices start with zero.
Page range expression examples:
: all pages. -1 last page.
22 just the 23rd page. :-1 all but the last page.
0:3 the first three pages. -2 second-to-last page.
:3 the first three pages. -2: last two pages.
5: from the sixth page onward. -3:-1 third & second to last.
The third, "stride" or "step" number is also recognized.
::2 0 2 4 ... to the end. 3:0:-1 3 2 1 but not 0.
1:10:2 1 3 5 7 9 2::-1 2 1 0.
::-1 all pages in reverse order.
"""
class PageRange(object):
    """
    A slice-like representation of a range of page indices,
        i.e. page numbers, only starting at zero.
    The syntax is like what you would put between brackets [ ].
    The slice is one of the few Python types that can't be subclassed,
    but this class converts to and from slices, and allows similar use.
      o  PageRange(str) parses a string representing a page range.
      o  PageRange(slice) directly "imports" a slice.
      o  to_slice() gives the equivalent slice.
      o  str() and repr() allow printing.
      o  indices(n) is like slice.indices(n).
    """

    def __init__(self, arg):
        """
        Initialize with either a slice -- giving the equivalent page range,
        or a PageRange object -- making a copy,
        or a string like
            "int", "[int]:[int]" or "[int]:[int]:[int]",
            where the brackets indicate optional ints.
        {page_range_help}
        Note the difference between this notation and arguments to slice():
            slice(3) means the first three pages;
            PageRange("3") means the range of only the fourth page.
            However PageRange(slice(3)) means the first three pages.
        """
        if isinstance(arg, slice):
            self._slice = arg
            return

        if isinstance(arg, PageRange):
            self._slice = arg.to_slice()
            return

        match = isString(arg) and re.match(PAGE_RANGE_RE, arg)
        if not match:
            raise ParseError(arg)

        single = match.group(2)
        if single:
            # Special case: just an int means a range of one page.
            # "-1" needs an open end, since slice(-1, 0) would be empty.
            start = int(single)
            stop = None if start == -1 else start + 1
            self._slice = slice(start, stop)
        else:
            bounds = [int(text) if text else None
                      for text in match.group(4, 6, 8)]
            self._slice = slice(*bounds)

    # Just formatting this when there is __doc__ for __init__
    if __init__.__doc__:
        __init__.__doc__ = __init__.__doc__.format(page_range_help=PAGE_RANGE_HELP)

    @staticmethod
    def valid(input):
        """ True if input is a valid initializer for a PageRange. """
        if isinstance(input, (slice, PageRange)):
            return True
        return isString(input) and bool(re.match(PAGE_RANGE_RE, input))

    def to_slice(self):
        """ Return the slice equivalent of this page range. """
        return self._slice

    def __str__(self):
        """ A string like "1:2:3". """
        s = self._slice
        if s.step is not None:
            parts = (s.start, s.stop, s.step)
        elif s.start is not None and s.stop == s.start + 1:
            # A one-page range prints as the bare page index.
            return str(s.start)
        else:
            parts = (s.start, s.stop)
        return ':'.join("" if part is None else str(part) for part in parts)

    def __repr__(self):
        """ A string like "PageRange('1:2:3')". """
        return "PageRange(" + repr(str(self)) + ")"

    def indices(self, n):
        """
        n is the length of the list of pages to choose from.
        Returns arguments for range().  See help(slice.indices).
        """
        return self._slice.indices(n)
PAGE_RANGE_ALL = PageRange(":") # The range of all pages.
def parse_filename_page_ranges(args):
    """
    Given a sequence of filenames and page ranges, return a list of
    (filename, page_range) pairs.

    First arg must be a filename; other args are filenames, page-range
    expressions, slice objects, or PageRange objects.
    A filename not followed by a page range indicates all pages of the file.

    :param args: any iterable (list, tuple, ...) of the items above.
    :return: list of ``(filename, PageRange)`` tuples.
    :raises ValueError: if a page range appears before any filename.
    """
    pairs = []
    pdf_filename = None
    did_page_range = False
    # The trailing None acts as an end marker that flushes the last
    # filename. list() lets callers pass a tuple or other iterable.
    for arg in list(args) + [None]:
        if PageRange.valid(arg):
            if not pdf_filename:
                raise ValueError("The first argument must be a filename, " \
                                 "not a page range.")

            pairs.append((pdf_filename, PageRange(arg)))
            did_page_range = True
        else:
            # New filename or end of list--do all of the previous file?
            if pdf_filename and not did_page_range:
                pairs.append((pdf_filename, PAGE_RANGE_ALL))

            pdf_filename = arg
            did_page_range = False
    return pairs
endesive-2.19.1/endesive/pdf/PyPDF2/pdf.py 0000664 0000000 0000000 00000374464 15042366745 0020146 0 ustar 00root root 0000000 0000000 # -*- coding: utf-8 -*-
#
# vim: sw=4:expandtab:foldmethod=marker
#
# Copyright (c) 2006, Mathieu Fenniak
# Copyright (c) 2007, Ashish Kulkarni
#
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are
# met:
#
# * Redistributions of source code must retain the above copyright notice,
# this list of conditions and the following disclaimer.
# * Redistributions in binary form must reproduce the above copyright notice,
# this list of conditions and the following disclaimer in the documentation
# and/or other materials provided with the distribution.
# * The name of the author may not be used to endorse or promote products
# derived from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
# POSSIBILITY OF SUCH DAMAGE.
"""
A pure-Python PDF library with an increasing number of capabilities.
See README for links to FAQ, documentation, homepage, etc.
"""
__author__ = "Mathieu Fenniak"
__author_email__ = "biziqe@mathieu.fenniak.net"
__maintainer__ = "Phaseit, Inc."
__maintainer_email = "PyPDF2@phaseit.net"
import string
import math
import struct
import sys
import uuid
from sys import version_info
if version_info < ( 3, 0 ):
from cStringIO import StringIO
else:
from io import StringIO
if version_info < ( 3, 0 ):
BytesIO = StringIO
else:
from io import BytesIO
from . import filters
from . import utils
import warnings
import codecs
from .generic import *
from .utils import readNonWhitespace, readUntilWhitespace, ConvertFunctionsToVirtualList
from .utils import isString, b_, u_, ord_, chr_, str_, formatWarning
if version_info < ( 2, 4 ):
from sets import ImmutableSet as frozenset
if version_info < ( 2, 5 ):
from md5 import md5
else:
from hashlib import md5
import uuid
class PdfFileWriter(object):
"""
This class supports writing PDF files out, given pages produced by another
class (typically :class:`PdfFileReader`).
"""
def __init__(self):
    """
    Set up an empty document: the header line, the indirect-object table,
    and the page-tree, info and catalog dictionaries.
    """
    self._header = b_("%PDF-1.3")
    # Table of indirect objects; _addObject() appends to it.
    self._objects = []

    # Page tree root, created without any kids.
    page_tree = DictionaryObject()
    page_tree.update({
        NameObject("/Type"): NameObject("/Pages"),
        NameObject("/Count"): NumberObject(0),
        NameObject("/Kids"): ArrayObject(),
    })
    self._pages = self._addObject(page_tree)

    # Document information dictionary; the producer string is UTF-16BE
    # text prefixed with its byte-order mark.
    producer = codecs.BOM_UTF16_BE + u_("PyPDF2").encode('utf-16be')
    doc_info = DictionaryObject()
    doc_info.update({
        NameObject("/Producer"): createStringObject(producer),
    })
    self._info = self._addObject(doc_info)

    # Catalog dictionary. It is only kept as a plain attribute here:
    # self._root stays None and no indirect object is created for it yet.
    catalog = DictionaryObject()
    catalog.update({
        NameObject("/Type"): NameObject("/Catalog"),
        NameObject("/Pages"): self._pages,
    })
    self._root = None
    self._root_object = catalog
def _addObject(self, obj):
    """
    Append *obj* to the object table and return an indirect reference to it.

    The reference's id is the table length after the append (1-based) and
    its generation is always 0.
    """
    table = self._objects
    table.append(obj)
    idnum = len(table)
    return IndirectObject(idnum, 0, self)
def getObject(self, ido):
    """
    Resolve an indirect reference created by this writer.

    :param ido: a reference exposing ``pdf`` and ``idnum`` attributes.
    :return: the object stored at table position ``ido.idnum - 1``.
    :raises ValueError: if the reference belongs to another document.
    """
    owner = ido.pdf
    if owner != self:
        raise ValueError("pdf must be self")
    position = ido.idnum - 1
    return self._objects[position]
def _addPage(self, page, action):
    """
    Register *page* and hook it into the page tree.

    :param page: page dictionary whose ``/Type`` must equal ``/Page``.
    :param action: callable ``action(kids, page_ref)`` that places the new
        reference into the ``/Kids`` array (append, insert, ...).
    """
    assert page["/Type"] == "/Page"
    tree_ref = self._pages
    page[NameObject("/Parent")] = tree_ref
    page_ref = self._addObject(page)
    tree = self.getObject(tree_ref)
    action(tree["/Kids"], page_ref)
    # Keep the tree's page counter in step with the kids array.
    incremented = tree["/Count"] + 1
    tree[NameObject("/Count")] = NumberObject(incremented)
def addPage(self, page):
    """
    Append a page to the end of this PDF file. The page is usually
    obtained from a :class:`PdfFileReader` instance.

    :param PageObject page: The page to add to the document. Should be
        an instance of :class:`PageObject`
    """
    place_last = list.append
    self._addPage(page, place_last)
def insertPage(self, page, index=0):
    """
    Insert a page into this PDF file at a given position. The page is
    usually obtained from a :class:`PdfFileReader` instance.

    :param PageObject page: The page to add to the document. This
        argument should be an instance of :class:`PageObject`.
    :param int index: Position at which the page will be inserted.
    """
    def place_at_index(kids, page_ref):
        # Same contract as list.insert: returns None.
        return kids.insert(index, page_ref)

    self._addPage(page, place_at_index)
def getPage(self, pageNumber):
    """
    Retrieve a page by number from this PDF file.

    :param int pageNumber: The page number to retrieve
        (pages begin at zero)
    :return: the page at the index given by *pageNumber*
    :rtype: :class:`PageObject`
    """
    tree = self.getObject(self._pages)
    # XXX: crude hack -- indexes the root /Kids array directly.
    page_ref = tree["/Kids"][pageNumber]
    return page_ref.getObject()
def getNumPages(self):
    """
    :return: the number of pages, read from the page tree's ``/Count``.
    :rtype: int
    """
    tree = self.getObject(self._pages)
    count = tree[NameObject("/Count")]
    return int(count)
def addBlankPage(self, width=None, height=None):
    """
    Append a blank page to this PDF file and return it. Page creation,
    including the handling of a missing size, is delegated to
    :meth:`PageObject.createBlankPage`.

    :param float width: The width of the new page expressed in default user
        space units.
    :param float height: The height of the new page expressed in default
        user space units.
    :return: the newly appended page
    :rtype: :class:`PageObject`
    :raises PageSizeNotDefinedError: if width and height are not defined
        and previous page does not exist.
    """
    blank = PageObject.createBlankPage(self, width, height)
    self.addPage(blank)
    return blank
def insertBlankPage(self, width=None, height=None, index=0):
    """
    Insert a blank page into this PDF file and return it.

    When either dimension is missing and a page already exists at *index*,
    both dimensions are copied from that page's media box. Otherwise the
    given values are passed unchanged to
    :meth:`PageObject.createBlankPage`.

    :param float width: The width of the new page expressed in default user
        space units.
    :param float height: The height of the new page expressed in default
        user space units.
    :param int index: Position to add the page.
    :return: the newly inserted page
    :rtype: :class:`PageObject`
    :raises PageSizeNotDefinedError: if width and height are not defined
        and previous page does not exist.
    """
    size_missing = width is None or height is None
    # Explicit grouping matters: ``and`` binds tighter than ``or``, so the
    # ungrouped condition looked up the page at *index* whenever width was
    # None, even when no such page existed (IndexError).
    if size_missing and (self.getNumPages() - 1) >= index:
        oldpage = self.getPage(index)
        width = oldpage.mediaBox.getWidth()
        height = oldpage.mediaBox.getHeight()
    page = PageObject.createBlankPage(self, width, height)
    self.insertPage(page, index)
    return page
def addJS(self, javascript):
    """
    Add Javascript which will launch upon opening this PDF.

    The script is wrapped in a ``/JavaScript`` action that is registered
    as an indirect object and installed as the catalog's ``/OpenAction``;
    the catalog's ``/Names`` entry is set to a new name tree pointing at
    the same action.

    :param str javascript: Your Javascript.

    >>> output.addJS("this.print({bUI:true,bSilent:false,bShrinkToFit:true});")
    # Example: This will launch the print window when the PDF is opened.
    """
    action = DictionaryObject()
    action.update({
        NameObject("/Type"): NameObject("/Action"),
        NameObject("/S"): NameObject("/JavaScript"),
        NameObject("/JS"): NameObject("(%s)" % javascript),
    })
    action_ref = self._addObject(action)

    # The name tree needs some key for the script; any unique string works.
    script_key = str(uuid.uuid4())
    named_scripts = ArrayObject([createStringObject(script_key), action_ref])

    name_tree = DictionaryObject()
    name_tree.update({
        NameObject("/JavaScript"): DictionaryObject({
            NameObject("/Names"): named_scripts,
        }),
    })
    self._addObject(name_tree)

    self._root_object.update({
        NameObject("/OpenAction"): action_ref,
        NameObject("/Names"): name_tree,
    })
def addAttachment(self, fname, fdata):
    """
    Embed a file inside the PDF.

    Three structures are built: the stream holding the data, the
    ``/Filespec`` dictionary describing it, and the ``/Names`` entry that
    is written into the catalog.

    :param str fname: The filename to display.
    :param str fdata: The data in the file.

    Reference:
    https://www.adobe.com/content/dam/Adobe/en/devnet/acrobat/pdfs/PDF32000_2008.pdf
    Section 7.11.3
    """
    # 1. The embedded file stream. Sample:
    #      8 0 obj
    #      << /Length 12 /Type /EmbeddedFile >>
    #      stream
    #      Hello world!
    #      endstream
    #      endobj
    embedded = DecodedStreamObject()
    embedded.setData(fdata)
    embedded.update({
        NameObject("/Type"): NameObject("/EmbeddedFile"),
    })

    # 2. The file specification. Sample:
    #      7 0 obj
    #      << /Type /Filespec /F (hello.txt) /EF << /F 8 0 R >> >>
    ef_dict = DictionaryObject()
    ef_dict.update({NameObject("/F"): embedded})

    file_spec = DictionaryObject()
    file_spec.update({
        NameObject("/Type"): NameObject("/Filespec"),
        NameObject("/F"): createStringObject(fname),  # Perhaps also try TextStringObject
        NameObject("/EF"): ef_dict,
    })

    # 3. The catalog entry referencing the file specification. Sample:
    #      1 0 obj
    #      << /Type /Catalog /Outlines 2 0 R /Pages 3 0 R
    #         /Names << /EmbeddedFiles << /Names [(hello.txt) 7 0 R] >> >> >>
    #      endobj
    names_leaf = DictionaryObject()
    names_leaf.update({
        NameObject("/Names"): ArrayObject([createStringObject(fname), file_spec]),
    })

    names_dict = DictionaryObject()
    names_dict.update({
        NameObject("/EmbeddedFiles"): names_leaf,
    })

    # Install (replacing any previous /Names value) on the catalog.
    self._root_object.update({
        NameObject("/Names"): names_dict,
    })
def appendPagesFromReader(self, reader, after_page_append=None):
    """
    Copy every page of *reader* to the end of this writer, optionally
    notifying a callback after each page.

    :param reader: a PdfFileReader object whose pages are copied, in
        order, into this writer object.
    :callback after_page_append (function): Callback function that is invoked after
        each page is appended to the writer. Callback signature:

        :param writer_pageref (PDF page reference): Reference to the page
            appended to the writer.
    """
    total = reader.getNumPages()
    # Index in the writer at which the first copied page will land.
    base = self.getNumPages()

    for offset in range(total):
        self.addPage(reader.getPage(offset))
        appended = self.getPage(base + offset)
        # Anything that is not callable (e.g. the default None) is ignored.
        if callable(after_page_append):
            after_page_append(appended)
def updatePageFormFieldValues(self, page, fields):
    '''
    Update the form field values for a given page from a fields dictionary.
    Copy field texts and values from fields to page.

    A page without an ``/Annots`` entry carries no fields and is left
    untouched.

    :param page: Page reference from PDF writer where the annotations
        and field data will be updated.
    :param fields: a Python dictionary of field names (/T) and text
        values (/V)
    '''
    # Pages without annotations used to raise KeyError on the lookup below.
    if '/Annots' not in page:
        return
    for annot_ref in page['/Annots']:
        writer_annot = annot_ref.getObject()
        field_name = writer_annot.get('/T')
        for field in fields:
            if field_name == field:
                writer_annot.update({
                    NameObject("/V"): TextStringObject(fields[field])
                })
def cloneReaderDocumentRoot(self, reader):
    '''
    Use the reader's document root as this writer's root object.

    The ``/Root`` entry of the reader's trailer is assigned as is; no
    copy is made.

    :param reader: PdfFileReader from which the document root is taken.
    '''
    trailer = reader.trailer
    self._root_object = trailer['/Root']
def cloneDocumentFromReader(self, reader, after_page_append=None):
    '''
    Create a copy (clone) of a document from a PDF file reader: first the
    document root is taken over, then all pages are appended.

    :param reader: PDF file reader instance from which the clone
        should be created.
    :callback after_page_append (function): Callback function that is invoked after
        each page is appended to the writer. Signature includes a reference to the
        appended page (delegates to appendPagesFromReader). Callback signature:

        :param writer_pageref (PDF page reference): Reference to the page just
            appended to the document.
    '''
    steps = (
        lambda: self.cloneReaderDocumentRoot(reader),
        lambda: self.appendPagesFromReader(reader, after_page_append),
    )
    for step in steps:
        step()
def encrypt(self, user_pwd, owner_pwd = None, use_128bit = True):
    """
    Encrypt this PDF file with the PDF Standard encryption handler.

    Builds the ``/Encrypt`` dictionary, registers it as an indirect
    object, and stores the document ID pair and the encryption key on
    the writer.

    :param str user_pwd: The "user password", which allows for opening
        and reading the PDF file with the restrictions provided.
    :param str owner_pwd: The "owner password", which allows for
        opening the PDF files without any restrictions. By default,
        the owner password is the same as the user password.
    :param bool use_128bit: flag as to whether to use 128bit
        encryption. When false, 40bit encryption will be used. By default,
        this flag is on.
    """
    import time
    import random

    if owner_pwd == None:
        owner_pwd = user_pwd

    # (/V value, /R revision, key length in bytes)
    if use_128bit:
        V, rev, keylen = 2, 3, int(128 / 8)
    else:
        V, rev, keylen = 1, 2, int(40 / 8)

    P = -1  # permit everything
    O = ByteStringObject(_alg33(owner_pwd, user_pwd, rev, keylen))

    # Two document identifiers: digests of the current time and of a
    # random number.
    time_digest = md5(b_(repr(time.time()))).digest()
    ID_1 = ByteStringObject(time_digest)
    random_digest = md5(b_(repr(random.random()))).digest()
    ID_2 = ByteStringObject(random_digest)
    self._ID = ArrayObject((ID_1, ID_2))

    if rev == 2:
        U, key = _alg34(user_pwd, O, P, ID_1)
    else:
        assert rev == 3
        U, key = _alg35(user_pwd, rev, keylen, O, P, ID_1, False)

    # Entries in the order they are written; /Length is present only
    # when /V is 2.
    entries = [
        ("/Filter", NameObject("/Standard")),
        ("/V", NumberObject(V)),
    ]
    if V == 2:
        entries.append(("/Length", NumberObject(keylen * 8)))
    entries.extend([
        ("/R", NumberObject(rev)),
        ("/O", ByteStringObject(O)),
        ("/U", ByteStringObject(U)),
        ("/P", NumberObject(P)),
    ])

    encrypt = DictionaryObject()
    for entry_name, entry_value in entries:
        encrypt[NameObject(entry_name)] = entry_value

    self._encrypt = self._addObject(encrypt)
    self._encrypt_key = key
def write(self, stream):
"""
Writes the collection of pages added to this object out as a PDF file.
:param stream: An object to write the file to. The object must support
the write method and the tell method, similar to a file object.
"""
if hasattr(stream, 'mode') and 'b' not in stream.mode:
warnings.warn("File <%s> to write to is not in binary mode. It may not be written to correctly." % stream.name)
debug = False
import struct
if not self._root:
self._root = self._addObject(self._root_object)
externalReferenceMap = {}
# PDF objects sometimes have circular references to their /Page objects
# inside their object tree (for example, annotations). Those will be
# indirect references to objects that we've recreated in this PDF. To
# address this problem, PageObject's store their original object
# reference number, and we add it to the external reference map before
# we sweep for indirect references. This forces self-page-referencing
# trees to reference the correct new object location, rather than
# copying in a new copy of the page object.
for objIndex in range(len(self._objects)):
obj = self._objects[objIndex]
if isinstance(obj, PageObject) and obj.indirectRef != None:
data = obj.indirectRef
if data.pdf not in externalReferenceMap:
externalReferenceMap[data.pdf] = {}
if data.generation not in externalReferenceMap[data.pdf]:
externalReferenceMap[data.pdf][data.generation] = {}
externalReferenceMap[data.pdf][data.generation][data.idnum] = IndirectObject(objIndex + 1, 0, self)
self.stack = []
if debug: print(("ERM:", externalReferenceMap, "root:", self._root))
self._sweepIndirectReferences(externalReferenceMap, self._root)
del self.stack
# Begin writing:
object_positions = []
stream.write(self._header + b_("\n"))
stream.write(b_("%\xE2\xE3\xCF\xD3\n"))
for i in range(len(self._objects)):
idnum = (i + 1)
obj = self._objects[i]
object_positions.append(stream.tell())
stream.write(b_(str(idnum) + " 0 obj\n"))
key = None
if hasattr(self, "_encrypt") and idnum != self._encrypt.idnum:
pack1 = struct.pack("` for details.
"""
pageRef = self.getObject(self._pages)['/Kids'][pagenum]
action = DictionaryObject()
zoomArgs = []
for a in args:
if a is not None:
zoomArgs.append(NumberObject(a))
else:
zoomArgs.append(NullObject())
dest = Destination(NameObject("/"+title + " bookmark"), pageRef, NameObject(fit), *zoomArgs)
destArray = dest.getDestArray()
action.update({
NameObject('/D') : destArray,
NameObject('/S') : NameObject('/GoTo')
})
actionRef = self._addObject(action)
outlineRef = self.getOutlineRoot()
if parent == None:
parent = outlineRef
bookmark = TreeObject()
bookmark.update({
NameObject('/A'): actionRef,
NameObject('/Title'): createStringObject(title),
})
if color is not None:
bookmark.update({NameObject('/C'): ArrayObject([FloatObject(c) for c in color])})
format = 0
if italic:
format += 1
if bold:
format += 2
if format:
bookmark.update({NameObject('/F'): NumberObject(format)})
bookmarkRef = self._addObject(bookmark)
parent = parent.getObject()
parent.addChild(bookmarkRef, self)
return bookmarkRef
def addNamedDestinationObject(self, dest):
    """
    Register *dest* as an indirect object and list it, keyed by its
    ``/Title``, in the named-destination array.

    :param dest: destination object providing a ``/Title`` entry.
    :return: the indirect reference created for *dest*.
    """
    dest_ref = self._addObject(dest)
    named = self.getNamedDestRoot()
    pair = [dest['/Title'], dest_ref]
    named.extend(pair)
    return dest_ref
def addNamedDestination(self, title, pagenum):
    """
    Create a ``/GoTo`` destination for page *pagenum* and list it under
    *title* in the named-destination array.

    The destination array is ``[page, /FitH, 826]``.

    :param title: name under which the destination is listed.
    :param int pagenum: index into the root ``/Kids`` array.
    :return: the indirect reference of the new destination dictionary.
    """
    page_ref = self.getObject(self._pages)['/Kids'][pagenum]
    target = ArrayObject([page_ref, NameObject('/FitH'), NumberObject(826)])

    goto = DictionaryObject()
    goto.update({
        NameObject('/D'): target,
        NameObject('/S'): NameObject('/GoTo'),
    })
    goto_ref = self._addObject(goto)

    named = self.getNamedDestRoot()
    named.extend([title, goto_ref])
    return goto_ref
def removeLinks(self):
    """
    Remove links and annotations from this output by deleting the
    ``/Annots`` entry of every page in the root ``/Kids`` array.
    """
    kids = self.getObject(self._pages)['/Kids']
    for kid in kids:
        page_dict = self.getObject(kid)
        if "/Annots" not in page_dict:
            continue
        del page_dict['/Annots']
def removeImages(self, ignoreByteStringObject=False):
    """
    Removes images from this output.

    For every page the content stream is rewritten without the ``re``
    operator and without the listed graphics operators that occur after
    a ``q`` and before the matching ``Q``.

    :param bool ignoreByteStringObject: when true, string operands of the
        text-showing operators that are not :class:`TextStringObject`
        instances are replaced by an empty :class:`TextStringObject`.
    """
    # Operators dropped while inside a q ... Q section.
    dropped_in_section = (
        b_('cm'), b_('w'), b_('J'), b_('j'), b_('M'), b_('d'), b_('ri'), b_('i'),
        b_('gs'), b_('W'), b_('b'), b_('s'), b_('S'), b_('f'), b_('F'), b_('n'), b_('m'), b_('l'),
        b_('c'), b_('v'), b_('y'), b_('h'), b_('B'), b_('Do'), b_('sh'),
    )
    # Text operators with a single string operand and that operand's index.
    single_text_ops = ((b_('Tj'), 0), (b_("'"), 0), (b_('"'), 2))
    array_text_op = b_("TJ")
    section_open = b_('q')
    section_close = b_('Q')
    rectangle_op = b_('re')

    for kid in self.getObject(self._pages)['/Kids']:
        page_dict = self.getObject(kid)
        content = page_dict['/Contents'].getObject()
        if not isinstance(content, ContentStream):
            content = ContentStream(content, page_dict)

        kept = []
        in_section = False
        for operands, operator in content.operations:
            for text_op, pos in single_text_ops:
                if operator == text_op:
                    text = operands[pos]
                    if ignoreByteStringObject and not isinstance(text, TextStringObject):
                        operands[pos] = TextStringObject()
                    break
            else:
                if operator == array_text_op:
                    pieces = operands[0]
                    for i in range(len(pieces)):
                        if ignoreByteStringObject and not isinstance(pieces[i], TextStringObject):
                            pieces[i] = TextStringObject()

            if operator == section_open:
                in_section = True
            elif operator == section_close:
                in_section = False

            if in_section and operator in dropped_in_section:
                continue
            if operator == rectangle_op:
                continue
            kept.append((operands, operator))

        content.operations = kept
        page_dict.__setitem__(NameObject('/Contents'), content)
def removeText(self, ignoreByteStringObject=False):
    """
    Removes text from this output by blanking the string operands of the
    text-showing operators (``Tj``, ``'``, ``"`` and ``TJ``) on every page.

    :param bool ignoreByteStringObject: when false, only
        :class:`TextStringObject` operands are blanked; when true,
        :class:`ByteStringObject` operands are blanked as well.
    """
    if ignoreByteStringObject:
        blankable = (TextStringObject, ByteStringObject)
    else:
        blankable = (TextStringObject,)

    # Text operators with a single string operand and that operand's index.
    single_text_ops = ((b_('Tj'), 0), (b_("'"), 0), (b_('"'), 2))
    array_text_op = b_("TJ")

    for kid in self.getObject(self._pages)['/Kids']:
        page_dict = self.getObject(kid)
        content = page_dict['/Contents'].getObject()
        if not isinstance(content, ContentStream):
            content = ContentStream(content, page_dict)

        for operands, operator in content.operations:
            for text_op, pos in single_text_ops:
                if operator == text_op:
                    text = operands[pos]
                    if isinstance(text, blankable):
                        operands[pos] = TextStringObject()
                    break
            else:
                if operator == array_text_op:
                    pieces = operands[0]
                    for i in range(len(pieces)):
                        if isinstance(pieces[i], blankable):
                            pieces[i] = TextStringObject()

        page_dict.__setitem__(NameObject('/Contents'), content)
def addURI(self, pagenum, uri, rect, border=None):
    """
    Add an URI from a rectangular area to the specified page.
    This uses the basic structure of AddLink

    :param int pagenum: index of the page on which to place the URI action.
    :param str uri: uri of resource to link to.
    :param rect: :class:`RectangleObject` or array of four
        integers specifying the clickable rectangular area
        ``[xLL, yLL, xUR, yUR]``, or string in the form ``"[ xLL yLL xUR yUR ]"``.
    :param border: if provided, an array describing border-drawing
        properties. See the PDF spec for details. When omitted, the
        border array is three ``NumberObject(2)`` entries.

    REMOVED FIT/ZOOM ARG
    -John Mulligan
    """
    page_link = self.getObject(self._pages)['/Kids'][pagenum]
    page_dict = self.getObject(page_link)

    if border is None:
        border_items = [NumberObject(2)] * 3
    else:
        border_items = [NameObject(n) for n in border[:3]]
        if len(border) == 4:
            # Optional fourth element: the dash pattern.
            dash_pattern = ArrayObject([NameObject(n) for n in border[3]])
            border_items.append(dash_pattern)

    if isString(rect):
        rect = NameObject(rect)
    elif not isinstance(rect, RectangleObject):
        rect = RectangleObject(rect)

    uri_action = DictionaryObject()
    uri_action.update({
        NameObject('/S'): NameObject('/URI'),
        NameObject('/URI'): TextStringObject(uri),
    })

    annotation = DictionaryObject()
    annotation.update({
        NameObject('/Type'): NameObject('/Annot'),
        NameObject('/Subtype'): NameObject('/Link'),
        NameObject('/P'): page_link,
        NameObject('/Rect'): rect,
        NameObject('/H'): NameObject('/I'),
        NameObject('/Border'): ArrayObject(border_items),
        NameObject('/A'): uri_action,
    })
    annotation_ref = self._addObject(annotation)

    # Extend the page's annotation array, creating it on first use.
    if "/Annots" in page_dict:
        page_dict['/Annots'].append(annotation_ref)
    else:
        page_dict[NameObject('/Annots')] = ArrayObject([annotation_ref])
def addLink(self, pagenum, pagedest, rect, border=None, fit='/Fit', *args):
    """
    Add an internal link from a rectangular area to the specified page.

    :param int pagenum: index of the page on which to place the link.
    :param int pagedest: index of the page to which the link should go.
    :param rect: :class:`RectangleObject` or array of four
        integers specifying the clickable rectangular area
        ``[xLL, yLL, xUR, yUR]``, or string in the form ``"[ xLL yLL xUR yUR ]"``.
    :param border: if provided, an array describing border-drawing
        properties. See the PDF spec for details. When omitted, the
        border array is three ``NumberObject(0)`` entries.
    :param str fit: Page fit or 'zoom' option (see below). Additional arguments may need
        to be supplied. Passing ``None`` will be read as a null value for that coordinate.

    Valid zoom arguments (see Table 8.2 of the PDF 1.7 reference for details):
         /Fit       No additional arguments
         /XYZ       [left] [top] [zoomFactor]
         /FitH      [top]
         /FitV      [left]
         /FitR      [left] [bottom] [right] [top]
         /FitB      No additional arguments
         /FitBH     [top]
         /FitBV     [left]
    """
    kids = self.getObject(self._pages)['/Kids']
    page_link = kids[pagenum]
    page_dest = kids[pagedest]  #TODO: switch for external link
    page_dict = self.getObject(page_link)

    if border is None:
        border_items = [NumberObject(0)] * 3
    else:
        border_items = [NameObject(n) for n in border[:3]]
        if len(border) == 4:
            # Optional fourth element: the dash pattern.
            dash_pattern = ArrayObject([NameObject(n) for n in border[3]])
            border_items.append(dash_pattern)

    if isString(rect):
        rect = NameObject(rect)
    elif not isinstance(rect, RectangleObject):
        rect = RectangleObject(rect)

    # None placeholders become PDF null objects, everything else a number.
    zoom_args = [NullObject() if a is None else NumberObject(a) for a in args]

    #TODO: create a better name for the link
    destination = Destination(NameObject("/LinkName"), page_dest, NameObject(fit), *zoom_args)
    dest_array = destination.getDestArray()

    annotation = DictionaryObject()
    annotation.update({
        NameObject('/Type'): NameObject('/Annot'),
        NameObject('/Subtype'): NameObject('/Link'),
        NameObject('/P'): page_link,
        NameObject('/Rect'): rect,
        NameObject('/Border'): ArrayObject(border_items),
        NameObject('/Dest'): dest_array,
    })
    annotation_ref = self._addObject(annotation)

    # Extend the page's annotation array, creating it on first use.
    if "/Annots" in page_dict:
        page_dict['/Annots'].append(annotation_ref)
    else:
        page_dict[NameObject('/Annots')] = ArrayObject([annotation_ref])
# Layout names accepted without a warning; setPageLayout() warns about (but
# still stores) any non-NameObject value that is not in this list.
_valid_layouts = ['/NoLayout', '/SinglePage', '/OneColumn', '/TwoColumnLeft', '/TwoColumnRight', '/TwoPageLeft', '/TwoPageRight']
def getPageLayout(self):
    """
    Get the page layout stored under ``/PageLayout`` in the catalog.
    See :meth:`setPageLayout()` for a description of valid layouts.

    :return: Page layout currently being used.
    :rtype: str, None if not specified
    """
    catalog = self._root_object
    try:
        layout = catalog['/PageLayout']
    except KeyError:
        layout = None
    return layout
def setPageLayout(self, layout):
    """
    Set the page layout.

    A value that is not already a :class:`NameObject` is converted to
    one; if it is not a known layout a warning is issued, but the value
    is stored regardless.

    :param str layout: The page layout to be used

    Valid layouts are:
         /NoLayout        Layout explicitly not specified
         /SinglePage      Show one page at a time
         /OneColumn       Show one column at a time
         /TwoColumnLeft   Show pages in two columns, odd-numbered pages on the left
         /TwoColumnRight  Show pages in two columns, odd-numbered pages on the right
         /TwoPageLeft     Show two pages at a time, odd-numbered pages on the left
         /TwoPageRight    Show two pages at a time, odd-numbered pages on the right
    """
    if not isinstance(layout, NameObject):
        known = self._valid_layouts
        if layout not in known:
            warnings.warn("Layout should be one of: {}".format(', '.join(known)))
        layout = NameObject(layout)
    entry = {NameObject('/PageLayout'): layout}
    self._root_object.update(entry)
# Attribute-style access: reading calls getPageLayout(), assigning calls
# setPageLayout().
pageLayout = property(getPageLayout, setPageLayout)
"""Read and write property accessing the :meth:`getPageLayout()`
and :meth:`setPageLayout()` methods."""
# Mode names accepted without a warning; setPageMode() warns about (but
# still stores) any non-NameObject value that is not in this list.
_valid_modes = ['/UseNone', '/UseOutlines', '/UseThumbs', '/FullScreen', '/UseOC', '/UseAttachments']
def getPageMode(self):
    """
    Get the page mode stored under ``/PageMode`` in the catalog.
    See :meth:`setPageMode()` for a description
    of valid modes.

    :return: Page mode currently being used.
    :rtype: str, None if not specified
    """
    catalog = self._root_object
    try:
        mode = catalog['/PageMode']
    except KeyError:
        mode = None
    return mode
def setPageMode(self, mode):
    """
    Set the page mode.

    A value that is not already a :class:`NameObject` is converted to
    one; if it is not a known mode a warning is issued, but the value is
    stored regardless.

    :param str mode: The page mode to use.

    Valid modes are:
        /UseNone         Do not show outlines or thumbnails panels
        /UseOutlines     Show outlines (aka bookmarks) panel
        /UseThumbs       Show page thumbnails panel
        /FullScreen      Fullscreen view
        /UseOC           Show Optional Content Group (OCG) panel
        /UseAttachments  Show attachments panel
    """
    if not isinstance(mode, NameObject):
        known = self._valid_modes
        if mode not in known:
            warnings.warn("Mode should be one of: {}".format(', '.join(known)))
        mode = NameObject(mode)
    entry = {NameObject('/PageMode'): mode}
    self._root_object.update(entry)
# Attribute-style access: reading calls getPageMode(), assigning calls
# setPageMode().
pageMode = property(getPageMode, setPageMode)
"""Read and write property accessing the :meth:`getPageMode()`
and :meth:`setPageMode()` methods."""
class PdfFileReader(object):
"""
Initializes a PdfFileReader object. This operation can take some time, as
the PDF stream's cross-reference tables are read into memory.
:param stream: A File object or an object that supports the standard read
and seek methods similar to a File object. Could also be a
string representing a path to a PDF file.
:param bool strict: Determines whether user should be warned of all
problems and also causes some correctable problems to be fatal.
Defaults to ``True``.
:param warndest: Destination for logging warnings (defaults to
``sys.stderr``).
:param bool overwriteWarnings: Determines whether to override Python's
``warnings.py`` module with a custom implementation (defaults to
``True``).
"""
def __init__(self, stream, strict=True, warndest = None, overwriteWarnings = True):
    """
    Prepare the reader state and parse *stream*.

    :param stream: a file-like object, or a string path; a path is read
        fully into an in-memory :class:`BytesIO` before parsing.
    :param bool strict: stored on the instance as ``self.strict``.
    :param warndest: default destination for warnings when the warning
        hook is replaced; ``sys.stderr`` is used if it is ``None``.
    :param bool overwriteWarnings: when true, ``warnings.showwarning`` is
        replaced process-wide by a writer that uses ``formatWarning``.
    """
    if overwriteWarnings:
        # have to dynamically override the default showwarning since there are no
        # public methods that specify the 'file' parameter
        def _showwarning(message, category, filename, lineno, file=warndest, line=None):
            if file is None:
                file = sys.stderr
            try:
                file.write(formatWarning(message, category, filename, lineno, line))
            except IOError:
                # Best effort: a destination that cannot be written to
                # must not break the caller.
                pass
        warnings.showwarning = _showwarning
    self.strict = strict
    self.flattenedPages = None
    self.resolvedObjects = {}
    self.xrefIndex = 0
    self.xrefstream = False
    self.startxref = 0
    self._pageId2Num = None # map page IndirectRef number to Page Number
    if hasattr(stream, 'mode') and 'b' not in stream.mode:
        warnings.warn("PdfFileReader stream/file object is not in binary mode. It may not be read correctly.", utils.PdfReadWarning)
    if isString(stream):
        # The context manager closes the file even if reading it fails;
        # the explicit close() it replaces was skipped on error.
        with open(stream, 'rb') as fileobj:
            stream = BytesIO(b_(fileobj.read()))
    self.read(stream)
    self.stream = stream
    self._override_encryption = False
def getDocumentInfo(self):
    """
    Retrieve the PDF file's document information dictionary, if it exists.
    Note that some PDF files use metadata streams instead of docinfo
    dictionaries, and these metadata streams will not be accessed by this
    function.

    :return: the document information of this PDF file
    :rtype: :class:`DocumentInformation` or ``None`` if none exists.
    """
    trailer = self.trailer
    if "/Info" in trailer:
        raw_info = trailer['/Info']
        # Copy the entries into a fresh DocumentInformation instance.
        info = DocumentInformation()
        info.update(raw_info)
        return info
    return None
# No setter or deleter is supplied; the lambda looks getDocumentInfo up on
# the instance each time the attribute is read.
documentInfo = property(lambda self: self.getDocumentInfo(), None, None)
"""Read-only property that accesses the :meth:`getDocumentInfo()` function."""
def getXmpMetadata(self):
    """
    Retrieve XMP (Extensible Metadata Platform) data from the PDF document
    root.

    The ``_override_encryption`` flag is raised for the duration of the
    lookup and always reset to ``False`` afterwards.

    :return: a :class:`XmpInformation`
        instance that can be used to access XMP metadata from the document.
    :rtype: :class:`XmpInformation` or
        ``None`` if no metadata was found on the document root.
    """
    try:
        self._override_encryption = True
        root = self.trailer["/Root"]
        metadata = root.getXmpMetadata()
        return metadata
    finally:
        self._override_encryption = False
# No setter or deleter is supplied; the lambda looks getXmpMetadata up on
# the instance each time the attribute is read.
xmpMetadata = property(lambda self: self.getXmpMetadata(), None, None)
"""
Read-only property that accesses the
:meth:`getXmpMetadata()` function.
"""
def getNumPages(self):
    """
    Calculates the number of pages in this PDF file.

    :return: number of pages
    :rtype: int
    :raises PdfReadError: if file is encrypted and restrictions prevent
        this action.
    """
    # Flattened pages will not work on an Encrypted PDF;
    # the PDF file's page count is used in this case. Otherwise,
    # the original method (flattened page count) is used.
    if self.isEncrypted:
        try:
            self._override_encryption = True
            self.decrypt('')
            return self.trailer["/Root"]["/Pages"]["/Count"]
        except Exception:
            # Only ordinary errors are reported as a read failure;
            # KeyboardInterrupt and SystemExit propagate unchanged.
            raise utils.PdfReadError("File has not been decrypted")
        finally:
            self._override_encryption = False
    if self.flattenedPages is None:
        self._flatten()
    return len(self.flattenedPages)
# No setter or deleter is supplied; the lambda looks getNumPages up on the
# instance each time the attribute is read.
numPages = property(lambda self: self.getNumPages(), None, None)
"""
Read-only property that accesses the
:meth:`getNumPages()` function.
"""
def getPage(self, pageNumber):
    """
    Retrieve a page by number from this PDF file.

    The flattened page list is built on first use.

    :param int pageNumber: The page number to retrieve
        (pages begin at zero)
    :return: a :class:`PageObject` instance.
    :rtype: :class:`PageObject`
    """
    ## ensure that we're not trying to access an encrypted PDF
    #assert not self.trailer.has_key("/Encrypt")
    flattened = self.flattenedPages
    if flattened == None:
        self._flatten()
        flattened = self.flattenedPages
    return flattened[pageNumber]
# No setter or deleter is supplied; the lambda looks getNamedDestinations up
# on the instance each time the attribute is read.
namedDestinations = property(lambda self:
self.getNamedDestinations(), None, None)
"""
Read-only property that accesses the
:meth:`getNamedDestinations()` function.
"""
# A select group of relevant field attributes. For the complete list,
# see section 8.6.2 of the PDF 1.7 reference.
def getFields(self, tree = None, retval = None, fileobj = None):
    """
    Extract field data if this PDF contains interactive form fields.
    The *tree* and *retval* parameters are for recursive use.

    :param fileobj: A file object (usually a text file) to write
        a report to on all interactive form fields found.
    :return: A dictionary where each key is a field name, and each
        value is a :class:`Field` object. By
        default, the mapping name is used for keys.
    :rtype: dict, or ``None`` if form data could not be located.
    """
    # Field attributes that are reported, with their display labels.
    attr_labels = {"/FT" : "Field Type", "/Parent" : "Parent",
                   "/T" : "Field Name", "/TU" : "Alternate Field Name",
                   "/TM" : "Mapping Name", "/Ff" : "Field Flags",
                   "/V" : "Value", "/DV" : "Default Value"}

    if retval == None:
        # Top-level call: start from the catalog's AcroForm dictionary.
        retval = {}
        catalog = self.trailer["/Root"]
        if "/AcroForm" not in catalog:
            return None
        tree = catalog["/AcroForm"]
    if tree == None:
        return retval

    self._checkKids(tree, retval, fileobj)

    # A node carrying any known attribute is itself a field.
    if any(attr in tree for attr in attr_labels):
        self._buildField(tree, retval, fileobj, attr_labels)

    if "/Fields" in tree:
        for field_ref in tree["/Fields"]:
            self._buildField(field_ref.getObject(), retval, fileobj, attr_labels)

    return retval
def _buildField(self, field, retval, fileobj, fieldAttributes):
    """
    Record *field* in *retval* after descending into its kids.

    The key is the field's ``/TM`` entry, falling back to ``/T``; a field
    with neither is skipped. When *fileobj* is truthy a report of the
    field followed by a blank line is written to it.
    """
    self._checkKids(field, retval, fileobj)

    for name_entry in ("/TM", "/T"):
        try:
            key = field[name_entry]
        except KeyError:
            continue
        break
    else:
        # Ignore no-name field for now
        return

    if fileobj:
        self._writeField(fileobj, field, fieldAttributes)
        fileobj.write("\n")
    retval[key] = Field(field)
def _checkKids(self, tree, retval, fileobj):
    """
    Recurse into every entry of ``tree["/Kids"]`` via :meth:`getFields`;
    does nothing when the node has no ``/Kids`` entry.
    """
    if "/Kids" not in tree:
        return
    # recurse down the tree
    for kid_ref in tree["/Kids"]:
        self.getFields(kid_ref.getObject(), retval, fileobj)
def _writeField(self, fileobj, field, fieldAttributes):
    """
    Write one ``label: value`` line per attribute present on *field*.

    :param fileobj: object with a ``write`` method receiving the report.
    :param field: mapping holding the field's attributes.
    :param fieldAttributes: mapping from attribute key to display label.
    """
    # Make the field type value more clear
    type_names = {"/Btn":"Button", "/Tx":"Text", "/Ch": "Choice",
                  "/Sig":"Signature"}
    report_order = ("/TM", "/T", "/FT", "/Parent", "/TU", "/Ff", "/V", "/DV")

    for attr in report_order:
        label = fieldAttributes[attr]
        try:
            if attr == "/FT":
                # Unknown field types are skipped silently.
                if field[attr] not in type_names:
                    continue
                text = type_names[field[attr]]
            elif attr == "/Parent":
                # Let's just write the name of the parent
                try:
                    text = field["/Parent"]["/TM"]
                except KeyError:
                    text = field["/Parent"]["/T"]
            else:
                text = str(field[attr])
            fileobj.write(label + ": " + text + "\n")
        except KeyError:
            # Field attribute is N/A or unknown, so don't write anything
            pass
def getFormTextFields(self):
    ''' Retrieves form fields from the document with textual data (inputs, dropdowns)

    :return: a dictionary mapping each text field's name (``/T``) to its
        value (``/V``, or ``None`` when absent); empty when the document
        has no form data.
    :rtype: dict
    '''
    # Retrieve document form fields
    formfields = self.getFields()
    # getFields() returns None when no form data is found; iterating over
    # that raised TypeError.
    if formfields is None:
        return {}
    return dict(
        (field['/T'], field.get('/V'))
        for field in formfields.values()
        if field.get('/FT') == '/Tx'
    )
def getNamedDestinations(self, tree=None, retval=None):
    """
    Retrieves the named destinations present in the document.

    :param tree: name-tree node to scan; used by the recursive calls, the
        top-level call locates the tree itself.
    :param dict retval: accumulator shared by the recursive calls; leave it
        unset for the top-level call.
    :return: a dictionary which maps names to
        :class:`Destinations`.
    :rtype: dict
    """
    if retval is None:
        # Top-level call: start a fresh result and look the tree up in the
        # catalog, either directly under /Dests or under /Names -> /Dests.
        retval = {}
        catalog = self.trailer["/Root"]
        if "/Dests" in catalog:
            tree = catalog["/Dests"]
        elif "/Names" in catalog:
            name_dict = catalog['/Names']
            if "/Dests" in name_dict:
                tree = name_dict['/Dests']
    if tree is None:
        return retval
    if "/Kids" in tree:
        # recurse down the tree
        for child in tree["/Kids"]:
            self.getNamedDestinations(child.getObject(), retval)
    if "/Names" in tree:
        # /Names is a flat array of alternating keys and values.
        entries = tree["/Names"]
        for pos in range(0, len(entries), 2):
            key = entries[pos].getObject()
            target = entries[pos+1].getObject()
            if isinstance(target, DictionaryObject) and '/D' in target:
                target = target['/D']
            dest = self._buildDestination(key, target)
            if dest is not None:
                retval[key] = dest
    return retval
outlines = property(fget=lambda self: self.getOutlines())
"""
Read-only property that returns the result of calling
:meth:`getOutlines()` without arguments.
"""
def getOutlines(self, node=None, outlines=None):
    """
    Retrieves the document outline present in the document.

    :param node: outline item to start from; used by the recursive calls,
        the top-level call starts at the catalog's ``/Outlines`` -> ``/First``.
    :param list outlines: accumulator used by the recursive calls; leave it
        unset for the top-level call.
    :return: a nested list of :class:`Destinations`; the children
        of an item follow it as a sub-list.
    """
    if outlines is None:
        # Top-level call: find the first outline item and refresh the
        # named destinations that _buildOutline() looks names up in.
        outlines = []
        catalog = self.trailer["/Root"]
        if "/Outlines" in catalog:
            try:
                lines = catalog["/Outlines"]
            except utils.PdfReadError:
                # this occurs if the /Outlines object reference is incorrect
                # for an example of such a file, see https://unglueit-files.s3.amazonaws.com/ebf/7552c42e9280b4476e59e77acc0bc812.pdf
                # so continue to load the file without the Bookmarks
                return outlines
            if "/First" in lines:
                node = lines["/First"]
        self._namedDests = self.getNamedDestinations()
    if node is None:
        return outlines
    # Follow the /Next chain of siblings starting at node.
    current = node
    while True:
        entry = self._buildOutline(current)
        if entry:
            outlines.append(entry)
        # Children, if any, are collected into their own list.
        if "/First" in current:
            children = []
            self.getOutlines(current["/First"], children)
            if children:
                outlines.append(children)
        if "/Next" not in current:
            break
        current = current["/Next"]
    return outlines
def _getPageNumberByIndirect(self, indirectRef):
    """
    Map an indirect reference (or a bare object id) to a page index.

    The ``_pageId2Num`` lookup table (object id -> page index) is built from
    ``self.pages`` the first time it is needed and reused afterwards.

    :param indirectRef: an ``int`` object id, or an object with an
        ``idnum`` attribute.
    :return: the page index, or -1 when the id belongs to no page.
    :rtype: int
    """
    if self._pageId2Num is None:
        self._pageId2Num = dict(
            (page.indirectRef.idnum, index)
            for index, page in enumerate(self.pages)
        )
    idnum = indirectRef if isinstance(indirectRef, int) else indirectRef.idnum
    return self._pageId2Num.get(idnum, -1)
def getPageNumber(self, page):
    """
    Retrieve page number of a given PageObject

    :param PageObject page: The page to get page number. Should be
        an instance of :class:`PageObject`
    :return: the page number or -1 if page not found
    :rtype: int
    """
    # The lookup is keyed by the page's own indirect reference.
    return self._getPageNumberByIndirect(page.indirectRef)
def getDestinationPageNumber(self, destination):
    """
    Retrieve page number of a given Destination object

    :param Destination destination: The destination to get page number.
         Should be an instance of
         :class:`Destination`
    :return: the page number or -1 if page not found
    :rtype: int
    """
    # The destination's ``page`` attribute is what gets looked up.
    return self._getPageNumberByIndirect(destination.page)
def _buildDestination(self, title, array):
    """
    Build a :class:`Destination` titled *title* from a destination array.

    The first two items of *array* are passed as the page and the type; any
    remaining items are passed on as extra positional arguments.
    """
    page, typ = array[:2]
    extra = array[2:]
    return Destination(title, page, typ, *extra)
def _buildOutline(self, node):
    """
    Turn one outline item into a destination, when it designates one.

    The target is taken from a ``/GoTo`` action (``/A``) or from ``/Dest``;
    in both cases the item must also have a ``/Title``.

    :param node: dictionary-like outline item.
    :return: the destination built from an array target, or the named
        destination (with its ``/Title`` set) for a string target found in
        ``self._namedDests``; ``None`` when the item has no usable target.
    :raises utils.PdfReadError: if the target is neither of those.
    """
    dest = title = None
    has_title = "/Title" in node
    if "/A" in node and has_title:
        # Action, section 8.5 (only type GoTo supported)
        title = node["/Title"]
        action = node["/A"]
        if action["/S"] == "/GoTo":
            dest = action["/D"]
    elif "/Dest" in node and has_title:
        # Destination, section 8.2.1
        title = node["/Title"]
        dest = node["/Dest"]
    if not dest:
        return None
    if isinstance(dest, ArrayObject):
        return self._buildDestination(title, dest)
    if isString(dest) and dest in self._namedDests:
        outline = self._namedDests[dest]
        outline[NameObject("/Title")] = title
        return outline
    raise utils.PdfReadError("Unexpected destination %r" % dest)
pages = property(
    fget=lambda self: ConvertFunctionsToVirtualList(self.getNumPages,
                                                    self.getPage)
)
"""
Read-only property that emulates a list based upon the
:meth:`getNumPages()` and
:meth:`getPage()` methods.
"""
def getPageLayout(self):
    """
    Get the page layout.

    See :meth:`setPageLayout()`
    for a description of valid layouts.

    :return: Page layout currently being used.
    :rtype: ``str``, ``None`` if not specified
    """
    try:
        root = self.trailer['/Root']
        layout = root['/PageLayout']
    except KeyError:
        # Either lookup failing means no layout is specified.
        layout = None
    return layout
pageLayout = property(getPageLayout)
"""Read-only property accessing the
:meth:`getPageLayout()` method."""
def getPageMode(self):
    """
    Get the page mode.

    See :meth:`setPageMode()`
    for a description of valid modes.

    :return: Page mode currently being used.
    :rtype: ``str``, ``None`` if not specified
    """
    try:
        root = self.trailer['/Root']
        mode = root['/PageMode']
    except KeyError:
        # Either lookup failing means no mode is specified.
        mode = None
    return mode
pageMode = property(getPageMode)
"""Read-only property accessing the
:meth:`getPageMode()` method."""
def _flatten(self, pages=None, inherit=None, indirectRef=None):
    """
    Walk the page tree and append every ``/Page`` leaf, in traversal order,
    to ``self.flattenedPages``.

    Called without arguments, the list is reset and the walk starts at the
    catalog's ``/Pages`` node; the arguments are used by the recursion.

    :param pages: the tree node being visited (a node without ``/Type`` is
        treated as ``/Pages``).
    :param dict inherit: inheritable attributes collected from the ancestors
        of *pages*.
    :param indirectRef: indirect reference *pages* was reached through, if
        any; stored on the resulting :class:`PageObject`.
    """
    inheritablePageAttributes = (
        NameObject("/Resources"), NameObject("/MediaBox"),
        NameObject("/CropBox"), NameObject("/Rotate")
    )
    if inherit is None:
        inherit = dict()
    if pages is None:
        self.flattenedPages = []
        catalog = self.trailer["/Root"].getObject()
        pages = catalog["/Pages"].getObject()
    t = "/Pages"
    if "/Type" in pages:
        t = pages["/Type"]
    if t == "/Pages":
        # Attributes are inherited from ancestors only.  Work on a private
        # copy so that values set by this node reach its own descendants
        # but do not leak into siblings that are visited afterwards.
        inherit = dict(inherit)
        for attr in inheritablePageAttributes:
            if attr in pages:
                inherit[attr] = pages[attr]
        for page in pages["/Kids"]:
            addt = {}
            if isinstance(page, IndirectObject):
                addt["indirectRef"] = page
            self._flatten(page.getObject(), inherit, **addt)
    elif t == "/Page":
        for attr, value in inherit.items():
            # if the page has its own value, it does not inherit the
            # parent's value:
            if attr not in pages:
                pages[attr] = value
        pageObj = PageObject(self, indirectRef)
        pageObj.update(pages)
        self.flattenedPages.append(pageObj)
def _getObjectFromStream(self, indirectReference):
    """
    Read the object designated by *indirectReference* out of the object
    stream that ``self.xref_objStm`` lists for its id.

    The stream's header (``/N`` pairs of object number and offset) is
    scanned for the wanted object number; the object is then parsed at
    ``/First`` + offset.

    :return: the parsed object, or a ``NullObject`` when (in non-strict
        mode) the object cannot be read or is not listed in the stream.
    :raises utils.PdfReadError: in strict mode, when the object sits at
        another index than the xref entry says, cannot be read, or is not
        found at all.
    """
    debug = False
    stmnum, idx = self.xref_objStm[indirectReference.idnum]
    if debug: print(("Here1: %s %s"%(stmnum, idx)))
    # The whole object stream is loaded into memory.
    objStm = IndirectObject(stmnum, 0, self).getObject()
    if debug: print(("Here2: objStm=%s.. stmnum=%s data=%s"%(objStm, stmnum, objStm.getData())))
    # This is an xref to a stream, so its type better be a stream
    assert objStm['/Type'] == '/ObjStm'
    # /N is the number of indirect objects in the stream
    assert idx < objStm['/N']
    streamData = BytesIO(b_(objStm.getData()))

    def skipWhitespace():
        # Consume whitespace, then step back onto the byte that ended it.
        readNonWhitespace(streamData)
        streamData.seek(-1, 1)

    wanted = indirectReference.idnum
    for i in range(objStm['/N']):
        skipWhitespace()
        objnum = NumberObject.readFromStream(streamData)
        skipWhitespace()
        offset = NumberObject.readFromStream(streamData)
        skipWhitespace()
        if objnum != wanted:
            # We're only interested in one object
            continue
        if self.strict and idx != i:
            raise utils.PdfReadError("Object is in wrong index.")
        streamData.seek(objStm['/First']+offset, 0)
        if debug:
            pos = streamData.tell()
            streamData.seek(0, 0)
            lines = streamData.readlines()
            for i in range(0, len(lines)):
                print((lines[i]))
            streamData.seek(pos, 0)
        try:
            obj = readObject(streamData, self)
        except utils.PdfStreamError as e:
            # Stream object cannot be read. Normally, a critical error, but
            # Adobe Reader doesn't complain, so continue (in strict mode?)
            e = sys.exc_info()[1]
            warnings.warn("Invalid stream (index %d) within object %d %d: %s" % \
                (i, indirectReference.idnum, indirectReference.generation, e), utils.PdfReadWarning)
            if self.strict:
                raise utils.PdfReadError("Can't read object stream: %s"%e)
            # Replace with null. Hopefully it's nothing important.
            obj = NullObject()
        return obj
    # The header did not list the wanted object number.
    if self.strict: raise utils.PdfReadError("This is a fatal error in strict mode.")
    return NullObject()
def getObject(self, indirectReference):
# Resolve an indirect reference to the object it designates.  A cached copy
# is returned when there is one; otherwise generation-0 objects listed in
# self.xref_objStm are read from their object stream, and objects listed in
# self.xref are parsed from self.stream at the recorded offset.
debug = False
if debug: print(("looking at:", indirectReference.idnum, indirectReference.generation))
retval = self.cacheGetIndirectObject(indirectReference.generation,
indirectReference.idnum)
if retval != None:
return retval
if indirectReference.generation == 0 and \
indirectReference.idnum in self.xref_objStm:
retval = self._getObjectFromStream(indirectReference)
elif indirectReference.generation in self.xref and \
indirectReference.idnum in self.xref[indirectReference.generation]:
start = self.xref[indirectReference.generation][indirectReference.idnum]
if debug: print((" Uncompressed Object", indirectReference.idnum, indirectReference.generation, ":", start))
self.stream.seek(start, 0)
idnum, generation = self.readObjectHeader(self.stream)
if idnum != indirectReference.idnum and self.xrefIndex:
# Xref table probably had bad indexes due to not being zero-indexed
if self.strict:
raise utils.PdfReadError("Expected object ID (%d %d) does not match actual (%d %d); xref table not zero-indexed." \
% (indirectReference.idnum, indirectReference.generation, idnum, generation))
else: pass # xref table is corrected in non-strict mode
elif idnum != indirectReference.idnum and self.strict:
# some other problem
raise utils.PdfReadError("Expected object ID (%d %d) does not match actual (%d %d)." \
% (indirectReference.idnum, indirectReference.generation, idnum, generation))
if self.strict:
assert generation == indirectReference.generation
retval = readObject(self.stream, self)
# override encryption is used for the /Encrypt dictionary
if not self._override_encryption and self.isEncrypted:
# if we don't have the encryption key:
if not hasattr(self, '_decryption_key'):
raise utils.PdfReadError("file has not been decrypted")
# otherwise, decrypt here...
import struct
# NOTE(review): the next line looks damaged -- text appears to have been lost
# between the struct format string and `read", stream)`.  Everything after it
# works on a name `stream` that getObject() never defines, so it presumably
# belongs to a separate `read(self, stream)` method whose header (and the
# remainder of getObject) is missing here.  TODO: restore from upstream.
pack1 = struct.pack(">read", stream)
# start at the end:
stream.seek(-1, 2)
if not stream.tell():
raise utils.PdfReadError('Cannot read an empty file')
last1K = stream.tell() - 1024 + 1 # offset of last 1024 bytes of stream
line = b_('')
# Read lines backwards until one starts with %%EOF; give up once the
# position falls below last1K.
while line[:5] != b_("%%EOF"):
if stream.tell() < last1K:
raise utils.PdfReadError("EOF marker not found")
line = self.readNextEndLine(stream)
if debug: print(" line:",line)
# find startxref entry - the location of the xref table
line = self.readNextEndLine(stream)
try:
startxref = int(line)
except ValueError:
# 'startxref' may be on the same line as the location
if not line.startswith(b_("startxref")):
raise utils.PdfReadError("startxref not found")
startxref = int(line[9:].strip())
warnings.warn("startxref on same line as offset")
else:
line = self.readNextEndLine(stream)
if line[:9] != b_("startxref"):
raise utils.PdfReadError("startxref not found")
self.startxref = startxref
self.xrefstream = False
# read all cross reference tables and their trailers
self.xref = {}
self.xref_objStm = {}
self.trailer = DictionaryObject()
# Each pass handles one xref section and then follows its /Prev entry, if
# any, to the next one.
while True:
# load the xref table
stream.seek(startxref, 0)
x = stream.read(1)
if x == b_("x"):
# standard cross-reference table
ref = stream.read(4)
if ref[:3] != b_("ref"):
raise utils.PdfReadError("xref table read error")
readNonWhitespace(stream)
stream.seek(-1, 1)
firsttime = True; # check if the first time looking at the xref table
while True:
num = readObject(stream, self)
if firsttime and num != 0:
self.xrefIndex = num
if self.strict:
warnings.warn("Xref table not zero-indexed. ID numbers for objects will be corrected.", utils.PdfReadWarning)
#if table not zero indexed, could be due to error from when PDF was created
#which will lead to mismatched indices later on, only warned and corrected if self.strict=True
firsttime = False
readNonWhitespace(stream)
stream.seek(-1, 1)
size = readObject(stream, self)
readNonWhitespace(stream)
stream.seek(-1, 1)
cnt = 0
while cnt < size:
line = stream.read(20)
# It's very clear in section 3.4.3 of the PDF spec
# that all cross-reference table lines are a fixed
# 20 bytes (as of PDF 1.7). However, some files have
# 21-byte entries (or more) due to the use of \r\n
# (CRLF) EOL's. Detect that case, and adjust the line
# until it does not begin with a \r (CR) or \n (LF).
while line[0] in b_("\x0D\x0A"):
stream.seek(-20 + 1, 1)
line = stream.read(20)
# On the other hand, some malformed PDF files
# use a single character EOL without a preceding
# space. Detect that case, and seek the stream
# back one character. (0-9 means we've bled into
# the next xref entry, t means we've bled into the
# text "trailer"):
if line[-1] in b_("0123456789t"):
stream.seek(-1, 1)
offset, generation = line[:16].split(b_(" "))
offset, generation = int(offset), int(generation)
if generation not in self.xref:
self.xref[generation] = {}
if num in self.xref[generation]:
# It really seems like we should allow the last
# xref table in the file to override previous
# ones. Since we read the file backwards, assume
# any existing key is already set correctly.
pass
else:
self.xref[generation][num] = offset
cnt += 1
num += 1
readNonWhitespace(stream)
stream.seek(-1, 1)
trailertag = stream.read(7)
if trailertag != b_("trailer"):
# more xrefs!
stream.seek(-7, 1)
else:
break
readNonWhitespace(stream)
stream.seek(-1, 1)
newTrailer = readObject(stream, self)
# Keys already present in self.trailer are kept; only new ones are added.
for key, value in list(newTrailer.items()):
if key not in self.trailer:
self.trailer[key] = value
if "/Prev" in newTrailer:
startxref = newTrailer["/Prev"]
else:
break
elif x.isdigit():
# PDF 1.5+ Cross-Reference Stream
stream.seek(-1, 1)
idnum, generation = self.readObjectHeader(stream)
xrefstream = readObject(stream, self)
assert xrefstream["/Type"] == "/XRef"
self.xrefstream = True
self.cacheIndirectObject(generation, idnum, xrefstream)
streamData = BytesIO(b_(xrefstream.getData()))
# Index pairs specify the subsections in the dictionary. If
# none create one subsection that spans everything.
idx_pairs = xrefstream.get("/Index", [0, xrefstream.get("/Size")])
if debug: print(("read idx_pairs=%s"%list(self._pairs(idx_pairs))))
entrySizes = xrefstream.get("/W")
assert len(entrySizes) >= 3
if self.strict and len(entrySizes) > 3:
raise utils.PdfReadError("Too many entry sizes: %s" %entrySizes)
def getEntry(i):
# Reads the correct number of bytes for each entry. See the
# discussion of the W parameter in PDF spec table 17.
if entrySizes[i] > 0:
d = streamData.read(entrySizes[i])
return convertToInt(d, entrySizes[i])
# PDF Spec Table 17: A value of zero for an element in the
# W array indicates...the default value shall be used
if i == 0: return 1 # First value defaults to 1
else: return 0
def used_before(num, generation):
# We move backwards through the xrefs, don't replace any.
return num in self.xref.get(generation, []) or \
num in self.xref_objStm
# Iterate through each subsection
last_end = 0
for start, size in self._pairs(idx_pairs):
# The subsections must increase
assert start >= last_end
last_end = start + size
for num in range(start, start+size):
# The first entry is the type
xref_type = getEntry(0)
# The rest of the elements depend on the xref_type
if xref_type == 0:
# linked list of free objects
next_free_object = getEntry(1)
next_generation = getEntry(2)
elif xref_type == 1:
# objects that are in use but are not compressed
byte_offset = getEntry(1)
generation = getEntry(2)
if generation not in self.xref:
self.xref[generation] = {}
if not used_before(num, generation):
self.xref[generation][num] = byte_offset
if debug: print(("XREF Uncompressed: %s %s"%(
num, generation)))
elif xref_type == 2:
# compressed objects
objstr_num = getEntry(1)
obstr_idx = getEntry(2)
generation = 0 # PDF spec table 18, generation is 0
if not used_before(num, generation):
if debug: print(("XREF Compressed: %s %s %s"%(
num, objstr_num, obstr_idx)))
self.xref_objStm[num] = (objstr_num, obstr_idx)
elif self.strict:
raise utils.PdfReadError("Unknown xref type: %s"%
xref_type)
# Only these trailer keys are copied from the xref stream dictionary.
trailerKeys = "/Root", "/Encrypt", "/Info", "/ID", "/Size"
for key in trailerKeys:
if key in xrefstream and key not in self.trailer:
self.trailer[NameObject(key)] = xrefstream.raw_get(key)
if "/Prev" in xrefstream:
startxref = xrefstream["/Prev"]
else:
break
else:
# bad xref character at startxref. Let's see if we can find
# the xref table nearby, as we've observed this error with an
# off-by-one before.
stream.seek(-11, 1)
tmp = stream.read(20)
xref_loc = tmp.find(b_("xref"))
if xref_loc != -1:
startxref -= (10 - xref_loc)
continue
# No explicit xref table, try finding a cross-reference stream.
stream.seek(startxref, 0)
found = False
for look in range(5):
if stream.read(1).isdigit():
# This is not a standard PDF, consider adding a warning
startxref += look
found = True
break
if found:
continue
# no xref table found at specified location
raise utils.PdfReadError("Could not find xref table at specified location")
#if not zero-indexed, verify that the table is correct; change it if necessary
if self.xrefIndex and not self.strict:
loc = stream.tell()
for gen in self.xref:
if gen == 65535: continue
for id in self.xref[gen]:
stream.seek(self.xref[gen][id], 0)
try:
pid, pgen = self.readObjectHeader(stream)
except ValueError:
break
if pid == id - self.xrefIndex:
self._zeroXref(gen)
break
#if not, then either it's just plain wrong, or the non-zero-index is actually correct
stream.seek(loc, 0) #return to where it was
def _zeroXref(self, generation):
    """
    Rebuild the xref entries of *generation* with every object number
    lowered by ``self.xrefIndex``; the recorded offsets are kept as they are.
    """
    shifted = {}
    for objnum, offset in list(self.xref[generation].items()):
        shifted[objnum - self.xrefIndex] = offset
    self.xref[generation] = shifted
def _pairs(self, array):
    """
    Yield the items of *array* two at a time:
    ``(array[0], array[1])``, ``(array[2], array[3])``, ...

    A trailing item without a partner is ignored.  An empty or one-item
    *array* yields nothing (it used to raise ``IndexError`` from inside the
    generator).

    :param array: an indexable sequence with a length.
    :return: a generator of 2-tuples.
    """
    # Stop one short of the end so that array[i+1] always exists.
    for i in range(0, len(array) - 1, 2):
        yield array[i], array[i+1]
def readNextEndLine(self, stream):
    """
    Read one line from *stream* while moving backwards through it.

    Starting at the current position, bytes are read one at a time, each
    read being followed by a two-byte step back, until a CR or LF is met.
    The run of CR/LF bytes is then consumed and the stream is moved forward
    again (two bytes if a second end-of-line byte was seen, one otherwise).

    :param stream: a seekable binary stream.
    :return: the bytes collected before the end-of-line marker, in file
        order.
    :rtype: bytes
    :raises utils.PdfReadError: if the start of the stream is reached
        before an end-of-line marker has been dealt with.
    """
    debug = False
    if debug: print(">>readNextEndLine")
    LF = b_('\n')
    CR = b_('\r')
    collected = []
    while True:
        # Prevent infinite loops in malformed PDFs
        if stream.tell() == 0:
            raise utils.PdfReadError("Could not read malformed PDF file")
        x = stream.read(1)
        if debug: print((" x:", x, "%x"%ord(x)))
        if stream.tell() < 2:
            raise utils.PdfReadError("EOL marker not found")
        stream.seek(-2, 1)
        if x != LF and x != CR:
            # Ordinary byte: keep it and go on with the one before it.
            if debug: print(" x is neither")
            collected.append(x)
            continue
        crlf = False
        while x == LF or x == CR:
            if debug:
                if ord(x) == 0x0D: print(" x is CR 0D")
                elif ord(x) == 0x0A: print(" x is LF 0A")
            x = stream.read(1)
            if x == LF or x == CR:  # account for CR+LF
                stream.seek(-1, 1)
                crlf = True
            if stream.tell() < 2:
                raise utils.PdfReadError("EOL marker not found")
            stream.seek(-2, 1)
        # Undo part of the last step back: 2 bytes for CR+LF, else 1.
        stream.seek(2 if crlf else 1, 1)
        break
    if debug: print("leaving RNEL")
    # Bytes were gathered last-to-first; restore file order.
    collected.reverse()
    return b"".join(collected)
def decrypt(self, password):
    """
    When using an encrypted / secured PDF file with the PDF Standard
    encryption handler, this function will allow the file to be decrypted.
    It checks the given password against the document's user password and
    owner password, and then stores the resulting decryption key if either
    password is correct.

    It does not matter which password was matched.  Both passwords provide
    the correct decryption key that will allow the document to be used with
    this library.

    :param str password: The password to match.
    :return: ``0`` if the password failed, ``1`` if the password matched the user
        password, and ``2`` if the password matched the owner password.
    :rtype: int
    :raises NotImplementedError: if document uses an unsupported encryption
        method.
    """
    # The override flag is raised only for the duration of the check and is
    # lowered again even when the check raises.
    self._override_encryption = True
    try:
        outcome = self._decrypt(password)
    finally:
        self._override_encryption = False
    return outcome
def _decrypt(self, password):
    """
    Check *password* first as the user password, then as the owner password,
    and store the decryption key in ``self._decryption_key`` on a match.

    :return: ``1`` for a user password match, ``2`` for an owner password
        match, ``0`` when neither matched.
    :rtype: int
    :raises NotImplementedError: if ``/Filter`` is not ``/Standard`` or
        ``/V`` is not 1 or 2.
    """
    encrypt = self.trailer['/Encrypt'].getObject()
    if encrypt['/Filter'] != '/Standard':
        raise NotImplementedError("only Standard PDF encryption handler is available")
    if encrypt['/V'] not in (1, 2):
        raise NotImplementedError("only algorithm code 1 and 2 are supported. This PDF uses code %s" % encrypt['/V'])

    matched, key = self._authenticateUserPassword(password)
    if matched:
        self._decryption_key = key
        return 1

    # Not the user password: derive a candidate user password from the /O
    # entry with a key built from *password*, then authenticate with that.
    rev = encrypt['/R'].getObject()
    keylen = 5 if rev == 2 else encrypt['/Length'].getObject() // 8
    key = _alg33_1(password, rev, keylen)
    real_O = encrypt["/O"].getObject()
    if rev == 2:
        userpass = utils.RC4_encrypt(key, real_O)
    else:
        userpass = real_O
        # 20 rounds, counting down from 19 to 0; every round uses the key
        # with each of its bytes XOR-ed with the round number.
        for i in range(19, -1, -1):
            new_key = b_('')
            for l in range(len(key)):
                new_key += b_(chr(utils.ord_(key[l]) ^ i))
            userpass = utils.RC4_encrypt(new_key, userpass)
    matched, key = self._authenticateUserPassword(userpass)
    if matched:
        self._decryption_key = key
        return 2
    return 0
def _authenticateUserPassword(self, password):
    """
    Compute the ``/U`` value that *password* would produce and compare it
    with the one stored in the encryption dictionary.

    Revision 2 uses ``_alg34``; revision 3 and above use ``_alg35`` and
    compare only the first 16 bytes of both values.

    :return: a ``(matched, key)`` tuple, *key* being the encryption key
        computed along the way.
    """
    encrypt = self.trailer['/Encrypt'].getObject()
    rev = encrypt['/R'].getObject()
    owner_entry = encrypt['/O'].getObject()
    p_entry = encrypt['/P'].getObject()
    id_entry = self.trailer['/ID'].getObject()
    id1_entry = id_entry[0].getObject()
    real_U = encrypt['/U'].getObject().original_bytes
    if rev == 2:
        U, key = _alg34(password, owner_entry, p_entry, id1_entry)
    elif rev >= 3:
        keylen = encrypt["/Length"].getObject() // 8
        metadata_encrypt = encrypt.get("/EncryptMetadata", BooleanObject(False)).getObject()
        U, key = _alg35(password, rev, keylen, owner_entry,
                        p_entry, id1_entry, metadata_encrypt)
        # Only the first 16 bytes take part in the comparison.
        U, real_U = U[:16], real_U[:16]
    return U == real_U, key
def getIsEncrypted(self):
    """Tell whether the trailer has an ``/Encrypt`` entry.

    :rtype: bool
    """
    return "/Encrypt" in self.trailer
isEncrypted = property(fget=lambda self: self.getIsEncrypted())
"""
Read-only boolean property showing whether this PDF file is encrypted.
Note that this property, if true, will remain true even after the
:meth:`decrypt()` method is called.
"""
def getRectangle(self, name, defaults):
    """
    Return the entry *name* of *self* as a ``RectangleObject``.

    A value that already is a ``RectangleObject`` is returned as is.  When
    the entry is missing, the names in *defaults* are tried in order and the
    first one with a value is used.  An indirect reference is resolved
    through ``self.pdf``.  The resulting rectangle is stored back under
    *name* before it is returned.

    :param self: dictionary-like object with a ``pdf`` attribute.
    :param name: key of the rectangle entry.
    :param defaults: iterable of fallback keys.
    """
    box = self.get(name)
    if isinstance(box, RectangleObject):
        return box
    if box is None:
        for fallback in defaults:
            box = self.get(fallback)
            if box is not None:
                break
    if isinstance(box, IndirectObject):
        box = self.pdf.getObject(box)
    box = RectangleObject(box)
    # Store the converted value so later reads get the RectangleObject.
    setRectangle(self, name, box)
    return box
def setRectangle(self, name, value):
    """
    Store *value* in *self* under *name*, wrapping *name* in a
    ``NameObject`` first unless it already is one.
    """
    key = name if isinstance(name, NameObject) else NameObject(name)
    self[key] = value
def deleteRectangle(self, name):
    """Remove the entry *name* from *self* (``KeyError``-style failure of
    the underlying container applies when it is absent)."""
    del self[name]
def createRectangleAccessor(name, fallback):
    """
    Build a property whose getter, setter and deleter delegate to
    :func:`getRectangle`, :func:`setRectangle` and :func:`deleteRectangle`
    for the entry *name*, with *fallback* as the getter's default names.
    """
    def _read(self):
        return getRectangle(self, name, fallback)

    def _write(self, value):
        return setRectangle(self, name, value)

    def _remove(self):
        return deleteRectangle(self, name)

    return property(_read, _write, _remove)
class PageObject(DictionaryObject):
"""
This class represents a single page within a PDF file. Typically this
object will be created by accessing the
:meth:`getPage()` method of the
:class:`PdfFileReader` class, but it is
also possible to create an empty page with the
:meth:`createBlankPage()` static method.
:param pdf: PDF file the page belongs to.
:param indirectRef: Stores the original indirect reference to
this object in its source PDF
"""
def __init__(self, pdf=None, indirectRef=None):
    """
    Create an empty page dictionary.

    :param pdf: PDF file the page belongs to.
    :param indirectRef: the original indirect reference to this object in
        its source PDF.
    """
    DictionaryObject.__init__(self)
    self.indirectRef = indirectRef
    self.pdf = pdf
@staticmethod
def createBlankPage(pdf=None, width=None, height=None):
    """
    Returns a new blank page.
    If ``width`` or ``height`` is ``None``, both are taken from the media
    box of the last page of *pdf*.

    :param pdf: PDF file the page belongs to
    :param float width: The width of the new page expressed in default user
        space units.
    :param float height: The height of the new page expressed in default user
        space units.
    :return: the new blank page:
    :rtype: :class:`PageObject`
    :raises PageSizeNotDefinedError: if a size is missing and ``pdf`` is
        ``None`` or contains no page
    """
    page = PageObject(pdf)
    # Creates a new page (cf PDF Reference 7.7.3.3)
    page[NameObject('/Type')] = NameObject('/Page')
    page[NameObject('/Parent')] = NullObject()
    page[NameObject('/Resources')] = DictionaryObject()
    if width is None or height is None:
        if pdf is None or pdf.getNumPages() <= 0:
            raise utils.PageSizeNotDefinedError()
        lastpage = pdf.getPage(pdf.getNumPages() - 1)
        width = lastpage.mediaBox.getWidth()
        height = lastpage.mediaBox.getHeight()
    page[NameObject('/MediaBox')] = RectangleObject([0, 0, width, height])
    return page
def rotateClockwise(self, angle):
    """
    Rotates a page clockwise by increments of 90 degrees.

    :param int angle: Angle to rotate the page.  Must be an increment
        of 90 deg.
    :return: the page itself.
    """
    assert angle % 90 == 0
    self._rotate(angle)
    return self
def rotateCounterClockwise(self, angle):
    """
    Rotates a page counter-clockwise by increments of 90 degrees.

    :param int angle: Angle to rotate the page.  Must be an increment
        of 90 deg.
    :return: the page itself.
    """
    assert angle % 90 == 0
    # Counter-clockwise is a clockwise rotation by the negated angle.
    self._rotate(-angle)
    return self
def _rotate(self, angle):
    """
    Add *angle* to the page's ``/Rotate`` entry (taken as 0 when absent) and
    store the sum back as a ``NumberObject``.
    """
    rotateObj = self.get("/Rotate", 0)
    if isinstance(rotateObj, int):
        currentAngle = rotateObj
    else:
        # Not a plain integer: resolve it to its value first.
        currentAngle = rotateObj.getObject()
    self[NameObject("/Rotate")] = NumberObject(currentAngle + angle)
@staticmethod
def _mergeResources(res1, res2, resource):
    """
    Merge the *resource* sub-dictionaries of two resource dictionaries.

    Entries of *res1* are taken over unchanged.  An entry of *res2* whose
    key is new is added under that key; one whose key exists with a
    different raw value is added under a fresh name (the key with a UUID
    appended).

    :return: a ``(merged, renames)`` tuple, *renames* mapping each clashing
        key of *res2* to the name it was stored under.
    """
    merged = DictionaryObject()
    merged.update(res1.get(resource, DictionaryObject()).getObject())
    incoming = res2.get(resource, DictionaryObject()).getObject()
    renames = {}
    for key in list(incoming.keys()):
        if key not in merged:
            merged[key] = incoming.raw_get(key)
        elif merged.raw_get(key) != incoming.raw_get(key):
            alias = NameObject(key + str(uuid.uuid4()))
            renames[key] = alias
            merged[alias] = incoming[key]
    return merged, renames
@staticmethod
def _contentStreamRename(stream, rename, pdf):
    """
    Apply the *rename* mapping to the name operands of a content stream.

    With an empty mapping *stream* is returned untouched; otherwise it is
    wrapped in a ``ContentStream`` and every ``NameObject`` operand found in
    *rename* is replaced by its new name.
    """
    if not rename:
        return stream
    stream = ContentStream(stream, pdf)
    for operands, operator in stream.operations:
        for position, operand in enumerate(operands):
            if isinstance(operand, NameObject):
                operands[position] = rename.get(operand, operand)
    return stream
@staticmethod
def _pushPopGS(contents, pdf):
    """
    Wrap *contents* in a ``ContentStream`` whose operations start with a
    graphics state "push" (``q``) and end with a "pop" (``Q``), isolating it
    from changes such as transformation matrices.
    """
    wrapped = ContentStream(contents, pdf)
    operations = wrapped.operations
    operations.insert(0, [[], "q"])
    operations.append([[], "Q"])
    return wrapped
@staticmethod
def _addTransformationMatrix(contents, pdf, ctm):
    """
    Wrap *contents* in a ``ContentStream`` and insert, as its first
    operation, the six values of *ctm* (as ``FloatObject`` operands)
    followed by the transformation matrix operator.
    """
    a, b, c, d, e, f = ctm
    matrix = [FloatObject(a), FloatObject(b), FloatObject(c),
              FloatObject(d), FloatObject(e), FloatObject(f)]
    contents = ContentStream(contents, pdf)
    contents.operations.insert(0, [matrix, " cm"])
    return contents
def getContents(self):
    """
    Accesses the page contents.

    :return: the ``/Contents`` object, or ``None`` if it doesn't exist.
        ``/Contents`` is optional, as described in PDF Reference  7.7.3.3
    """
    if "/Contents" not in self:
        return None
    return self["/Contents"].getObject()
def mergePage(self, page2):
    """
    Merges the content streams of two pages into one.  Resource references
    (i.e. fonts) are maintained from both pages.  The mediabox/cropbox/etc
    of this page are not altered.  The parameter page's content stream will
    be added to the end of this page's content stream, meaning that it will
    be drawn after, or "on top" of this page.

    :param PageObject page2: The page to be merged into this one. Should be
        an instance of :class:`PageObject`.
    """
    # Plain merge: no transformation, no page expansion.
    self._mergePage(page2)
def _mergePage(self, page2, page2transformation=None, ctm=None, expand=False):
    """
    Merge the resources, annotations and content of *page2* into this page.

    :param PageObject page2: the page merged into this one.
    :param page2transformation: optional callable applied to the content of
        *page2* before it is merged.
    :param ctm: optional 6-element transformation matrix, used only to
        work out the enlarged media box when *expand* is set.
    :param bool expand: whether this page's media box is enlarged so that it
        also covers the (transformed) media box of *page2*.
    """
    # First we work on merging the resource dictionaries.  This allows us
    # to find out what symbols in the content streams we might need to
    # rename.
    newResources = DictionaryObject()
    rename = {}
    originalResources = self["/Resources"].getObject()
    page2Resources = page2["/Resources"].getObject()

    # Annotations of both pages are collected, this page's first.
    newAnnots = ArrayObject()
    for page in (self, page2):
        if "/Annots" not in page:
            continue
        annots = page["/Annots"]
        if isinstance(annots, ArrayObject):
            for ref in annots:
                newAnnots.append(ref)

    resourceKinds = ("/ExtGState", "/Font", "/XObject", "/ColorSpace",
                     "/Pattern", "/Shading", "/Properties")
    for res in resourceKinds:
        new, newrename = PageObject._mergeResources(originalResources, page2Resources, res)
        if new:
            newResources[NameObject(res)] = new
            rename.update(newrename)

    # Combine /ProcSet sets.
    procSet1 = frozenset(originalResources.get("/ProcSet", ArrayObject()).getObject())
    procSet2 = frozenset(page2Resources.get("/ProcSet", ArrayObject()).getObject())
    newResources[NameObject("/ProcSet")] = ArrayObject(procSet1.union(procSet2))

    # Each page's content is wrapped in its own push/pop pair; the names
    # that clashed during the resource merge are renamed in page2's content.
    newContentArray = ArrayObject()
    originalContent = self.getContents()
    if originalContent is not None:
        newContentArray.append(PageObject._pushPopGS(
            originalContent, self.pdf))
    page2Content = page2.getContents()
    if page2Content is not None:
        if page2transformation is not None:
            page2Content = page2transformation(page2Content)
        page2Content = PageObject._contentStreamRename(
            page2Content, rename, self.pdf)
        page2Content = PageObject._pushPopGS(page2Content, self.pdf)
        newContentArray.append(page2Content)

    # if expanding the page to fit a new page, calculate the new media box size
    if expand:
        corners1 = [self.mediaBox.getLowerLeft_x().as_numeric(), self.mediaBox.getLowerLeft_y().as_numeric(),
                    self.mediaBox.getUpperRight_x().as_numeric(), self.mediaBox.getUpperRight_y().as_numeric()]
        corners2 = [page2.mediaBox.getLowerLeft_x().as_numeric(), page2.mediaBox.getLowerLeft_y().as_numeric(),
                    page2.mediaBox.getUpperLeft_x().as_numeric(), page2.mediaBox.getUpperLeft_y().as_numeric(),
                    page2.mediaBox.getUpperRight_x().as_numeric(), page2.mediaBox.getUpperRight_y().as_numeric(),
                    page2.mediaBox.getLowerRight_x().as_numeric(), page2.mediaBox.getLowerRight_y().as_numeric()]
        xs = corners2[0:8:2]
        ys = corners2[1:8:2]
        if ctm is not None:
            # Map the four corners of page2's media box through the matrix.
            ctm = [float(x) for x in ctm]
            new_x = [ctm[0]*x + ctm[2]*y + ctm[4] for x, y in zip(xs, ys)]
            new_y = [ctm[1]*x + ctm[3]*y + ctm[5] for x, y in zip(xs, ys)]
        else:
            new_x = xs
            new_y = ys
        lowerleft = [min(corners1[0], min(new_x)), min(corners1[1], min(new_y))]
        upperright = [max(corners1[2], max(new_x)), max(corners1[3], max(new_y))]
        self.mediaBox.setLowerLeft(lowerleft)
        self.mediaBox.setUpperRight(upperright)

    self[NameObject('/Contents')] = ContentStream(newContentArray, self.pdf)
    self[NameObject('/Resources')] = newResources
    self[NameObject('/Annots')] = newAnnots
def mergeTransformedPage(self, page2, ctm, expand=False):
    """
    This is similar to mergePage, but a transformation matrix is
    applied to the merged stream.

    :param PageObject page2: The page to be merged into this one. Should be
        an instance of :class:`PageObject`.
    :param tuple ctm: a 6-element tuple containing the operands of the
        transformation matrix
    :param bool expand: Whether the page should be expanded to fit the dimensions
        of the page to be merged.
    """
    def transform(page2Content):
        # Prefix page2's content with the matrix before it is merged.
        return PageObject._addTransformationMatrix(page2Content, page2.pdf, ctm)

    self._mergePage(page2, transform, ctm, expand)
def mergeScaledPage(self, page2, scale, expand=False):
    """
    This is similar to mergePage, but the stream to be merged is scaled
    by applying a transformation matrix.

    :param PageObject page2: The page to be merged into this one. Should be
        an instance of :class:`PageObject`.
    :param float scale: The scaling factor
    :param bool expand: Whether the page should be expanded to fit the
        dimensions of the page to be merged.
    """
    # CTM to scale : [ sx 0 0 sy 0 0 ]
    ctm = [scale, 0,
           0, scale,
           0, 0]
    return self.mergeTransformedPage(page2, ctm, expand)
def mergeRotatedPage(self, page2, rotation, expand=False):
    """
    This is similar to mergePage, but the stream to be merged is rotated
    by applying a transformation matrix.

    :param PageObject page2: the page to be merged into this one. Should be
        an instance of :class:`PageObject`.
    :param float rotation: The angle of the rotation, in degrees
    :param bool expand: Whether the page should be expanded to fit the
        dimensions of the page to be merged.
    """
    angle = math.radians(rotation)
    cosine = math.cos(angle)
    sine = math.sin(angle)
    ctm = [cosine, sine,
           -sine, cosine,
           0, 0]
    return self.mergeTransformedPage(page2, ctm, expand)
def mergeTranslatedPage(self, page2, tx, ty, expand=False):
    """
    This is similar to mergePage, but the stream to be merged is translated
    by applying a transformation matrix.

    :param PageObject page2: the page to be merged into this one. Should be
        an instance of :class:`PageObject`.
    :param float tx: The translation on X axis
    :param float ty: The translation on Y axis
    :param bool expand: Whether the page should be expanded to fit the
        dimensions of the page to be merged.
    """
    # Identity matrix with the offsets in the last two slots.
    ctm = [1, 0,
           0, 1,
           tx, ty]
    return self.mergeTransformedPage(page2, ctm, expand)
def mergeRotatedTranslatedPage(self, page2, rotation, tx, ty, expand=False):
    """
    Merge ``page2`` into this page after rotating and translating its
    content.

    The matrix applied is the product of a translation by ``(-tx, -ty)``,
    the rotation, and a translation by ``(tx, ty)``, in that order.

    :param PageObject page2: the page to be merged into this one. Should be
        an instance of :class:`PageObject`.
    :param float rotation: The angle of the rotation, in degrees
    :param float tx: The translation on X axis
    :param float ty: The translation on Y axis
    :param bool expand: Whether the page should be expanded to fit the
        dimensions of the page to be merged.
    """
    shift_back = [[1, 0, 0],
                  [0, 1, 0],
                  [-tx, -ty, 1]]
    angle = math.radians(rotation)
    cos_a, sin_a = math.cos(angle), math.sin(angle)
    rotate = [[cos_a, sin_a, 0],
              [-sin_a, cos_a, 0],
              [0, 0, 1]]
    shift_forward = [[1, 0, 0],
                     [0, 1, 0],
                     [tx, ty, 1]]
    ctm = utils.matrixMultiply(
        utils.matrixMultiply(shift_back, rotate), shift_forward)
    # Only the first two columns of the 3x3 matrix form the 6 PDF operands.
    operands = [entry for row in ctm for entry in row[:2]]
    return self.mergeTransformedPage(page2, operands, expand)
def mergeRotatedScaledPage(self, page2, rotation, scale, expand=False):
    """
    Merge ``page2`` into this page after rotating and scaling its content.

    The matrix applied is the product of the rotation matrix and the
    uniform scaling matrix, in that order.

    :param PageObject page2: the page to be merged into this one. Should be
        an instance of :class:`PageObject`.
    :param float rotation: The angle of the rotation, in degrees
    :param float scale: The scaling factor
    :param bool expand: Whether the page should be expanded to fit the
        dimensions of the page to be merged.
    """
    angle = math.radians(rotation)
    cos_a, sin_a = math.cos(angle), math.sin(angle)
    rotate = [[cos_a, sin_a, 0],
              [-sin_a, cos_a, 0],
              [0, 0, 1]]
    resize = [[scale, 0, 0],
              [0, scale, 0],
              [0, 0, 1]]
    ctm = utils.matrixMultiply(rotate, resize)
    # Only the first two columns of the 3x3 matrix form the 6 PDF operands.
    operands = [entry for row in ctm for entry in row[:2]]
    return self.mergeTransformedPage(page2, operands, expand)
def mergeScaledTranslatedPage(self, page2, scale, tx, ty, expand=False):
    """
    Merge ``page2`` into this page after scaling and translating its
    content.

    The matrix applied is the product of the uniform scaling matrix and the
    translation matrix, in that order.

    :param PageObject page2: the page to be merged into this one. Should be
        an instance of :class:`PageObject`.
    :param float scale: The scaling factor
    :param float tx: The translation on X axis
    :param float ty: The translation on Y axis
    :param bool expand: Whether the page should be expanded to fit the
        dimensions of the page to be merged.
    """
    shift = [[1, 0, 0],
             [0, 1, 0],
             [tx, ty, 1]]
    resize = [[scale, 0, 0],
              [0, scale, 0],
              [0, 0, 1]]
    ctm = utils.matrixMultiply(resize, shift)
    # Only the first two columns of the 3x3 matrix form the 6 PDF operands.
    operands = [entry for row in ctm for entry in row[:2]]
    return self.mergeTransformedPage(page2, operands, expand)
def mergeRotatedScaledTranslatedPage(self, page2, rotation, scale, tx, ty, expand=False):
    """
    Merge ``page2`` into this page after rotating, scaling and translating
    its content.

    The matrix applied is the product of the rotation, the uniform scaling
    and the translation matrices, in that order.

    :param PageObject page2: the page to be merged into this one. Should be
        an instance of :class:`PageObject`.
    :param float rotation: The angle of the rotation, in degrees
    :param float scale: The scaling factor
    :param float tx: The translation on X axis
    :param float ty: The translation on Y axis
    :param bool expand: Whether the page should be expanded to fit the
        dimensions of the page to be merged.
    """
    shift = [[1, 0, 0],
             [0, 1, 0],
             [tx, ty, 1]]
    angle = math.radians(rotation)
    cos_a, sin_a = math.cos(angle), math.sin(angle)
    rotate = [[cos_a, sin_a, 0],
              [-sin_a, cos_a, 0],
              [0, 0, 1]]
    resize = [[scale, 0, 0],
              [0, scale, 0],
              [0, 0, 1]]
    ctm = utils.matrixMultiply(utils.matrixMultiply(rotate, resize), shift)
    # Only the first two columns of the 3x3 matrix form the 6 PDF operands.
    operands = [entry for row in ctm for entry in row[:2]]
    return self.mergeTransformedPage(page2, operands, expand)
##
# Applies a transformation matrix to the page.
#
# @param ctm A 6-element tuple containing the operands of the
#            transformation matrix
def addTransformation(self, ctm):
    """
    Apply a transformation matrix to the page content.

    Does nothing when the page has no contents.

    :param tuple ctm: A 6-element tuple containing the operands of the
        transformation matrix.
    """
    originalContent = self.getContents()
    if originalContent is None:
        return
    transformed = PageObject._addTransformationMatrix(
        originalContent, self.pdf, ctm)
    # Wrap the transformed stream with _pushPopGS before storing it back.
    self[NameObject('/Contents')] = PageObject._pushPopGS(transformed, self.pdf)
def scale(self, sx, sy):
    """
    Scale the page by the given factors: a transformation matrix is applied
    to its content and the media box is resized accordingly. If the page has
    a ``/VP`` (viewport) entry, its ``/BBox`` is scaled too (only the first
    viewport when ``/VP`` is an array).

    :param float sx: The scaling factor on horizontal axis.
    :param float sy: The scaling factor on vertical axis.
    """
    self.addTransformation([sx, 0, 0, sy, 0, 0])

    lower_x = float(self.mediaBox.getLowerLeft_x()) * sx
    lower_y = float(self.mediaBox.getLowerLeft_y()) * sy
    upper_x = float(self.mediaBox.getUpperRight_x()) * sx
    upper_y = float(self.mediaBox.getUpperRight_y()) * sy
    self.mediaBox = RectangleObject([lower_x, lower_y, upper_x, upper_y])

    if "/VP" not in self:
        return

    viewport = self["/VP"]
    is_array = isinstance(viewport, ArrayObject)
    bbox = viewport[0]["/BBox"] if is_array else viewport["/BBox"]
    # BBox entries alternate x, y, x, y, so the factors alternate as well.
    scaled_bbox = RectangleObject(
        [float(bbox[i]) * factor for i, factor in enumerate((sx, sy, sx, sy))])
    if is_array:
        self[NameObject("/VP")][NumberObject(0)][NameObject("/BBox")] = scaled_bbox
    else:
        self[NameObject("/VP")][NameObject("/BBox")] = scaled_bbox
def scaleBy(self, factor):
    """
    Scale the page uniformly: the same factor is passed to :meth:`scale`
    for the horizontal and the vertical axis.

    :param float factor: The scaling factor (for both X and Y axis).
    """
    horizontal = vertical = factor
    self.scale(horizontal, vertical)
def scaleTo(self, width, height):
    """
    Scale the page to the specified dimensions. The factors are derived
    from the current media box extent and handed to :meth:`scale`.

    :param float width: The new width.
    :param float height: The new height.
    """
    current_width = float(self.mediaBox.getUpperRight_x() -
                          self.mediaBox.getLowerLeft_x())
    sx = width / current_width
    current_height = float(self.mediaBox.getUpperRight_y() -
                           self.mediaBox.getLowerLeft_y())
    sy = height / current_height
    self.scale(sx, sy)
def compressContentStreams(self):
    """
    Compress this page by joining all of its content streams into one and
    applying a FlateDecode filter to the result.

    However, it is possible that this function will perform no action if
    content stream compression becomes "automatic" for some reason.
    Pages without contents are left untouched.
    """
    content = self.getContents()
    if content is None:
        return
    if not isinstance(content, ContentStream):
        # Parsing into a ContentStream merges multiple streams into one.
        content = ContentStream(content, self.pdf)
    self[NameObject("/Contents")] = content.flateEncode()
def extractText(self):
    """
    Locate all text drawing commands, in the order they are provided in the
    content stream, and extract the text. This works well for some PDF
    files, but poorly for others, depending on the generator used. This will
    be refined in the future. Do not rely on the order of text coming out of
    this function, as it will change if this function is made more
    sophisticated.

    :return: a unicode string object; empty when the page has no
        ``/Contents`` entry.
    """
    if "/Contents" not in self:
        # A page without a content stream draws nothing, so it has no text.
        return u_("")
    content = self["/Contents"].getObject()
    if not isinstance(content, ContentStream):
        content = ContentStream(content, self.pdf)
    # Collect the fragments and join once instead of growing a string.
    pieces = []
    # Note: we check all strings are TextStringObjects. ByteStringObjects
    # are strings where the byte->string encoding was unknown, so adding
    # them to the text here would be gibberish.
    for operands, operator in content.operations:
        if operator == b_("Tj"):
            _text = operands[0]
            if isinstance(_text, TextStringObject):
                pieces.append(_text)
                pieces.append("\n")
        elif operator == b_("T*"):
            pieces.append("\n")
        elif operator == b_("'"):
            # Move-to-next-line-and-show: the line break is always emitted.
            pieces.append("\n")
            _text = operands[0]
            if isinstance(_text, TextStringObject):
                pieces.append(_text)
        elif operator == b_('"'):
            # The string is the third operand of this operator.
            _text = operands[2]
            if isinstance(_text, TextStringObject):
                pieces.append("\n")
                pieces.append(_text)
        elif operator == b_("TJ"):
            for i in operands[0]:
                if isinstance(i, TextStringObject):
                    pieces.append(i)
            pieces.append("\n")
    return u_("").join(pieces)
# Page boundary boxes. Each accessor is built from the box's own dictionary
# key plus a tuple of other box keys.
# NOTE(review): the tuple presumably lists fallback keys consulted in order
# when the box is absent (cropBox documents mediaBox as its default) --
# confirm against createRectangleAccessor.
mediaBox = createRectangleAccessor("/MediaBox", ())
"""
A :class:`RectangleObject`, expressed in default user space units,
defining the boundaries of the physical medium on which the page is
intended to be displayed or printed.
"""
cropBox = createRectangleAccessor("/CropBox", ("/MediaBox",))
"""
A :class:`RectangleObject`, expressed in default user space units,
defining the visible region of default user space. When the page is
displayed or printed, its contents are to be clipped (cropped) to this
rectangle and then imposed on the output medium in some
implementation-defined manner. Default value: same as :attr:`mediaBox`.
"""
bleedBox = createRectangleAccessor("/BleedBox", ("/CropBox", "/MediaBox"))
"""
A :class:`RectangleObject`, expressed in default user space units,
defining the region to which the contents of the page should be clipped
when output in a production enviroment.
"""
trimBox = createRectangleAccessor("/TrimBox", ("/CropBox", "/MediaBox"))
"""
A :class:`RectangleObject`, expressed in default user space units,
defining the intended dimensions of the finished page after trimming.
"""
artBox = createRectangleAccessor("/ArtBox", ("/CropBox", "/MediaBox"))
"""
A :class:`RectangleObject`, expressed in default user space units,
defining the extent of the page's meaningful content as intended by the
page's creator.
"""
class ContentStream(DecodedStreamObject):
    """
    Parsed form of a page content stream.

    ``self.operations`` holds ``(operands, operator)`` pairs in stream order.
    Inline images are stored as a dict with ``"settings"`` and ``"data"``
    keys paired with the pseudo-operator ``INLINE IMAGE``. The ``_data``
    property serializes the operations back to bytes and re-parses on
    assignment.
    """

    def __init__(self, stream, pdf):
        """
        :param stream: a stream object, or an array of stream objects whose
            data is concatenated before parsing.
        :param pdf: the owning document, kept in ``self.pdf`` and used when
            reading inline image settings.
        """
        self.pdf = pdf
        self.operations = []
        # stream may be a StreamObject or an ArrayObject containing
        # multiple StreamObjects to be cat'd together.
        stream = stream.getObject()
        if isinstance(stream, ArrayObject):
            data = b_("").join(b_(s.getObject().getData()) for s in stream)
            stream = BytesIO(b_(data))
        else:
            stream = BytesIO(b_(stream.getData()))
        self.__parseContentStream(stream)

    def __parseContentStream(self, stream):
        """Tokenize ``stream`` from its start and append to self.operations."""
        stream.seek(0, 0)
        operands = []
        while True:
            peek = readNonWhitespace(stream)
            if peek == b_('') or ord_(peek) == 0:
                break
            stream.seek(-1, 1)
            if peek.isalpha() or peek == b_("'") or peek == b_('"'):
                operator = utils.readUntilRegex(stream,
                        NameObject.delimiterPattern, True)
                if operator == b_("BI"):
                    # begin inline image - a completely different parsing
                    # mechanism is required
                    assert operands == []
                    ii = self._readInlineImage(stream)
                    self.operations.append((ii, b_("INLINE IMAGE")))
                else:
                    self.operations.append((operands, operator))
                    operands = []
            elif peek == b_('%'):
                # If we encounter a comment in the content stream, we have to
                # handle it here.  Typically, readObject will handle
                # encountering a comment -- but readObject assumes that
                # following the comment must be the object we're trying to
                # read.  In this case, it could be an operator instead.
                # An empty read means end of stream: stop there too, or a
                # comment without a trailing newline would loop forever.
                while peek not in (b_('\r'), b_('\n'), b_('')):
                    peek = stream.read(1)
            else:
                operands.append(readObject(stream, None))

    def _readInlineImage(self, stream):
        """
        Read an inline image, starting just after the ``BI`` operator.

        :return: dict with the image ``"settings"`` dictionary and the raw
            ``"data"`` bytes.
        :raises utils.PdfReadError: if the stream ends before the image is
            terminated.
        """
        # first read the dictionary of settings.
        settings = DictionaryObject()
        while True:
            tok = readNonWhitespace(stream)
            stream.seek(-1, 1)
            if tok == b_("I"):
                # "ID" - begin of image data
                break
            key = readObject(stream, self.pdf)
            tok = readNonWhitespace(stream)
            stream.seek(-1, 1)
            value = readObject(stream, self.pdf)
            settings[key] = value
        # left at beginning of ID
        tmp = stream.read(3)
        assert tmp[:2] == b_("ID")
        chunks = []
        while True:
            # Read the inline image, while checking for EI (End Image) operator.
            tok = stream.read(1)
            if not tok:
                # End of stream without a terminating EI: fail instead of
                # spinning on empty reads.
                raise utils.PdfReadError(
                    "Unexpected end of stream while reading inline image")
            if tok == b_("E"):
                # Check for End Image
                tok2 = stream.read(1)
                if tok2 == b_("I"):
                    # Data can contain EI, so check for the Q operator.
                    tok3 = stream.read(1)
                    info = tok + tok2
                    # We need to find whitespace between EI and Q.
                    has_q_whitespace = False
                    while tok3 in utils.WHITESPACES:
                        has_q_whitespace = True
                        info += tok3
                        tok3 = stream.read(1)
                    if tok3 == b_("Q") and has_q_whitespace:
                        stream.seek(-1, 1)
                        break
                    else:
                        stream.seek(-1, 1)
                        chunks.append(info)
                else:
                    stream.seek(-1, 1)
                    chunks.append(tok)
            else:
                chunks.append(tok)
        return {"settings": settings, "data": b_("").join(chunks)}

    def _getData(self):
        """Serialize self.operations back into content stream bytes."""
        newdata = BytesIO()
        for operands, operator in self.operations:
            if operator == b_("INLINE IMAGE"):
                newdata.write(b_("BI"))
                dicttext = BytesIO()
                operands["settings"].writeToStream(dicttext, None)
                # Drop the first and last two bytes of the serialized
                # dictionary (its delimiters).
                newdata.write(dicttext.getvalue()[2:-2])
                newdata.write(b_("ID "))
                newdata.write(operands["data"])
                newdata.write(b_("EI"))
            else:
                for op in operands:
                    op.writeToStream(newdata, None)
                    newdata.write(b_(" "))
                newdata.write(b_(operator))
            newdata.write(b_("\n"))
        return newdata.getvalue()

    def _setData(self, value):
        """Parse ``value`` and append its operations to self.operations."""
        self.__parseContentStream(BytesIO(b_(value)))

    _data = property(_getData, _setData)
class DocumentInformation(DictionaryObject):
    """
    A class representing the basic document metadata provided in a PDF File.
    This class is accessible through
    :meth:`getDocumentInfo()`

    All text properties of the document metadata have
    *two* properties, eg. author and author_raw. The non-raw property will
    always return a ``TextStringObject``, making it ideal for a case where
    the metadata is being displayed. The raw property can sometimes return
    a ``ByteStringObject``, if PyPDF2 was unable to decode the string's
    text encoding; this requires additional safety in the caller and
    therefore is not as commonly accessed.
    """

    def __init__(self):
        DictionaryObject.__init__(self)

    def getText(self, key):
        """Return the value stored under ``key`` if it is a
        ``TextStringObject``, otherwise ``None``."""
        value = self.get(key, None)
        return value if isinstance(value, TextStringObject) else None

    @property
    def title(self):
        """Read-only property accessing the document's **title**.
        Returns a unicode string (``TextStringObject``) or ``None``
        if the title is not specified."""
        return self.getText("/Title")

    @property
    def title_raw(self):
        """The "raw" version of title; can return a ``ByteStringObject``."""
        return self.get("/Title")

    @property
    def author(self):
        """Read-only property accessing the document's **author**.
        Returns a unicode string (``TextStringObject``) or ``None``
        if the author is not specified."""
        return self.getText("/Author")

    @property
    def author_raw(self):
        """The "raw" version of author; can return a ``ByteStringObject``."""
        return self.get("/Author")

    @property
    def subject(self):
        """Read-only property accessing the document's **subject**.
        Returns a unicode string (``TextStringObject``) or ``None``
        if the subject is not specified."""
        return self.getText("/Subject")

    @property
    def subject_raw(self):
        """The "raw" version of subject; can return a ``ByteStringObject``."""
        return self.get("/Subject")

    @property
    def creator(self):
        """Read-only property accessing the document's **creator**. If the
        document was converted to PDF from another format, this is the name of the
        application (e.g. OpenOffice) that created the original document from
        which it was converted. Returns a unicode string (``TextStringObject``)
        or ``None`` if the creator is not specified."""
        return self.getText("/Creator")

    @property
    def creator_raw(self):
        """The "raw" version of creator; can return a ``ByteStringObject``."""
        return self.get("/Creator")

    @property
    def producer(self):
        """Read-only property accessing the document's **producer**.
        If the document was converted to PDF from another format, this is
        the name of the application (for example, OSX Quartz) that converted
        it to PDF. Returns a unicode string (``TextStringObject``)
        or ``None`` if the producer is not specified."""
        return self.getText("/Producer")

    @property
    def producer_raw(self):
        """The "raw" version of producer; can return a ``ByteStringObject``."""
        return self.get("/Producer")
def convertToInt(d, size):
    """
    Interpret ``d`` as a big-endian integer.

    The data is left-padded with zero bytes, its last 8 bytes are kept and
    unpacked as a signed 64-bit value.

    :param d: the raw bytes to convert.
    :param int size: declared width in bytes; must not exceed 8.
    :raises utils.PdfReadError: if ``size`` is greater than 8.
    """
    if size > 8:
        raise utils.PdfReadError("invalid size in convertToInt")
    padded = b_("\x00\x00\x00\x00\x00\x00\x00\x00") + b_(d)
    (value,) = struct.unpack(">q", padded[-8:])
    return value
# 32-byte password padding string.
# ref: pdf1.8 spec section 3.5.2 algorithm 3.2
_encryption_padding = b_(
    '\x28\xbf\x4e\x5e\x4e\x75\x8a\x41\x64\x00\x4e\x56'
    '\xff\xfa\x01\x08\x2e\x2e\x00\xb6\xd0\x68\x3e\x80\x2f\x0c'
    '\xa9\xfe\x64\x53\x69\x7a')
# Implementation of algorithm 3.2 of the PDF standard security handler,
# section 3.5.2 of the PDF 1.6 reference.
def _alg32(password, rev, keylen, owner_entry, p_entry, id1_entry, metadata_encrypt=True):
    """
    Compute the encryption key from the user password (algorithm 3.2).

    :param password: the user password (text or bytes).
    :param int rev: revision of the standard security handler.
    :param int keylen: key length in bytes.
    :param owner_entry: the encryption dictionary's /O entry; its
        ``original_bytes`` are hashed.
    :param int p_entry: the encryption dictionary's /P value.
    :param id1_entry: first element of the file identifier array; its
        ``original_bytes`` are hashed.
    :param bool metadata_encrypt: whether document metadata is encrypted.
    :return: the first ``keylen`` bytes of the final MD5 digest.
    """
    # 1. Pad or truncate the password string to exactly 32 bytes.  If the
    # password string is more than 32 bytes long, use only its first 32 bytes;
    # if it is less than 32 bytes long, pad it by appending the required number
    # of additional bytes from the beginning of the padding string
    # (_encryption_padding).
    password = b_((str_(password) + str_(_encryption_padding))[:32])
    # 2. Initialize the MD5 hash function and pass the result of step 1 as
    # input to this function.
    import struct
    m = md5(password)
    # 3. Pass the value of the encryption dictionary's /O entry to the MD5 hash
    # function.
    m.update(owner_entry.original_bytes)
    # 4. Treat the value of the /P entry as an unsigned 4-byte integer and pass
    # these bytes to the MD5 hash function, low-order byte first.
    # NOTE(review): steps 4-6 were reconstructed from the step comments and
    # the algorithm description -- confirm against the upstream source.
    if p_entry < 0:
        # Negative values are packed as signed; the byte pattern equals the
        # unsigned two's-complement form.
        p_entry = struct.pack('<i', p_entry)
    else:
        p_entry = struct.pack('<I', p_entry)
    m.update(p_entry)
    # 5. Pass the first element of the file's file identifier array to the MD5
    # hash function.
    m.update(id1_entry.original_bytes)
    # 6. (Revision 3 or greater) If document metadata is not being encrypted,
    # pass 4 bytes with the value 0xFFFFFFFF to the MD5 hash function.
    if rev >= 3 and not metadata_encrypt:
        m.update(b_("\xff\xff\xff\xff"))
    # 7. Finish the hash.
    md5_hash = m.digest()
    # 8. (Revision 3 or greater) Do the following 50 times: Take the output
    # from the previous MD5 hash and pass the first n bytes of the output as
    # input into a new MD5 hash, where n is the number of bytes of the
    # encryption key as defined by the value of the encryption dictionary's
    # /Length entry.
    if rev >= 3:
        for i in range(50):
            md5_hash = md5(md5_hash[:keylen]).digest()
    # 9. Set the encryption key to the first n bytes of the output from the
    # final MD5 hash, where n is always 5 for revision 2 but, for revision 3 or
    # greater, depends on the value of the encryption dictionary's /Length
    # entry.
    return md5_hash[:keylen]
# Implementation of algorithm 3.3 of the PDF standard security handler,
# section 3.5.2 of the PDF 1.6 reference.
def _alg33(owner_pwd, user_pwd, rev, keylen):
    """
    Compute the /O (owner) entry of the encryption dictionary.

    :param owner_pwd: the owner password.
    :param user_pwd: the user password.
    :param int rev: revision of the standard security handler.
    :param int keylen: key length in bytes.
    :return: the RC4 output to store as the /O entry.
    """
    # Steps 1 - 4: derive the RC4 key from the owner password.
    key = _alg33_1(owner_pwd, rev, keylen)
    # 5. Pad or truncate the user password string as described in step 1 of
    # algorithm 3.2.
    padded_user_pwd = b_((user_pwd + str_(_encryption_padding))[:32])
    # 6. Encrypt the result of step 5, using an RC4 encryption function with
    # the encryption key obtained in step 4.
    val = utils.RC4_encrypt(key, padded_user_pwd)
    # 7. (Revision 3 or greater) Do the following 19 times: re-encrypt the
    # previous RC4 output with a key made by XOR-ing every byte of the step 4
    # key with the iteration counter (from 1 to 19).
    if rev >= 3:
        for i in range(1, 20):
            new_key = ''.join(chr(ord_(key[l]) ^ i) for l in range(len(key)))
            val = utils.RC4_encrypt(new_key, val)
    # 8. The output of the final RC4 invocation is the value of the /O entry
    # in the encryption dictionary.
    return val
# Steps 1-4 of algorithm 3.3
def _alg33_1(password, rev, keylen):
    """
    Derive the RC4 key used by algorithm 3.3 from the owner password.

    :param password: the owner password (callers pass the user password
        when there is no owner password).
    :param int rev: revision of the standard security handler.
    :param int keylen: key length in bytes.
    :return: the first ``keylen`` bytes of the final MD5 digest.
    """
    # 1. Pad or truncate the owner password string as described in step 1 of
    # algorithm 3.2.
    padded = b_((password + str_(_encryption_padding))[:32])
    # 2. Hash the result of step 1 with MD5.
    md5_hash = md5(padded).digest()
    # 3. (Revision 3 or greater) Do the following 50 times: Take the output
    # from the previous MD5 hash and pass it as input into a new MD5 hash.
    if rev >= 3:
        for _round in range(50):
            md5_hash = md5(md5_hash).digest()
    # 4. The RC4 encryption key is the first n bytes of the output from the
    # final MD5 hash, where n is always 5 for revision 2 but, for revision 3
    # or greater, depends on the value of the encryption dictionary's
    # /Length entry.
    return md5_hash[:keylen]
# Implementation of algorithm 3.4 of the PDF standard security handler,
# section 3.5.2 of the PDF 1.6 reference.
def _alg34(password, owner_entry, p_entry, id1_entry):
    """
    Compute the /U (user) entry for a revision 2 security handler.

    :return: tuple ``(U, key)`` -- the RC4-encrypted padding string and the
        encryption key it was produced with.
    """
    # 1. Create an encryption key based on the user password string, as
    # described in algorithm 3.2 (revision 2, 5-byte key).
    key = _alg32(password, 2, 5, owner_entry, p_entry, id1_entry)
    # 2. Encrypt the 32-byte padding string shown in step 1 of algorithm 3.2,
    # using an RC4 encryption function with the encryption key from the
    # preceding step.
    # 3. The result is the value of the /U entry in the encryption dictionary.
    return utils.RC4_encrypt(key, _encryption_padding), key
# Implementation of algorithm 3.5 of the PDF standard security handler,
# section 3.5.2 of the PDF 1.6 reference.
def _alg35(password, rev, keylen, owner_entry, p_entry, id1_entry, metadata_encrypt):
    """
    Compute the /U (user) entry for a revision 3 or greater security handler.

    :return: tuple ``(U, key)`` -- the 32-byte /U value (16 bytes of RC4
        output followed by 16 null bytes) and the encryption key.
    """
    # 1. Create an encryption key based on the user password string, as
    # described in Algorithm 3.2.
    # NOTE(review): metadata_encrypt is accepted but not forwarded, so
    # _alg32 always runs with its default -- confirm this is intended
    # before changing it, since it affects the derived key.
    key = _alg32(password, rev, keylen, owner_entry, p_entry, id1_entry)
    # 2. Initialize the MD5 hash function and pass the 32-byte padding string
    # shown in step 1 of Algorithm 3.2 as input to this function.
    m = md5()
    m.update(_encryption_padding)
    # 3. Pass the first element of the file's file identifier array (the value
    # of the ID entry in the document's trailer dictionary; see Table 3.13 on
    # page 73) to the hash function and finish the hash.  (See implementation
    # note 25 in Appendix H.)
    m.update(id1_entry.original_bytes)
    md5_hash = m.digest()
    # 4. Encrypt the 16-byte result of the hash, using an RC4 encryption
    # function with the encryption key from step 1.
    val = utils.RC4_encrypt(key, md5_hash)
    # 5. Do the following 19 times: re-encrypt the previous RC4 output with a
    # key made by XOR-ing every byte of the original encryption key with the
    # iteration counter (from 1 to 19).
    for i in range(1, 20):
        new_key = b_('').join(
            b_(chr(ord_(key[l]) ^ i)) for l in range(len(key)))
        val = utils.RC4_encrypt(new_key, val)
    # 6. Append 16 bytes of arbitrary padding to the output from the final
    # invocation of the RC4 function and store the 32-byte result as the value
    # of the U entry in the encryption dictionary.
    # (implementator note: I don't know what "arbitrary padding" is supposed to
    # mean, so I have used null bytes.  This seems to match a few other
    # people's implementations)
    padding = b_('\x00') * 16
    return val + padding, key
endesive-2.19.1/endesive/pdf/PyPDF2/utils.py 0000664 0000000 0000000 00000017755 15042366745 0020532 0 ustar 00root root 0000000 0000000 # Copyright (c) 2006, Mathieu Fenniak
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are
# met:
#
# * Redistributions of source code must retain the above copyright notice,
# this list of conditions and the following disclaimer.
# * Redistributions in binary form must reproduce the above copyright notice,
# this list of conditions and the following disclaimer in the documentation
# and/or other materials provided with the distribution.
# * The name of the author may not be used to endorse or promote products
# derived from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
# POSSIBILITY OF SUCH DAMAGE.
"""
Utility functions for PDF library.
"""
__author__ = "Mathieu Fenniak"
__author_email__ = "biziqe@mathieu.fenniak.net"
import sys
try:
import __builtin__ as builtins
except ImportError: # Py3
import builtins
# Python 2/3 compatibility aliases: each name resolves to the Python 2
# builtin when it exists and to the Python 3 equivalent otherwise.
xrange_fn = getattr(builtins, "xrange", range)
_basestring = getattr(builtins, "basestring", str)
bytes_type = bytes  # Works the same in Python 2.X and 3.X
string_type = getattr(builtins, "unicode", str)
if sys.version_info[0] < 3:
    int_types = (int, long)
else:
    int_types = (int,)
# Make basic type tests more consistent
def isString(s):
    """Return True when ``s`` is a string (Python 2 and 3 compatible)."""
    string_like = isinstance(s, _basestring)
    return string_like
def isInt(n):
    """Return True when ``n`` is an integer (Python 2 and 3 compatible)."""
    integer_like = isinstance(n, int_types)
    return integer_like
def isBytes(b):
    """Return True when ``b`` is a bytes instance (Python 2 and 3 compatible)."""
    bytes_like = isinstance(b, bytes_type)
    return bytes_like
# Custom implementation of warnings.formatwarning
def formatWarning(message, category, filename, lineno, line=None):
    """
    Format a warning as ``"<Category>: <message> [<file>:<lineno>]\\n"``.

    Only the last path component of ``filename`` is shown; both ``/`` and
    ``\\`` are treated as separators.

    :param message: the warning message.
    :param category: the warning class; its ``__name__`` is printed.
    :param str filename: path of the file that raised the warning.
    :param lineno: line number of the warning.
    :param line: unused; accepted for signature compatibility with
        ``warnings.formatwarning``.
    :return: the formatted one-line warning text.
    """
    # [-1] rather than [1]: a filename without any separator yields a
    # single-element list, which previously raised IndexError.
    basename = filename.replace("/", "\\").rsplit("\\", 1)[-1]
    return "%s: %s [%s:%s]\n" % (category.__name__, message, basename, lineno)
def readUntilWhitespace(stream, maxchars=None):
    """
    Read characters one at a time and return those read before the first
    whitespace character.

    Reading stops at whitespace, at end of stream, or once ``maxchars``
    characters have been collected.
    """
    collected = b_("")
    while True:
        tok = stream.read(1)
        if not tok or tok.isspace():
            return collected
        collected += tok
        if len(collected) == maxchars:
            return collected
def readNonWhitespace(stream):
    """
    Skip whitespace and return the first character read that is not in
    WHITESPACES (this is the empty read result at end of stream).
    """
    while True:
        tok = stream.read(1)
        if tok not in WHITESPACES:
            return tok
def skipOverWhitespace(stream):
    """
    Similar to readNonWhitespace, but returns a Boolean instead of the
    character: True when more than one read was needed, i.e. when at least
    one whitespace character was skipped.
    """
    reads = 0
    while True:
        tok = stream.read(1)
        reads += 1
        if tok not in WHITESPACES:
            break
    return reads > 1
def skipOverComment(stream):
    """
    If the stream is positioned at a ``%`` comment, consume it through the
    terminating newline or carriage return (or to end of stream). Otherwise
    the stream position is left unchanged.

    :param stream: a seekable binary stream.
    """
    tok = stream.read(1)
    if not tok:
        # End of stream: nothing was read, so there is nothing to step back.
        return
    stream.seek(-1, 1)
    if tok == b'%':
        # An empty read means end of stream; without that check a comment
        # lacking a trailing newline would loop forever.
        while tok not in (b'\n', b'\r', b''):
            tok = stream.read(1)
def readUntilRegex(stream, regex, ignore_eof=False):
    """
    Read from the stream until the regular expression matches and return
    everything before the match. The stream is repositioned to the start of
    the match, so the match itself is not consumed. Data is read and
    searched in 16-byte chunks.

    :param bool ignore_eof: If true, reaching end-of-file returns what was
        read so far instead of raising.
    :raises PdfStreamError: on premature end-of-file when ``ignore_eof`` is
        not set.
    """
    collected = b_('')
    while True:
        chunk = stream.read(16)
        if not chunk:
            # stream has truncated prematurely
            if ignore_eof == True:
                return collected
            raise PdfStreamError("Stream has ended unexpectedly")
        found = regex.search(chunk)
        if found is None:
            collected += chunk
            continue
        collected += chunk[:found.start()]
        # Step back over the unread remainder of the chunk, match included.
        stream.seek(found.start() - len(chunk), 1)
        return collected
class ConvertFunctionsToVirtualList(object):
    """
    Read-only sequence view backed by two callables: one returning the
    length and one returning the item at a non-negative index.
    """

    def __init__(self, lengthFunction, getFunction):
        self.lengthFunction = lengthFunction
        self.getFunction = getFunction

    def __len__(self):
        return self.lengthFunction()

    def __getitem__(self, index):
        """
        Return the item at ``index``; a slice returns a new lazy view.

        :raises TypeError: if ``index`` is neither an integer nor a slice.
        :raises IndexError: if ``index`` is out of range.
        """
        if isinstance(index, slice):
            indices = xrange_fn(*index.indices(len(self)))
            # The slice view maps its own positions onto this sequence.
            return type(self)(indices.__len__, lambda idx: self[indices[idx]])
        if not isInt(index):
            raise TypeError("sequence indices must be integers")
        size = len(self)
        # support negative indexes
        position = size + index if index < 0 else index
        if not (0 <= position < size):
            raise IndexError("sequence index out of range")
        return self.getFunction(position)
def RC4_encrypt(key, plaintext):
    """
    Encrypt ``plaintext`` with RC4 under ``key`` and return the result as
    bytes.
    """
    # Key scheduling: permute the state according to the key.
    S = list(range(256))
    key_length = len(key)
    j = 0
    for i in range(256):
        j = (j + S[i] + ord_(key[i % key_length])) % 256
        S[i], S[j] = S[j], S[i]
    # Keystream generation, XOR-ed onto the input one symbol at a time.
    i = j = 0
    encrypted = []
    for position in range(len(plaintext)):
        i = (i + 1) % 256
        j = (j + S[i]) % 256
        S[i], S[j] = S[j], S[i]
        keystream_byte = S[(S[i] + S[j]) % 256]
        encrypted.append(b_(chr(ord_(plaintext[position]) ^ keystream_byte)))
    return b_("").join(encrypted)
def matrixMultiply(a, b):
    """
    Return the matrix product ``a * b`` as a list of rows. Both matrices
    are sequences of rows; entries are converted to float before
    multiplying.
    """
    columns = list(zip(*b))
    product = []
    for row in a:
        product.append([sum(float(x) * float(y) for x, y in zip(row, column))
                        for column in columns])
    return product
def markLocation(stream):
    """Creates text file showing current location in context."""
    # Mainly for debugging: dumps up to RADIUS characters either side of the
    # current position into PyPDF2_pdfLocation.txt, separated by 'HERE'.
    RADIUS = 5000
    stream.seek(-RADIUS, 1)
    # The with-block closes the file even when a read or write fails.
    # NOTE(review): the file is opened in text mode, so a stream yielding
    # bytes fails on write under Python 3 -- confirm the expected stream type.
    with open('PyPDF2_pdfLocation.txt', 'w') as outputDoc:
        outputDoc.write(stream.read(RADIUS))
        outputDoc.write('HERE')
        outputDoc.write(stream.read(RADIUS))
    stream.seek(-RADIUS, 1)
class PyPdfError(Exception):
    """Base class for the exceptions defined in this module."""


class PdfReadError(PyPdfError):
    """Error raised while reading a PDF."""


class PageSizeNotDefinedError(PyPdfError):
    """Error raised when a page size is not defined."""


class PdfReadWarning(UserWarning):
    """Warning category for issues found while reading a PDF."""


class PdfStreamError(PdfReadError):
    """Read error raised for stream problems, e.g. a stream ending early."""
if sys.version_info[0] < 3:
    def b_(s):
        """Return ``s`` unchanged (Python 2 strings are already bytes)."""
        return s
else:
    # Cache of already-encoded inputs shorter than two characters.
    B_CACHE = {}

    def b_(s):
        """
        Return ``s`` as bytes: bytes pass through, text is encoded as
        latin-1.
        """
        try:
            return B_CACHE[s]
        except KeyError:
            pass
        if type(s) is bytes:
            return s
        encoded = s.encode('latin-1')
        if len(s) < 2:
            B_CACHE[s] = encoded
        return encoded
def u_(s):
    """
    Return ``s`` as text: unchanged on Python 3, decoded with the
    ``unicode_escape`` codec on Python 2.
    """
    if sys.version_info[0] >= 3:
        return s
    return unicode(s, 'unicode_escape')
def str_(b):
    """
    Return ``b`` as a native string: on Python 3, bytes are decoded as
    latin-1; anything else is returned unchanged.
    """
    if sys.version_info[0] >= 3 and type(b) == bytes:
        return b.decode('latin-1')
    return b
def ord_(b):
    """
    Return the ordinal of ``b`` on Python 2 or when ``b`` is a ``str``;
    any other value is returned unchanged.
    """
    needs_ord = sys.version_info[0] < 3 or type(b) == str
    return ord(b) if needs_ord else b
def chr_(c):
    """
    Return ``c`` unchanged on Python 2 and ``chr(c)`` on Python 3.
    """
    if sys.version_info[0] >= 3:
        return chr(c)
    return c
def barray(b):
    """
    Return ``b`` unchanged on Python 2 and wrapped in a ``bytearray`` on
    Python 3.
    """
    if sys.version_info[0] >= 3:
        return bytearray(b)
    return b
def hexencode(b):
    """
    Return the hexadecimal encoding of ``b`` (via ``str.encode('hex')`` on
    Python 2, via the ``hex_codec`` codec on Python 3).
    """
    if sys.version_info[0] >= 3:
        import codecs
        return codecs.encode(b, 'hex_codec')
    return b.encode('hex')
def hexStr(num):
    """Return ``hex(num)`` with any ``L`` characters removed."""
    text = hex(num)
    return text.replace('L', '')
# Characters the stream readers in this module treat as whitespace.
WHITESPACES = list(map(b_, (' ', '\n', '\r', '\t', '\x00')))
def paethPredictor(left, up, up_left):
    """
    Return whichever of ``left``, ``up`` and ``up_left`` is closest to
    ``left + up - up_left``; ties are resolved in that order.
    """
    estimate = left + up - up_left
    distance_left = abs(estimate - left)
    distance_up = abs(estimate - up)
    distance_up_left = abs(estimate - up_left)
    if distance_left <= min(distance_up, distance_up_left):
        return left
    if distance_up <= distance_up_left:
        return up
    return up_left
endesive-2.19.1/endesive/pdf/PyPDF2/xmp.py 0000664 0000000 0000000 00000032507 15042366745 0020166 0 ustar 00root root 0000000 0000000 import re
import datetime
import decimal
from .generic import PdfObject
from xml.dom import getDOMImplementation
from xml.dom.minidom import parseString
from .utils import u_
# XML namespace URIs for the metadata schemas handled in this module.
RDF_NAMESPACE = "http://www.w3.org/1999/02/22-rdf-syntax-ns#"
DC_NAMESPACE = "http://purl.org/dc/elements/1.1/"
XMP_NAMESPACE = "http://ns.adobe.com/xap/1.0/"
PDF_NAMESPACE = "http://ns.adobe.com/pdf/1.3/"
XMPMM_NAMESPACE = "http://ns.adobe.com/xap/1.0/mm/"
# What is the PDFX namespace, you might ask?  I might ask that too.  It's
# a completely undocumented namespace used to place "custom metadata"
# properties, which are arbitrary metadata properties with no semantic or
# documented meaning.  Elements in the namespace are key/value-style storage,
# where the element name is the key and the content is the value.  The keys
# are transformed into valid XML identifiers by substituting an invalid
# identifier character with \u2182 followed by the unicode hex ID of the
# original character.  A key like "my car" is therefore "my\u21820020car".
#
# \u2182, in case you're wondering, is the unicode character
# \u{ROMAN NUMERAL TEN THOUSAND}, a straightforward and obvious choice for
# escaping characters.
#
# Intentional use of the pdfx namespace is strongly discouraged.  A
# custom data schema and sensible XML elements could be used instead, as is
# suggested by Adobe's own documentation on XMP (under "Extensibility of
# Schemas").
#
# Information presented here on the /pdfx/ schema is a result of limited
# reverse engineering, and does not constitute a full specification.
PDFX_NAMESPACE = "http://ns.adobe.com/pdfx/1.3/"
# Pattern for the ISO 8601 subset used for XMP date values: a four-digit
# year, optionally followed by month, day, and a time part with a mandatory
# timezone designator ("Z" or a +/-HH:MM offset).  Each component is exposed
# as a named group (year, month, day, hour, minute, second, tzd).
iso8601 = re.compile(r"""
        (?P<year>[0-9]{4})
        (-
            (?P<month>[0-9]{2})
            (-
                (?P<day>[0-9]+)
                (T
                    (?P<hour>[0-9]{2}):
                    (?P<minute>[0-9]{2})
                    (:(?P<second>[0-9]{2}(\.[0-9]+)?))?
                    (?P<tzd>Z|[-+][0-9]{2}:[0-9]{2})
                )?
            )?
        )?
        """, re.VERBOSE)
class XmpInformation(PdfObject):
"""
An object that represents Adobe XMP metadata.
Usually accessed by :meth:`getXmpMetadata()`
"""
def __init__(self, stream):
    """
    Parse the XML held by ``stream`` and keep its first ``rdf:RDF`` element
    in ``self.rdfRoot``. ``self.cache`` starts out empty.

    :param stream: the object holding the XMP packet; its ``getData()``
        result is parsed as XML.
    """
    self.stream = stream
    document = parseString(self.stream.getData())
    rdf_elements = document.getElementsByTagNameNS(RDF_NAMESPACE, "RDF")
    self.rdfRoot = rdf_elements[0]
    self.cache = {}
def writeToStream(self, stream, encryption_key):
    """Write the underlying stream object to ``stream``, passing
    ``encryption_key`` through unchanged."""
    wrapped = self.stream
    wrapped.writeToStream(stream, encryption_key)
def getElement(self, aboutUri, namespace, name):
    """
    Yield the nodes named ``name`` in ``namespace`` from every
    ``rdf:Description`` whose ``rdf:about`` attribute equals ``aboutUri``.

    For each matching description the attribute node (if present) is
    yielded first, followed by all matching descendant elements.
    """
    for desc in self.rdfRoot.getElementsByTagNameNS(RDF_NAMESPACE, "Description"):
        if desc.getAttributeNS(RDF_NAMESPACE, "about") == aboutUri:
            attr = desc.getAttributeNodeNS(namespace, name)
            # Identity test: a missing attribute is reported as None.
            if attr is not None:
                yield attr
            for element in desc.getElementsByTagNameNS(namespace, name):
                yield element
def getNodesInNamespace(self, aboutUri, namespace):
    """Yield all attributes and direct children that live in *namespace*.

    Only ``rdf:Description`` elements whose ``rdf:about`` attribute equals
    *aboutUri* are inspected; attributes are yielded before child nodes.
    """
    descriptions = self.rdfRoot.getElementsByTagNameNS(RDF_NAMESPACE, "Description")
    for description in descriptions:
        if description.getAttributeNS(RDF_NAMESPACE, "about") != aboutUri:
            continue
        attributes = description.attributes
        for index in range(attributes.length):
            attribute = attributes.item(index)
            if attribute.namespaceURI == namespace:
                yield attribute
        for child in description.childNodes:
            if child.namespaceURI == namespace:
                yield child
def _getText(self, element):
    """Return the concatenated data of *element*'s direct text-node children.

    Text nested inside child elements is not included.
    """
    return "".join(
        child.data
        for child in element.childNodes
        if child.nodeType == child.TEXT_NODE
    )
def _converter_string(value):
    """Identity converter: return the property text *value* unchanged."""
    return value
def _converter_date(value):
    """Convert an XMP/ISO 8601 date string into a naive UTC ``datetime``.

    Missing month/day default to 1, missing time components default to 0 and
    a missing time zone designator is treated as ``Z`` (UTC). A numeric
    offset such as ``+05:30`` is removed so the result is expressed in UTC.

    :param str value: text matching the module-level ``iso8601`` pattern.
    :return: ``datetime.datetime`` without tzinfo, in UTC.
    """
    m = iso8601.match(value)
    year = int(m.group("year"))
    month = int(m.group("month") or "1")
    day = int(m.group("day") or "1")
    hour = int(m.group("hour") or "0")
    minute = int(m.group("minute") or "0")
    second = decimal.Decimal(m.group("second") or "0")
    whole_seconds = second.to_integral(decimal.ROUND_FLOOR)
    # Fractional part of the second, scaled to microseconds.
    microseconds = (second - whole_seconds) * 1000000
    tzd = m.group("tzd") or "Z"
    # datetime() only accepts integers, so the Decimal values are converted.
    dt = datetime.datetime(
        year, month, day, hour, minute, int(whole_seconds), int(microseconds)
    )
    if tzd != "Z":
        # Take the sign from the text itself: int("+00") and int("-00") are
        # both 0, which would lose the direction of offsets below one hour.
        tzd_hours, tzd_minutes = [int(x) for x in tzd[1:].split(":")]
        offset = datetime.timedelta(hours=tzd_hours, minutes=tzd_minutes)
        # Local time = UTC + offset, hence UTC = local time - offset.
        if tzd[0] == "+":
            dt = dt - offset
        else:
            dt = dt + offset
    return dt
_test_converter_date = staticmethod(_converter_date)
def _getter_bag(namespace, name, converter):
    """Build a property getter for a value stored as an ``rdf:Bag``.

    The returned ``get(self)`` collects the converted text of every
    ``rdf:li`` inside every ``rdf:Bag`` of the matching nodes and returns
    them as a list. The result is memoized in ``self.cache[namespace][name]``.
    """
    def get(self):
        # Membership test rather than truthiness, so an empty result is
        # served from the cache too instead of being recomputed every time.
        cached_ns = self.cache.get(namespace, {})
        if name in cached_ns:
            return cached_ns[name]
        retval = []
        for element in self.getElement("", namespace, name):
            for bag in element.getElementsByTagNameNS(RDF_NAMESPACE, "Bag"):
                for item in bag.getElementsByTagNameNS(RDF_NAMESPACE, "li"):
                    retval.append(converter(self._getText(item)))
        self.cache.setdefault(namespace, {})[name] = retval
        return retval
    return get
def _getter_seq(namespace, name, converter):
    """Build a property getter for a value stored as an ``rdf:Seq``.

    The returned ``get(self)`` collects the converted text of every
    ``rdf:li`` inside every ``rdf:Seq`` of the matching nodes. A matching
    node without any ``rdf:Seq`` contributes its own text as a single item.
    The list is memoized in ``self.cache[namespace][name]``.
    """
    def get(self):
        # Membership test rather than truthiness, so an empty result is
        # served from the cache too instead of being recomputed every time.
        cached_ns = self.cache.get(namespace, {})
        if name in cached_ns:
            return cached_ns[name]
        retval = []
        for element in self.getElement("", namespace, name):
            seqs = element.getElementsByTagNameNS(RDF_NAMESPACE, "Seq")
            if seqs:
                for seq in seqs:
                    for item in seq.getElementsByTagNameNS(RDF_NAMESPACE, "li"):
                        retval.append(converter(self._getText(item)))
            else:
                retval.append(converter(self._getText(element)))
        self.cache.setdefault(namespace, {})[name] = retval
        return retval
    return get
def _getter_langalt(namespace, name, converter):
    """Build a property getter for a language-alternative (``rdf:Alt``) value.

    The returned ``get(self)`` produces a dict mapping each ``rdf:li``'s
    ``xml:lang`` attribute to its converted text. A matching node without
    any ``rdf:Alt`` is stored under the key ``"x-default"``. The dict is
    memoized in ``self.cache[namespace][name]``.
    """
    def get(self):
        # Membership test rather than truthiness, so an empty result is
        # served from the cache too instead of being recomputed every time.
        cached_ns = self.cache.get(namespace, {})
        if name in cached_ns:
            return cached_ns[name]
        retval = {}
        for element in self.getElement("", namespace, name):
            alts = element.getElementsByTagNameNS(RDF_NAMESPACE, "Alt")
            if alts:
                for alt in alts:
                    for item in alt.getElementsByTagNameNS(RDF_NAMESPACE, "li"):
                        value = converter(self._getText(item))
                        retval[item.getAttribute("xml:lang")] = value
            else:
                retval["x-default"] = converter(self._getText(element))
        self.cache.setdefault(namespace, {})[name] = retval
        return retval
    return get
def _getter_single(namespace, name, converter):
    """Build a property getter for a single-valued property.

    The returned ``get(self)`` takes the first node yielded by
    ``self.getElement`` (attribute value or element text), converts it with
    *converter* and returns it; ``None`` is returned when no node matches.
    The result is memoized in ``self.cache[namespace][name]``.
    """
    def get(self):
        # Membership test rather than truthiness, so None / empty values are
        # served from the cache too instead of being recomputed every time.
        cached_ns = self.cache.get(namespace, {})
        if name in cached_ns:
            return cached_ns[name]
        value = None
        for element in self.getElement("", namespace, name):
            if element.nodeType == element.ATTRIBUTE_NODE:
                value = element.nodeValue
            else:
                value = self._getText(element)
            # Only the first matching node is used.
            break
        if value is not None:
            value = converter(value)
        self.cache.setdefault(namespace, {})[name] = value
        return value
    return get
dc_contributor = property(_getter_bag(DC_NAMESPACE, "contributor", _converter_string))
"""
Contributors to the resource (other than the authors). An unsorted
array of names.
"""
dc_coverage = property(_getter_single(DC_NAMESPACE, "coverage", _converter_string))
"""
Text describing the extent or scope of the resource.
"""
dc_creator = property(_getter_seq(DC_NAMESPACE, "creator", _converter_string))
"""
A sorted array of names of the authors of the resource, listed in order
of precedence.
"""
dc_date = property(_getter_seq(DC_NAMESPACE, "date", _converter_date))
"""
A sorted array of dates (datetime.datetime instances) of significance to
the resource. The dates and times are in UTC.
"""
dc_description = property(_getter_langalt(DC_NAMESPACE, "description", _converter_string))
"""
A language-keyed dictionary of textual descriptions of the content of the
resource.
"""
dc_format = property(_getter_single(DC_NAMESPACE, "format", _converter_string))
"""
The mime-type of the resource.
"""
dc_identifier = property(_getter_single(DC_NAMESPACE, "identifier", _converter_string))
"""
Unique identifier of the resource.
"""
dc_language = property(_getter_bag(DC_NAMESPACE, "language", _converter_string))
"""
An unordered array specifying the languages used in the resource.
"""
dc_publisher = property(_getter_bag(DC_NAMESPACE, "publisher", _converter_string))
"""
An unordered array of publisher names.
"""
dc_relation = property(_getter_bag(DC_NAMESPACE, "relation", _converter_string))
"""
An unordered array of text descriptions of relationships to other
documents.
"""
dc_rights = property(_getter_langalt(DC_NAMESPACE, "rights", _converter_string))
"""
A language-keyed dictionary of textual descriptions of the rights the
user has to this resource.
"""
dc_source = property(_getter_single(DC_NAMESPACE, "source", _converter_string))
"""
Unique identifier of the work from which this resource was derived.
"""
dc_subject = property(_getter_bag(DC_NAMESPACE, "subject", _converter_string))
"""
An unordered array of descriptive phrases or keywords that specify the
topic of the content of the resource.
"""
dc_title = property(_getter_langalt(DC_NAMESPACE, "title", _converter_string))
"""
A language-keyed dictionary of the title of the resource.
"""
dc_type = property(_getter_bag(DC_NAMESPACE, "type", _converter_string))
"""
An unordered array of textual descriptions of the document type.
"""
pdf_keywords = property(_getter_single(PDF_NAMESPACE, "Keywords", _converter_string))
"""
An unformatted text string representing document keywords.
"""
pdf_pdfversion = property(_getter_single(PDF_NAMESPACE, "PDFVersion", _converter_string))
"""
The PDF file version, for example 1.0, 1.3.
"""
pdf_producer = property(_getter_single(PDF_NAMESPACE, "Producer", _converter_string))
"""
The name of the tool that created the PDF document.
"""
xmp_createDate = property(_getter_single(XMP_NAMESPACE, "CreateDate", _converter_date))
"""
The date and time the resource was originally created. The date and
time are returned as a UTC datetime.datetime object.
"""
xmp_modifyDate = property(_getter_single(XMP_NAMESPACE, "ModifyDate", _converter_date))
"""
The date and time the resource was last modified. The date and time
are returned as a UTC datetime.datetime object.
"""
xmp_metadataDate = property(_getter_single(XMP_NAMESPACE, "MetadataDate", _converter_date))
"""
The date and time that any metadata for this resource was last
changed. The date and time are returned as a UTC datetime.datetime
object.
"""
xmp_creatorTool = property(_getter_single(XMP_NAMESPACE, "CreatorTool", _converter_string))
"""
The name of the first known tool used to create the resource.
"""
xmpmm_documentId = property(_getter_single(XMPMM_NAMESPACE, "DocumentID", _converter_string))
"""
The common identifier for all versions and renditions of this resource.
"""
xmpmm_instanceId = property(_getter_single(XMPMM_NAMESPACE, "InstanceID", _converter_string))
"""
An identifier for a specific incarnation of a document, updated each
time a file is saved.
"""
def custom_properties(self):
    try:
        return self._custom_properties
    except AttributeError:
        pass
    # First access: build the mapping once and keep it on the instance.
    self._custom_properties = {}
    for node in self.getNodesInNamespace("", PDFX_NAMESPACE):
        key = node.localName
        # Undo the pdfx key escaping: the marker character is followed by
        # four hex digits giving the code point of the original character.
        idx = key.find(u_("\u2182"))
        while idx != -1:
            decoded = chr(int(key[idx+1:idx+5], base=16))
            key = key[:idx] + decoded + key[idx+5:]
            idx = key.find(u_("\u2182"))
        if node.nodeType == node.ATTRIBUTE_NODE:
            self._custom_properties[key] = node.nodeValue
        else:
            self._custom_properties[key] = self._getText(node)
    return self._custom_properties
custom_properties = property(custom_properties)
"""
Retrieves custom metadata properties defined in the undocumented pdfx
metadata schema.
:return: a dictionary of key/value items for custom metadata properties.
:rtype: dict
"""
endesive-2.19.1/endesive/pdf/PyPDF2_annotate/ 0000775 0000000 0000000 00000000000 15042366745 0020672 5 ustar 00root root 0000000 0000000 endesive-2.19.1/endesive/pdf/PyPDF2_annotate/__init__.py 0000664 0000000 0000000 00000000365 15042366745 0023007 0 ustar 00root root 0000000 0000000 # -*- coding: utf-8 -*-
from .annotator import PdfAnnotator
from .config.appearance import Appearance
from .config.location import Location
from .config.metadata import Metadata
__all__ = ['PdfAnnotator', 'Appearance', 'Location', 'Metadata']
endesive-2.19.1/endesive/pdf/PyPDF2_annotate/annotations/ 0000775 0000000 0000000 00000000000 15042366745 0023227 5 ustar 00root root 0000000 0000000 endesive-2.19.1/endesive/pdf/PyPDF2_annotate/annotations/__init__.py 0000664 0000000 0000000 00000000000 15042366745 0025326 0 ustar 00root root 0000000 0000000 endesive-2.19.1/endesive/pdf/PyPDF2_annotate/annotations/base.py 0000664 0000000 0000000 00000017202 15042366745 0024515 0 ustar 00root root 0000000 0000000 # -*- coding: utf-8 -*-
"""
Annotation
~~~~~~~~~~~~
Base annotation class.
:copyright: Copyright 2019 Autodesk, Inc.
:license: MIT, see LICENSE for details.
"""
from ..pdfrw import PdfDict, PdfArray, PdfName, IndirectPdfDict
from ..config.constants import GRAPHICS_STATE_NAME
from ..config.metadata import serialize_value
from ..util.geometry import transform_rect
from ..util.geometry import translate
ALL_VERSIONS = ("1.3", "1.4", "1.5", "1.6", "1.7")
class Annotation(object):
"""Base class for all PDF annotation objects.
Concrete annotations should define the following:
* subtype (e.g. "Square")
* make_rect() - bounding box of annotation
* add_additional_pdf_object_data [optional] - additional entries to go
in the PDF object
* add_additional_resources [optional] - additional entries to go in the
Resources sub-dict of the annotation
There is a lot of nuance and viewer-specific (mostly Acrobat and Bluebeam)
details to consider when creating PDF annotations. One big thing that's not
immediately clear from the PDF spec is that wherever possible, we fill in
the annotations' type-specific details (e.g. BE and IC for squares), but
also create and include an Appearance Stream. The latter gives us control
over exactly how the annotation appears across different viewers, while the
former allows Acrobat or BB to regenerate the appearance stream during
editing.
"""
versions = ALL_VERSIONS
def __init__(self, location, appearance, metadata=None):
    """Remember where the annotation goes, how it looks and its metadata.

    :param Location location: placement of the annotation
    :param Appearance appearance: visual properties of the annotation
    :param Metadata metadata: optional extra entries; may be None
    """
    self._metadata = metadata
    self._appearance = appearance
    self._location = location
def as_pdf_object(self, transform, page):
    """Build the indirect PdfDict that represents this annotation.

    :param list transform: Transformation matrix mapping the annotation's
        coords from client-specified space to PDF user space.
    :param PdfDict page: The pdfrw page object from the PDF document
    :returns PdfDict: the annotation object to be inserted into the PDF
    """
    rect = transform_rect(self.make_rect(), transform)
    appearance = self._make_appearance_stream_dict(rect, transform)
    annotation = PdfDict(
        Type=PdfName("Annot"),
        Subtype=PdfName(self.subtype),
        Rect=rect,
        AP=appearance,
        P=page,
    )
    # Generic metadata first, then whatever the concrete subclass adds.
    self._add_metadata(annotation, self._metadata)
    self.add_additional_pdf_object_data(annotation)
    annotation.indirect = True
    return annotation
@property
def page(self):
    """Return the ``page`` attribute of the annotation's location."""
    return self._location.page
def validate(self, pdf_version):
    """Validate a new annotation against a given PDF version.

    The base implementation performs no checks and returns None.
    """
    return None
def _add_metadata(self, obj, metadata):
    """Copy every metadata entry into *obj*, serialized, keyed by PdfName.

    Does nothing when *metadata* is None.
    """
    if metadata is not None:
        for key, raw_value in metadata.iter():
            obj[PdfName(key)] = serialize_value(raw_value)
def _make_ap_resources(self):
    """Make the Resources entry for the appearance stream dictionary.
    Implement add_additional_resources to add additional entries -
    fonts, XObjects, graphics state - to the Resources dictionary.

    :returns IndirectPdfDict: the populated Resources dictionary
    """
    resources = IndirectPdfDict(ProcSet=PdfArray([PdfName("PDF")]))
    # Entries derived from the appearance object are added first ...
    self._add_graphics_state_resources(resources, self._appearance)
    self._add_xobject_resources(resources, self._appearance)
    self._add_font_resources(resources, self._appearance)
    # ... and the subclass hook runs last, so it sees (and may replace)
    # anything set above.
    self.add_additional_resources(resources)
    return resources
@staticmethod
def _add_font_resources(resources, A):
    """Copy the appearance's fonts, if it has any, into ``resources.Font``."""
    if not A.fonts:
        return
    resources.Font = IndirectPdfDict()
    for key, font_object in A.fonts.items():
        resources.Font[PdfName(key)] = font_object
@staticmethod
def _add_xobject_resources(resources, A):
    """Copy explicitly provided XObjects into the Resources dict.

    Used when the caller supplies their own appearance stream and wants it
    to reference, for example, an image. Does nothing when the appearance
    has no xobjects.
    """
    if not A.xobjects:
        return
    resources.XObject = IndirectPdfDict()
    for key, xobject_value in A.xobjects.items():
        resources.XObject[PdfName(key)] = xobject_value
@staticmethod
def _add_graphics_state_resources(resources, A):
    """Populate ``resources.ExtGState`` with the graphics states in use.

    The appearance's own (internal) state, when it has content, is stored
    under GRAPHICS_STATE_NAME; for example, with both stroke and fill
    transparent this yields:
    << /ExtGState /PdfAnnotatorGS <<
    /CA 0.5 /ca 0.75 /Type /ExtGState
    >> >>
    States supplied externally through the appearance's `graphics_states`
    property (for use in explicit content streams) are added after it.
    ExtGState is only created when there is at least one state.
    """
    collected = []
    own_state = Annotation._get_internal_graphics_state(resources, A)
    if own_state is not None:
        collected.append((GRAPHICS_STATE_NAME, own_state))
    if A.graphics_states:
        for key, external in A.graphics_states.items():
            collected.append((key, external.as_pdf_dict()))
    if not collected:
        return
    resources.ExtGState = PdfDict()
    for key, pdf_state in collected:
        resources.ExtGState[PdfName(key)] = pdf_state
@staticmethod
def _get_internal_graphics_state(resources, A):
    """Return the appearance's graphics state as a PDF dict, or None.

    None is returned when the state reports that it has no content.
    *resources* is accepted but not used.
    """
    state = A.get_graphics_state()
    return state.as_pdf_dict() if state.has_content() else None
def _make_appearance_stream_dict(self, bounding_box, transform):
    """Build the AP dictionary holding the normal (``N``) appearance.

    A user-specified content stream takes precedence; otherwise the stream
    is generated by the concrete annotation type.
    """
    content = self._appearance.appearance_stream
    if content is None:
        content = self.make_appearance_stream()
    resources = self._make_ap_resources()
    # Move the content stream into PDF space and resolve it to its final form
    resolved = content.transform(transform).resolve()
    origin_x, origin_y = bounding_box[0], bounding_box[1]
    form_xobject = IndirectPdfDict(
        stream=resolved,
        BBox=bounding_box,
        Resources=resources,
        Matrix=translate(-origin_x, -origin_y),
        Type=PdfName("XObject"),
        Subtype=PdfName("Form"),
        FormType=1,
    )
    return PdfDict(N=form_xobject)
def add_additional_pdf_object_data(self, obj):
    """Hook for subclasses to add extra keys to the PDF object.

    The base implementation is a no-op.
    :param PdfDict obj: the PDF object to be inserted into the PDF
    """
    return None
def add_additional_resources(self, resources):
    """Hook for subclasses to add extra keys to the Resources dictionary.

    The base implementation is a no-op.
    :param PdfDict resources: Resources PDF dictionary
    """
    return None
def make_rect(self):
    """Return a bounding box that encompasses the entire annotation.

    Must be overridden; the base implementation always raises.
    :raises NotImplementedError: always
    """
    raise NotImplementedError()
def make_border_dict(appearance):
    """Build the border dictionary from an appearance's stroke settings."""
    return _make_border_dict(
        appearance.stroke_width,
        appearance.border_style,
        appearance.dash_array,
    )
def _make_border_dict(width, style, dash_array=None):
    """Build a /Border dictionary with width ``W`` and style ``S``.

    :param width: border width stored under ``W``
    :param str style: border style name stored under ``S``
    :param dash_array: optional dash pattern, stored under ``D``
    :raises ValueError: if a dash array is given for a style other than "D"
    """
    border = PdfDict(Type=PdfName("Border"), W=width, S=PdfName(style))
    if not dash_array:
        return border
    if style != "D":
        raise ValueError("Dash array only applies to dashed borders!")
    border.D = dash_array
    return border
class Stamp(object):
    # Only declares the PDF annotation subtype name. Unlike the other
    # annotation classes here it derives from object, not from Annotation.
    subtype = "Stamp"
endesive-2.19.1/endesive/pdf/PyPDF2_annotate/annotations/image.py 0000664 0000000 0000000 00000021572 15042366745 0024672 0 ustar 00root root 0000000 0000000 # -*- coding: utf-8 -*-
"""
Image
~~~~~~~~~~~~
Image annotation class.
:copyright: Copyright 2019 Autodesk, Inc.
:license: MIT, see LICENSE for details.
"""
import sys
import zlib
from io import BytesIO
from PIL import Image as PILImage
from PIL.ImageFile import ImageFile
from ..pdfrw import PdfDict, PdfName, IndirectPdfDict
from .rect import RectAnnotation
from ..config.appearance import set_appearance_state
from ..config.constants import CMYK_MODE
from ..config.constants import GRAYSCALE_ALPHA_MODE
from ..config.constants import GRAYSCALE_MODE
from ..config.constants import PALETTE_MODE
from ..config.constants import RGB_MODE
from ..config.constants import RGBA_MODE
from ..config.constants import SINGLE_CHANNEL_MODE
from ..graphics import ContentStream
from ..graphics import CTM
from ..graphics import Rect
from ..graphics import Restore
from ..graphics import Save
from ..graphics import XObject
from ..util.geometry import matrix_multiply
from ..util.geometry import scale
from ..util.geometry import translate
class Image(RectAnnotation):
"""A basic Image annotation class.
There is no native image annotation in the PDF spec, but image annotations
can be easily approximated by drawing an Image XObject in the appearance
stream.
Supported formats are PNG, JPEG, and GIF. Only the first frame from multi-
frame GIFs is used.
This implementation relies on the python Pillow library to retrieve the
image's raw sample data, then formats that data as expected by the PDF
spec.
Additional work needs to be done. For example:
- supporting the reading of transparency directly from RGBA images
- better compression (e.g. using a Predictor value for FlateDecode)
- supporting DeviceCMYK directly
"""
subtype = "Square"
_image_xobject = None # PdfDict of Image XObject
def add_additional_resources(self, resources):
    """Register the image XObject under the name ``Image`` in *resources*."""
    # Plain assignment: any XObject entry already present is replaced.
    resources[PdfName("XObject")] = IndirectPdfDict(Image=self.image_xobject)
def add_additional_pdf_object_data(self, obj):
    """Store the image XObject on the annotation object under ``Image``."""
    # No call to the parent implementation is made here.
    obj[PdfName("Image")] = self.image_xobject
@property
def image_xobject(self):
    """Return the Image XObject, building it on first access and caching it."""
    xobject = self._image_xobject
    if xobject is None:
        xobject = self.make_image_xobject(self._appearance.image)
        self._image_xobject = xobject
    return xobject
@staticmethod
def make_image_xobject(image):
    """Construct a PdfDict representing the Image XObject, for inserting
    into the AP Resources dict.

    PNGs and GIFs are treated equally - the raw sample values are included
    using PDF's FlateDecode compression format. JPEGs are included using
    the DCTDecode filter.
    PNGs with transparency have the alpha channel split out and included as
    an SMask, since PDFs don't natively support transparent PNGs.
    Details about file formats and allowed modes can be found at
    https://pillow.readthedocs.io/en/5.3.x/handbook/image-file-formats.html

    :param str|ImageFile image: Either a str representing the path to the
        image filename, or a PIL.ImageFile.ImageFile object representing
        the image loaded using the PIL library.
    :returns PdfDict: Image XObject
    :raises ValueError: if the image format is not PNG, GIF or JPEG
    """
    image = Image.resolve_image(image)
    # PILImage.convert drops the format attribute, so capture it before any
    # conversion happens and use only this copy afterwards.
    image_format = image.format
    width, height = image.size
    # Normalize images to RGB or grayscale color spaces, and split out the
    # alpha layer into a PDF smask XObject
    image, smask_xobj = Image.convert_to_compatible_image(image, image_format)
    if image_format in ("PNG", "GIF"):
        content = Image.make_compressed_image_content(image)
        filter_type = "FlateDecode"  # TODO use a predictor
    elif image_format == "JPEG":
        content = Image.make_jpeg_image_content(image)
        filter_type = "DCTDecode"
    else:
        # Report the captured format: `image` may have been converted above
        # and would then no longer carry the original format name.
        raise ValueError(
            "Unsupported image format: {}. Supported formats are "
            "PNG, JPEG, and GIF".format(image_format)
        )
    xobj = IndirectPdfDict(
        stream=content,
        BitsPerComponent=8,
        Filter=PdfName(filter_type),
        ColorSpace=Image._get_color_space_name(image),
        Width=width,
        Height=height,
        Subtype=PdfName("Image"),
        Type=PdfName("XObject"),
    )
    if smask_xobj is not None:
        xobj[PdfName("SMask")] = smask_xobj
    return xobj
@staticmethod
def convert_to_compatible_image(image, image_format):
    """Normalize *image* to an RGB or grayscale mode.

    For PNG/GIF input with an alpha channel the alpha layer is extracted
    into an SMask XObject before the color conversion.

    :returns tuple: (converted image, smask XObject or None)
    """
    smask_xobj = None
    if image_format in ("PNG", "GIF"):
        carries_alpha = image.mode in (RGBA_MODE, GRAYSCALE_ALPHA_MODE)
        if carries_alpha:
            smask_xobj = Image.get_png_smask(image)
        # Palette and RGBA images become plain RGB; the alpha layer, if
        # any, has already been preserved in the smask.
        if image.mode in (RGBA_MODE, PALETTE_MODE):
            image = image.convert(RGB_MODE)
        if image.mode == GRAYSCALE_ALPHA_MODE:
            image = image.convert(GRAYSCALE_MODE)
    if image.mode == CMYK_MODE:
        # Workaround: DeviceCMYK is not emitted directly; CMYK input is
        # converted to RGB instead.
        image = image.convert(RGB_MODE)
    return image, smask_xobj
@staticmethod
def get_png_smask(image):
    """Build a DeviceGray image XObject from the image's "A" channel."""
    alpha_channel = image.getchannel("A")
    width, height = image.size
    return IndirectPdfDict(
        stream=Image.make_compressed_image_content(alpha_channel),
        Width=width,
        Height=height,
        BitsPerComponent=8,
        Filter=PdfName("FlateDecode"),
        ColorSpace=PdfName("DeviceGray"),
        Subtype=PdfName("Image"),
        Type=PdfName("XObject"),
    )
@staticmethod
def resolve_image(image_or_filename):
    """Return an image object for a path string or an already opened image.

    :raises ValueError: if the argument is neither a str nor an ImageFile
    """
    if isinstance(image_or_filename, str):
        return PILImage.open(image_or_filename)
    if not isinstance(image_or_filename, ImageFile):
        raise ValueError(
            "Invalid image format: {}".format(image_or_filename.__class__.__name__)
        )
    return image_or_filename
@staticmethod
def _get_color_space_name(image):
    """Map the image mode to the PDF color space name.

    :raises ValueError: for modes other than RGB, grayscale, single channel
    """
    mode = image.mode
    if mode == RGB_MODE:
        return PdfName("DeviceRGB")
    if mode in (GRAYSCALE_MODE, SINGLE_CHANNEL_MODE):
        return PdfName("DeviceGray")
    raise ValueError("Image color space not yet supported")
@staticmethod
def make_compressed_image_content(image):
    """Return the image's raw sample bytes compressed with zlib."""
    raw_samples = Image.get_raw_image_bytes(image)
    return zlib.compress(raw_samples)
@staticmethod
def make_jpeg_image_content(image):
    """Return the bytes produced by saving *image* as JPEG into memory."""
    # Saving through the image library keeps the JPEG encoding but seems to
    # recompress, so the bytes differ from the original file. A wrapper
    # that preserves the original bytes would probably be better. TODO.
    with BytesIO() as buffer:
        image.save(buffer, format="JPEG")
        return buffer.getvalue()
@staticmethod
def get_decoded_bytes(content):
    """Return *content* as the string type pdfrw expects for streams.

    On Python 3 the bytes are decoded as Latin-1; on Python 2 they are
    returned untouched.
    """
    # pdfrw currently wants str, not bytes, for binary stream objects.
    running_py3 = sys.version_info.major >= 3
    return content.decode("Latin-1") if running_py3 else content
@staticmethod
def get_raw_image_bytes(image):
    """Return the image's sample values as one flat bytes object.

    :raises ValueError: for modes other than RGB, grayscale, single channel
    """
    mode = image.mode
    if mode in (GRAYSCALE_MODE, SINGLE_CHANNEL_MODE):
        # One value per pixel, so the pixel data can be packed directly.
        # Wrapping in bytearray first works on both py2 and py3.
        return bytes(bytearray(image.getdata()))
    if mode == RGB_MODE:
        # Each pixel is a tuple of channel values; flatten them in order.
        samples = bytearray()
        for pixel in list(image.getdata()):
            samples.extend(pixel)
        return bytes(samples)
    raise ValueError("Image color space not yet supported")
@staticmethod
def get_ctm(x1, y1, x2, y2):
    """Get the scaled and translated CTM for an image to be placed in the
    bounding box defined by [x1, y1, x2, y2].

    The matrix is the product of a translation to (x1, y1) and a scale by
    the box's width (x2 - x1) and height (y2 - y1).
    """
    return matrix_multiply(translate(x1, y1), scale(x2 - x1, y2 - y1))
def make_appearance_stream(self):
    """Build the content stream that draws the image in its location box."""
    appearance = self._appearance
    loc = self._location
    stream = ContentStream([Save()])
    set_appearance_state(stream, appearance)
    frame = Rect(loc.x1, loc.y1, loc.x2 - loc.x1, loc.y2 - loc.y1)
    placement = CTM(self.get_ctm(loc.x1, loc.y1, loc.x2, loc.y2))
    stream.extend([frame, placement, XObject("Image"), Restore()])
    return stream
endesive-2.19.1/endesive/pdf/PyPDF2_annotate/annotations/points.py 0000664 0000000 0000000 00000010352 15042366745 0025116 0 ustar 00root root 0000000 0000000 # -*- coding: utf-8 -*-
"""
Points annotations
~~~~~~~~~~~~~~~~~~~~~~~~~
Annotations that are defined by a series of points: Line, Polygon, Polyline
:copyright: Copyright 2019 Autodesk, Inc.
:license: MIT, see LICENSE for details.
"""
from .base import Annotation
from .base import make_border_dict
from ..pdfrw import PdfArray, PdfName
from ..config.appearance import set_appearance_state
from ..config.appearance import stroke_or_fill
from ..graphics import Close
from ..graphics import ContentStream
from ..graphics import Line as CSLine
from ..graphics import Move
from ..graphics import Restore
from ..graphics import Save
from ..graphics import Stroke
def flatten_points(points):
    """Flatten a sequence of points into one PdfArray of coordinates."""
    flat = []
    for point in points:
        flat.extend(point)
    return PdfArray(flat)
class PointsAnnotation(Annotation):
"""An abstract annotation that defines its location on the document with
an array of points.
"""
def make_rect(self):
    """Return the bounding box of all points, padded by the stroke width.

    :returns list: [min_x, min_y, max_x, max_y], each side moved outwards
        by the appearance's stroke width
    """
    L = self._location
    stroke_width = self._appearance.stroke_width
    p = L.points[0]
    # Seed each extreme from its own axis of the first point: x for the
    # horizontal bounds, y for the vertical bounds.
    min_x = max_x = p[0]
    min_y = max_y = p[1]
    for x, y in L.points:
        min_x = min(min_x, x)
        max_x = max(max_x, x)
        min_y = min(min_y, y)
        max_y = max(max_y, y)
    return [
        min_x - stroke_width,
        min_y - stroke_width,
        max_x + stroke_width,
        max_y + stroke_width,
    ]
def base_points_object(self):
    """Return a base object with border style (BS) and color (C) set."""
    # NOTE(review): make_base_object is not defined on this class, nor on
    # the Annotation base class in base.py - confirm it exists elsewhere
    # before relying on this method.
    obj = self.make_base_object()
    obj.BS = make_border_dict(self._appearance)
    obj.C = self._appearance.stroke_color
    # TODO line endings, leader lines, captions
    return obj
class Line(PointsAnnotation):
    """Line annotation drawn between the first two points of the location."""

    subtype = "Line"

    def make_appearance_stream(self):
        """Build the content stream: move to point 0, line to point 1."""
        appearance = self._appearance
        pts = self._location.points
        stream = ContentStream([Save()])
        set_appearance_state(stream, appearance)
        start = pts[0]
        stream.add(Move(start[0], start[1]))
        end = pts[1]
        stream.add(CSLine(end[0], end[1]))
        stroke_or_fill(stream, appearance)
        stream.add(Restore())
        return stream

    def add_additional_pdf_object_data(self, obj):
        """Store the flattened point coordinates under the ``L`` key."""
        # TODO line endings, leader lines, captions
        coordinates = flatten_points(self._location.points)
        obj[PdfName("L")] = coordinates
class Polygon(PointsAnnotation):
    """Closed polygon through all points of the location."""

    subtype = "Polygon"
    versions = ("1.5", "1.6", "1.7")

    def make_appearance_stream(self):
        """Build the content stream: a closed path through every point."""
        appearance = self._appearance
        pts = self._location.points
        stream = ContentStream([Save()])
        set_appearance_state(stream, appearance)
        first = pts[0]
        stream.add(Move(first[0], first[1]))
        for px, py in pts[1:]:
            stream.add(CSLine(px, py))
        stream.add(Close())
        stroke_or_fill(stream, appearance)
        stream.add(Restore())
        return stream

    def add_additional_pdf_object_data(self, obj):
        """Store the interior color (if filled) and the vertex list."""
        fill = self._appearance.fill
        if fill:
            obj[PdfName("IC")] = fill
        obj[PdfName("Vertices")] = flatten_points(self._location.points)
class Polyline(PointsAnnotation):
    """Open, stroked path through all points of the location."""

    subtype = "PolyLine"
    versions = ("1.5", "1.6", "1.7")

    def make_appearance_stream(self):
        """Build the content stream: an unclosed stroked path."""
        appearance = self._appearance
        pts = self._location.points
        stream = ContentStream([Save()])
        set_appearance_state(stream, appearance)
        first = pts[0]
        stream.add(Move(first[0], first[1]))
        for px, py in pts[1:]:
            stream.add(CSLine(px, py))
        # TODO add a 'close' attribute?
        stream.extend([Stroke(), Restore()])
        return stream

    def add_additional_pdf_object_data(self, obj):
        """Store the flattened point coordinates under ``Vertices``."""
        coordinates = flatten_points(self._location.points)
        obj[PdfName("Vertices")] = coordinates
class Ink(PointsAnnotation):
    """Ink annotation drawn as straight segments between the points."""

    subtype = "Ink"

    def make_appearance_stream(self):
        """Build the content stream: an unclosed stroked path."""
        appearance = self._appearance
        pts = self._location.points
        stream = ContentStream([Save()])
        set_appearance_state(stream, appearance)
        first = pts[0]
        stream.add(Move(first[0], first[1]))
        # TODO "real" PDF editors do smart smoothing of ink points using
        # interpolated Bezier curves.
        for px, py in pts[1:]:
            stream.add(CSLine(px, py))
        stream.extend([Stroke(), Restore()])
        return stream

    def add_additional_pdf_object_data(self, obj):
        """Store the points as a single path inside ``InkList``."""
        single_path = flatten_points(self._location.points)
        obj[PdfName("InkList")] = PdfArray([single_path])
endesive-2.19.1/endesive/pdf/PyPDF2_annotate/annotations/rect.py 0000664 0000000 0000000 00000013103 15042366745 0024534 0 ustar 00root root 0000000 0000000 # -*- coding: utf-8 -*-
"""
Rectangular Annotations
~~~~~~~~~~~~~~~~~~~~~~~
Annotations defined by a width and a height: Square, Circle
:copyright: Copyright 2019 Autodesk, Inc.
:license: MIT, see LICENSE for details.
"""
from .base import Annotation
from .base import make_border_dict
from ..pdfrw import PdfArray, PdfName
from ..config.appearance import set_appearance_state
from ..config.appearance import stroke_or_fill
from ..graphics import Bezier
from ..graphics import Close
from ..graphics import ContentStream
from ..graphics import Line
from ..graphics import Move
from ..graphics import quadratic_to_cubic_bezier
from ..graphics import Rect
from ..graphics import Restore
from ..graphics import Save
class RectAnnotation(Annotation):
    """Abstract annotation whose location is the box (x1, y1)-(x2, y2)."""

    def make_rect(self):
        """Return the location box grown by the stroke width on each side."""
        margin = self._appearance.stroke_width
        loc = self._location
        lower_left = [loc.x1 - margin, loc.y1 - margin]
        upper_right = [loc.x2 + margin, loc.y2 + margin]
        return lower_left + upper_right

    def add_additional_pdf_object_data(self, obj):
        """Add border style, colors and rect differences to *obj*."""
        appearance = self._appearance
        obj[PdfName("BS")] = make_border_dict(appearance)
        obj[PdfName("C")] = appearance.stroke_color
        if appearance.fill:
            obj[PdfName("IC")] = appearance.fill
        # RD gets half the stroke width on each of the four sides.
        half_stroke = appearance.stroke_width / 2.0
        obj[PdfName("RD")] = PdfArray([half_stroke] * 4)
class Square(RectAnnotation):
    """Rectangle annotation drawn over the location box."""

    subtype = "Square"

    def make_appearance_stream(self):
        """Build the content stream: one rectangle, stroked and/or filled."""
        loc = self._location
        appearance = self._appearance
        box_width = loc.x2 - loc.x1
        box_height = loc.y2 - loc.y1
        stream = ContentStream([Save()])
        set_appearance_state(stream, appearance)
        stream.add(Rect(loc.x1, loc.y1, box_width, box_height))
        stroke_or_fill(stream, appearance)
        stream.add(Restore())
        # TODO dash array
        return stream
def add_rounded_rectangle(stream, x, y, width, height, rx, ry):
    """Append a closed rounded-rectangle path to the content stream.

    The path starts at (x + rx, y) and alternates a straight edge with a
    corner curve, four times, before being closed.

    :param ContentStream stream:
    :param float x: x coordinate of the rectangle's origin corner
    :param float y: y coordinate of the rectangle's origin corner
    :param float width:
    :param float height:
    :param float rx: x radius of the rounded corners
    :param float ry: y radius of the rounded corners
    """
    far_x = x + width
    far_y = y + height
    # One entry per side: (end of the straight edge, corner control point,
    # end of the corner curve).
    sides = [
        ((far_x - rx, y), (far_x, y), (far_x, y + ry)),
        ((far_x, far_y - ry), (far_x, far_y), (far_x - rx, far_y)),
        ((x + rx, far_y), (x, far_y), (x, far_y - ry)),
        ((x, y + ry), (x, y), (x + rx, y)),
    ]
    stream.add(Move(x + rx, y))
    for edge_end, control, curve_end in sides:
        stream.add(Line(edge_end[0], edge_end[1]))
        stream.add(
            quadratic_to_cubic_bezier(
                start_x=edge_end[0],
                start_y=edge_end[1],
                control_x=control[0],
                control_y=control[1],
                end_x=curve_end[0],
                end_y=curve_end[1],
            )
        )
    stream.add(Close())
def add_bezier_circle(stream, x1, y1, x2, y2):
    """Append a closed four-curve ellipse inscribed in the given box.

    PDF graphics has no ellipse primitive, so the shape is approximated
    with four bezier curves.

    :param ContentStream stream:
    :param float x1:
    :param float y1:
    :param float x2:
    :param float y2:
    """
    mid_x = x1 + (x2 - x1) / 2.0
    mid_y = y1 + (y2 - y1) / 2.0
    k = 0.552284749831
    # Control-point coordinates on either side of the box's mid lines.
    # https://stackoverflow.com/questions/1734745/how-to-create-circle-with-b%C3%A9zier-curves
    ctrl_x_high = mid_x + (x2 - mid_x) * k
    ctrl_x_low = mid_x - (mid_x - x1) * k
    ctrl_y_high = mid_y + (y2 - mid_y) * k
    ctrl_y_low = mid_y - (mid_y - y1) * k
    # Start at (mid_x, y1), then one curve per quadrant back to the start.
    quadrants = [
        (ctrl_x_high, y1, x2, ctrl_y_low, x2, mid_y),
        (x2, ctrl_y_high, ctrl_x_high, y2, mid_x, y2),
        (ctrl_x_low, y2, x1, ctrl_y_high, x1, mid_y),
        (x1, ctrl_y_low, ctrl_x_low, y1, mid_x, y1),
    ]
    stream.add(Move(mid_x, y1))
    for curve in quadrants:
        stream.add(Bezier(*curve))
    stream.add(Close())
class Circle(RectAnnotation):
    """Ellipse annotation; same PDF structure as Square, but with a
    different content stream.
    """

    subtype = "Circle"

    def make_appearance_stream(self):
        """Build the content stream: a bezier ellipse filling the box."""
        loc = self._location
        appearance = self._appearance
        stream = ContentStream([Save()])
        set_appearance_state(stream, appearance)
        add_bezier_circle(stream, loc.x1, loc.y1, loc.x2, loc.y2)
        stroke_or_fill(stream, appearance)
        stream.add(Restore())
        return stream
endesive-2.19.1/endesive/pdf/PyPDF2_annotate/annotations/signature.py 0000664 0000000 0000000 00000045505 15042366745 0025613 0 ustar 00root root 0000000 0000000 # -*- coding: utf-8 -*-
"""
Signature Annotations
~~~~~~~~~~~~~~~~~~~~~~~
Signature widget annotation whose location is defined by a width and a height.
:copyright: Copyright 2019 Autodesk, Inc.
:license: MIT, see LICENSE for details.
"""
import os.path
from ..pdfrw import PdfArray, PdfName, IndirectPdfDict, PdfDict
from ..pdfttf import TTFFont
from .image import Image
from .text import get_text_commands
from .base import Annotation
from .base import make_border_dict
from ..util.geometry import transform_rect
from ..util.geometry import identity
from ..config.appearance import set_appearance_state
from ..config.appearance import stroke_or_fill
from ..config.constants import DEFAULT_BASE_FONT
from ..config.constants import GRAPHICS_STATE_NAME
from ..config.constants import PDF_ANNOTATOR_FONT
from ..graphics import BaseCommand, FloatTupleCommand
from ..graphics import Bezier
from ..graphics import Close
from ..graphics import ContentStream
from ..graphics import Line
from ..graphics import Move
from ..graphics import quadratic_to_cubic_bezier
from ..graphics import Rect
from ..graphics import Restore
from ..graphics import Save
from ..graphics import BeginText, Text, EndText, TextMatrix
from ..graphics import Font
from ..graphics import FillColor, StrokeColor, Stroke, StrokeWidth, Fill
from ..graphics import CTM, XObject
# Location of Helvetica.ttf in the sibling ``fonts`` directory, resolved
# relative to this module's file. Used as the default TTF font for signatures.
HELVETICA_PATH = os.path.join(os.path.dirname(__file__), "..", "fonts", "Helvetica.ttf")
class Signature(Annotation):
    """Signature annotation that defines its location on the document with a
    width and a height. Internal structure follows that which is documented
    by Adobe, with /frm referencing a blank /n0 layer with all the appearance
    in the stream of the /n2 layer.
    """

    subtype = 'Widget'

    # Order in which simple_signature renders the textual fields.
    _FIELD_ORDER = ('CN', 'DN', 'date', 'reason', 'contact', 'location', 'software')
    # Prefix put in front of each field when the caller asks for labels.
    _FIELD_LABELS = dict(
        DN='DN: ',
        CN='Digitally signed by ',
        date='Date: ',
        contact='Contact: ',
        reason='Reason: ',
        location='Location: ',
        software='Signing software: ',
    )

    def __init__(self, location, appearance, metadata=None):
        super(Signature, self).__init__(location, appearance, metadata)
        # Named resources referenced from the /n2 appearance layer.
        self._images = {}
        self._ttf = {}
        self._type1 = {}

    def add_additional_resources(self, resources):
        """Hook for extra page-level resources; intentionally does nothing.

        Fonts and images used by the signature are attached to the /n2 form
        XObject in _make_n2, not to ``resources``.
        """
        # TODO: additional resources like: fonts, ... that need to be included in pdf
        return

    @classmethod
    def _signature_text_lines(cls, signature):
        """Return the list of text lines to print for ``signature``."""
        if signature.get('text', False):
            # Free-form text replaces the generated field lines entirely.
            return [signature['text']]
        labels = cls._FIELD_LABELS if signature.get('labels', False) else {}
        return [
            '{}{}'.format(labels.get(key, ''), signature[key])
            for key in cls._FIELD_ORDER
            if key in signature
        ]

    @staticmethod
    def _place_icon(bbox, i_w, i_h, has_text):
        """Return ``(icon_box, used_width)`` for an icon of ``i_w`` x ``i_h``.

        ``icon_box`` is the [x1, y1, x2, y2] rectangle the icon is drawn into
        and ``used_width`` is the horizontal space the text must skip. When
        text is present the icon may use at most half the box width.
        """
        width = bbox[2] - bbox[0]
        height = bbox[3] - bbox[1]
        factor = 2.0 if has_text else 1.0
        slot_width = width / factor

        if i_h <= height and i_w <= slot_width:
            # Icon fits at natural size: centre vertically, and horizontally
            # too when there is no text next to it.
            dist = (height - i_h) / 2
            if has_text:
                box = [bbox[0], bbox[1] + dist, i_w + bbox[0], bbox[3] - dist]
            else:
                dist2 = (width - i_w) / 2
                box = [bbox[0] + dist2, bbox[1] + dist, bbox[2] - dist2, bbox[3] - dist]
            return box, i_w

        if (i_w / i_h) > (slot_width / height):
            # Too wide: scale down to the slot width and centre vertically.
            scale = slot_width / i_w
            dist = (height - i_h * scale) / 2
            box = [bbox[0], bbox[1] + dist, bbox[0] + slot_width, bbox[3] - dist]
            return box, slot_width

        # Too tall: scale down to the box height.
        scale = height / i_h
        if has_text:
            box = [bbox[0], bbox[1], bbox[0] + i_w * scale, bbox[3]]
        else:
            # Centre horizontally when the icon is the only content.
            dist = (width - i_w * scale) / 2
            box = [bbox[0] + dist, bbox[1], bbox[2] - dist, bbox[3]]
        return box, i_w * scale

    def simple_signature(self, signature):
        """
        simple_signature(self, signature)

        A simple signature annotation whose appearance is inspired by
        the signatures from Adobe's appearances. The text is output
        using the default font, and no calls to add_image or add_font
        are needed to use this.

        signature = dict(
            background  = Image with alpha / [R, G, B] fill colour / None,
            icon        = Image with alpha / None,
            text        = str (if set, replaces the generated field lines),
            labels      = bool,
            CN          = str,
            DN          = str,
            date        = str,
            contact     = str,
            reason      = str,
            location    = str,
            software    = str,
            outline     = [R, G, B],
            border      = int,
        )
        """
        bbox = self._internal_location()
        processor = SignatureAppearance(bbox, self._appearance, self._ttf, self._images)
        height = bbox[3] - bbox[1]
        t_left = 5
        border = signature.get('border', 0)
        block = self._signature_text_lines(signature)

        cs = ContentStream([Save()])
        if 'background' in signature:
            background = signature['background']
            if isinstance(background, (list, tuple)):
                # A colour: fill the whole box with it.
                cs.extend(processor.fill_colour(*background))
                cs.extend(processor.rect_fill(*bbox))
            else:
                # Always (re-)register the image under the name used below.
                # The previous guard tested the key 'bg', which this method
                # never writes, so it could only ever skip a needed add.
                self.add_image(background, 'Bg')
                cs.extend(processor.image('Bg', *bbox))

        outline = signature.get('outline', [0, 0, 0])
        cs.extend(processor.fill_colour(*outline))
        cs.extend(processor.stroke_colour(*outline))
        if border:
            cs.extend(processor.border(border))

        if 'icon' in signature:
            # Same reasoning as for 'Bg': the image is looked up as 'Icon'
            # right below, so it must be registered under exactly that name.
            self.add_image(signature['icon'], 'Icon')
            i_w = self._images['Icon']['/Width']
            i_h = self._images['Icon']['/Height']
            i_box, used_width = self._place_icon(bbox, i_w, i_h, bool(block))
            # Text starts to the right of the icon, after a 5 unit gap.
            t_left += used_width + 5
            cs.extend(processor.image('Icon', *i_box))

        if block:
            font = self.get_default_font()
            cs.extend(processor.text_box(
                '\n'.join(block), font, t_left, 5, (bbox[2] - 5) - t_left, height - 5
            ))
        cs.add(Restore())
        self._n2_layer = cs

    def set_signature_appearance(self, *directives):
        """
        set_signature_appearance(self, *directives)

        Use the provided list of lists to compute a signature appearance
        using calls to the SignatureAppearance processor. Note that the
        text_box directive can only be used with a TTF font.

        Each directive is a sequence whose first item names an entry of
        SignatureAppearance.template and whose remaining items are that
        entry's arguments. Directives with an unknown name are ignored.
        """
        processor = SignatureAppearance(
            self._internal_location(), self._appearance, self._ttf, self._images
        )
        cs = ContentStream([Save()])
        for directive in directives:
            handler = processor.template.get(directive[0])
            if handler is None:
                continue
            commands = handler(processor, *directive[1:])
            if not isinstance(commands, list):
                commands = [commands]
            cs.extend(commands)
        # Close a text object the directives may have left open, so the
        # final Restore is never emitted inside it.
        cs.extend(processor.done())
        cs.add(Restore())
        self._n2_layer = cs

    def make_rect(self):
        """Return the location box grown by the stroke width on every side."""
        stroke_width = self._appearance.stroke_width
        L = self._location
        return [
            L.x1 - stroke_width,
            L.y1 - stroke_width,
            L.x2 + stroke_width,
            L.y2 + stroke_width,
        ]

    def add_default_font(self):
        """Register the Helvetica TTF font under the default font name."""
        self.add_ttf_font(HELVETICA_PATH, PDF_ANNOTATOR_FONT)

    def get_default_font(self):
        """Return the default TTF font, registering it on first use."""
        if PDF_ANNOTATOR_FONT not in self._ttf:
            self.add_default_font()
        return self._ttf[PDF_ANNOTATOR_FONT]

    def add_type1_font(self, base_font, name="Font", encoding='WinAnsiEncoding'):
        """Register a Type1 font dictionary under ``name``.

        :param str base_font: value written as /BaseFont, e.g. 'Helvetica'
        :param str name: resource name the font is registered under
        :param str encoding: value written as /Encoding; ignored for 'Symbol'
            and 'ZapfDingbats', which use their own name as encoding
        """
        # NOTE(review): base_font is not validated. The original carried an
        # unused tuple of font names here, so a check may have been intended.
        if base_font in ('Symbol', 'ZapfDingbats'):
            encoding = base_font
        self._type1[name] = PdfDict(
            Type=PdfName('Font'),
            Subtype=PdfName('Type1'),
            Name=PdfName(name),
            BaseFont=PdfName(base_font),
            Encoding=PdfName(encoding),
        )

    def add_ttf_font(self, path, name="Font"):
        """Load the TTF file at ``path`` and register it under ``name``."""
        font = TTFFont(path)
        self._ttf[name] = font

    def add_image(self, obj, name="Image"):
        """Convert ``obj`` to an image XObject and register it under ``name``."""
        self._images[name] = Image.make_image_xobject(obj)

    def _make_appearance_stream_dict(self, bounding_box, transform):
        """Build the nested form XObjects and return the /AP dictionary.

        ``bounding_box`` and ``transform`` are accepted but not used: every
        layer is built in the annotation's own coordinate space.
        """
        # Order matters: /FRM references n0 and n2, and the /N stream
        # references /FRM.
        self._make_n0()
        self._make_n2()
        self._make_frm()
        self._make_apn()
        return PdfDict(N=self._apn)

    def as_pdf_object(self, transform, page):
        """Return the PdfDict object representing the annotation, that will be
        inserted as is into the PDF document.

        :param list transform: Transformation matrix to transform the coords
            of the annotation from client-specified space to PDF user space.
        :param PdfDict page: The pdfrw page object from the PDF document
        :returns PdfDict: the annotation object to be inserted into the PDF
        """
        bounding_box = transform_rect(self.make_rect(), transform)
        appearance_stream = self._make_appearance_stream_dict(bounding_box, transform)

        obj = PdfDict(
            Type=PdfName("Annot"),
            Subtype=PdfName(self.subtype),
            Rect=bounding_box,
            AP=appearance_stream,
            P=page,
        )

        self._add_metadata(obj, self._metadata)
        self.add_additional_pdf_object_data(obj)
        obj.indirect = True

        return obj

    def _internal_location(self):
        """Return the location box translated so its lower-left is (0, 0)."""
        L = self._location
        return (0, 0, L.x2 - L.x1, L.y2 - L.y1)

    def _make_apn(self):
        """Build the top-level /N form XObject, which only paints /FRM."""
        self._apn = IndirectPdfDict(
            BBox=self._internal_location(),
            Resources=dict(
                ProcSet=PdfArray([PdfName('PDF')]),
                XObject={'FRM': self._frm},
            ),
            Type=PdfName('XObject'),
            Subtype=PdfName('Form'),
            FormType=1,
            Matrix=identity(),
        )
        self._apn['stream'] = 'q 1 0 0 1 0 0 cm /FRM Do Q'

    def _make_frm(self):
        """Build the /FRM form XObject, which paints /n0 and then /n2."""
        self._frm = IndirectPdfDict(
            BBox=self._internal_location(),
            Resources=dict(
                ProcSet=PdfArray([PdfName('PDF')]),
                XObject={'n0': self._n0, 'n2': self._n2},
            ),
            Matrix=identity(),
            Type=PdfName('XObject'),
            Subtype=PdfName('Form'),
        )
        self._frm['stream'] = 'q 1 0 0 1 0 0 cm /n0 Do Q q 1 0 0 1 0 0 cm /n2 Do Q'

    def _make_n0(self):
        """Build the blank /n0 layer."""
        self._n0 = IndirectPdfDict(
            BBox=self._internal_location(),
            Type=PdfName('XObject'),
            Subtype=PdfName('Form'),
            FormType=1,
            Matrix=identity(),
            Resources={'ProcSet': PdfArray([PdfName('Text')])},
        )
        self._n0['stream'] = '% DSBlank'

    def _make_n2(self):
        """Build the /n2 layer holding the visible appearance.

        Requires simple_signature or set_signature_appearance to have been
        called first, since they create ``self._n2_layer``.
        """
        resources = dict(
            ProcSet=PdfArray([PdfName('PDF'), PdfName('Text'), PdfName('ImageC')])
        )
        fonts = PdfDict()
        for name, font in self._ttf.items():
            fonts[name] = font.get_font()
        for name, font in self._type1.items():
            fonts[name] = font
        # Only add resource sub-dictionaries that have content.
        if fonts:
            resources[PdfName('Font')] = fonts
        if self._images:
            resources[PdfName('XObject')] = self._images
        self._n2 = IndirectPdfDict(
            BBox=self._internal_location(),
            Matrix=identity(),
            Type=PdfName('XObject'),
            Subtype=PdfName('Form'),
            FormType=1,
            Resources=resources,
        )
        self._n2['stream'] = self._n2_layer.resolve()
class SignatureAppearance:
    """Turn drawing directives into content-stream commands.

    The processor remembers whether a text object is currently open and
    whether the font and text matrix have to be emitted again, so text and
    graphics directives can be mixed. Every public method returns the list
    of commands to append to the content stream.
    """

    def __init__(self, box, appearance, ttf, images):
        """
        :param box: (x1, y1, x2, y2) bounds of the appearance
        :param appearance: object whose ``font_size`` is the initial font size
        :param dict ttf: TTF fonts by resource name
        :param dict images: image XObjects by resource name
        """
        self._in_text = False
        self._reset_font = True
        self._reset_tm = False
        self._cur_font = (PDF_ANNOTATOR_FONT, appearance.font_size)
        self._cur_tm = [1, 0, 0, 1, 0, 0]
        self._sc = [0, 0, 0]
        self._fc = [0, 0, 0]
        self._bounds = box
        self._ttf = ttf
        self._images = images

    def _end_text(self):
        """Close the open text object, if any, and return the commands.

        The next text object starts with a fresh text matrix, so the font
        and matrix are flagged for re-emission before further text.
        """
        if not self._in_text:
            return []
        self._in_text = False
        self._reset_font = True
        self._reset_tm = True
        return [EndText()]

    def save(self):
        """Return a Save command.

        Wrapped in a method because template entries are called with the
        processor as first argument, which must not reach the command's
        constructor.
        """
        return [Save()]

    def fill_colour(self, *colour):
        """Set the fill colour."""
        self._fc = colour
        return [FillColor(*colour)]

    def stroke_colour(self, *colour):
        """Set the stroke colour."""
        self._sc = colour
        return [StrokeColor(*colour)]

    def border(self, inset):
        """Stroke a rectangle ``inset`` units inside the bounds."""
        box = self._bounds
        return [
            Rect(inset, inset, box[2]-2*inset, box[3]-2*inset),
            Stroke()
        ]

    def image(self, image_name, x1, y1, x2, y2, distort=True, centred=True):
        """Paint the registered image ``image_name`` into the given box.

        :param bool distort: stretch the image to fill the box; when False
            the aspect ratio is kept and the image is fitted inside the box
        :param bool centred: when not distorting, centre the image along the
            axis that has spare room
        """
        max_w = x2 - x1
        max_h = y2 - y1
        if distort:
            scale_x = max_w
            scale_y = max_h
        else:
            i_w = self._images[image_name]['/Width']
            i_h = self._images[image_name]['/Height']
            if i_w/i_h >= max_w/max_h:
                # image proportionally wider than box
                scale_x = max_w
                scale_y = scale_x*(i_h/i_w)
                if centred:  # centre vertically
                    y1 += (max_h-scale_y)/2.0
            else:
                # image proportionally narrower than box
                scale_y = max_h
                scale_x = scale_y*(i_w/i_h)
                if centred:  # centre horizontally
                    x1 += (max_w-scale_x)/2.0
        commands = self._end_text()
        commands.append(Save())
        commands.append(CTM((scale_x, 0, 0, scale_y, x1, y1)))
        commands.append(XObject(image_name))
        commands.append(Restore())
        return commands

    def rect(self, *box):
        """Stroke a rectangle; ``box`` is forwarded unchanged to Rect."""
        seq = self._end_text()
        seq.extend([
            Rect(*box),
            Stroke()
        ])
        return seq

    def rect_fill(self, *box):
        """Fill a rectangle; ``box`` is forwarded unchanged to Rect."""
        seq = self._end_text()
        seq.extend([
            Rect(*box),
            Fill()
        ])
        return seq

    def reset(self):
        """Set both stroke and fill colour back to black."""
        return [
            StrokeColor(0, 0, 0),
            FillColor(0, 0, 0),
        ]

    def text_position(self, x, y):
        """Move the text position.

        Negative coordinates are measured from the right / top bound.
        Outside a text object the move is deferred until text is written.
        """
        if x < 0:
            x = self._bounds[2]+x
        if y < 0:
            y = self._bounds[3]+y
        self._cur_tm[4] = x
        self._cur_tm[5] = y
        if self._in_text:
            return [TextMatrix(self._cur_tm.copy())]
        self._reset_tm = True
        return []

    def font(self, name, size):
        """Select the font; ``'default'`` names the default annotator font.

        Outside a text object the change is deferred until text is written.
        """
        if name == 'default':
            name = PDF_ANNOTATOR_FONT
        self._cur_font = (name, size)
        if self._in_text:
            return [Font(name, size), TextLeading(size*1.2)]
        self._reset_font = True
        return []

    def text_box(self, text, ttf_font, x, y, width, height, font_size=8, wrap_text=True, align='left', baseline='middle', line_spacing=1.2):
        """Lay out ``text`` inside the box at (x, y) of the given size.

        :param ttf_font: TTF font object, or the resource name of a
            registered one; unknown names fall back to the default font
        """
        if isinstance(ttf_font, str):
            font = self._ttf.get(ttf_font, None)
            if font is None:
                font = self._ttf[PDF_ANNOTATOR_FONT]
                fontname = PDF_ANNOTATOR_FONT
            else:
                fontname = ttf_font
        else:
            font = ttf_font
            fontname = font.font["name"]
            if fontname == 'Helvetica':
                # Helvetica is registered under the default resource name.
                fontname = PDF_ANNOTATOR_FONT
        font.set_size(font_size)
        font.set_text(text)
        commands = []
        # Outside a text object the box gets its own BT ... ET pair.
        if not self._in_text:
            commands.extend([BeginText(), Font(fontname, font_size)])
        commands.extend(get_text_commands(x, y, x+width, y+height, text, font_size, wrap_text, align, baseline, line_spacing, font))
        if not self._in_text:
            commands.append(EndText())
        return commands

    def text(self, text):
        """Write ``text`` at the current position in the current font,
        opening a text object first if needed.
        """
        if self._cur_font[0] in self._ttf:
            ttf_font = self._ttf[self._cur_font[0]]
            ttf_font.set_size(self._cur_font[1])
            ttf_font.set_text(text)
            # TTF text is written as UTF-16BE bytes carried in a str.
            text = text.encode("utf-16be").decode("latin1")
        commands = []
        if not self._in_text:
            commands.append(BeginText())
            self._in_text = True
        if self._reset_tm:
            commands.append(TextMatrix(self._cur_tm.copy()))
            self._reset_tm = False
        if self._reset_font:
            commands.append(Font(*self._cur_font))
            commands.append(TextLeading(self._cur_font[1]*1.2))
            self._reset_font = False
        commands.append(Text(text))
        return commands

    def new_line(self):
        """Move to the next line; does nothing outside a text object."""
        if self._in_text:
            return [NewLine()]
        return []

    def done(self):
        """Close the open text object, if any."""
        return self._end_text()

    # Directive name -> handler. Handlers are called as
    # ``handler(processor, *args)``.
    template = dict(
        save=save,
        reset=reset,
        image=image,
        rect=rect,
        rect_fill=rect_fill,
        border=border,
        font=font,
        text=text,
        text_box=text_box,
        text_position=text_position,
        new_line=new_line,
        done=done,
        fill_colour=fill_colour,
        stroke_colour=stroke_colour,
        fill_color=fill_colour,
        stroke_color=stroke_colour,
    )
class TextPosition(metaclass=FloatTupleCommand):
    # Content-stream command for the ``Td`` operator, with operands x and y.
    # NOTE(review): operand handling comes from the FloatTupleCommand
    # metaclass, which is defined in the graphics module — confirm there.
    COMMAND = 'Td'
    ARGS = ['x', 'y']
class TextRender(metaclass=FloatTupleCommand):
    # Content-stream command for the ``Tr`` operator, with one operand,
    # ``render``.
    COMMAND = 'Tr'
    ARGS = ['render']
class TextScale(metaclass=FloatTupleCommand):
    # Content-stream command for the ``Tz`` operator, with one operand,
    # ``scale``.
    COMMAND = 'Tz'
    ARGS = ['scale']
class TextLeading(metaclass=FloatTupleCommand):
    # Content-stream command for the ``TL`` operator, with one operand,
    # ``leading``. SignatureAppearance emits it with 1.2 times the font size
    # whenever it emits a font.
    COMMAND = 'TL'
    ARGS = ['leading']
class TextRise(metaclass=FloatTupleCommand):
    # Content-stream command for the ``Ts`` operator, with one operand,
    # ``rise``.
    COMMAND = 'Ts'
    ARGS = ['rise']
class WordSpacing(metaclass=FloatTupleCommand):
    # Content-stream command for the ``Tw`` operator, with one operand,
    # ``wordSpace``.
    COMMAND = 'Tw'
    ARGS = ['wordSpace']
class CharacterSpacing(metaclass=FloatTupleCommand):
    # Content-stream command for the ``Tc`` operator, with one operand,
    # ``charSpace``.
    COMMAND = 'Tc'
    ARGS = ['charSpace']
class StrokeGray(metaclass=FloatTupleCommand):
    # Content-stream command for the ``G`` operator (upper case), with one
    # operand, ``gray``.
    COMMAND = 'G'
    ARGS = ['gray']
class FillGray(metaclass=FloatTupleCommand):
    # Content-stream command for the ``g`` operator (lower case), with one
    # operand, ``gray``.
    COMMAND = 'g'
    ARGS = ['gray']
class NewLine(BaseCommand):
    # Content-stream command for the ``T*`` operator; it declares no
    # operands. Emitted by SignatureAppearance.new_line.
    COMMAND = 'T*'
endesive-2.19.1/endesive/pdf/PyPDF2_annotate/annotations/text.py 0000664 0000000 0000000 00000015312 15042366745 0024567 0 ustar 00root root 0000000 0000000 # -*- coding: utf-8 -*-
"""
Text Annotations
~~~~~~~~~~~~~~~~
The FreeText annotation
:copyright: Copyright 2019 Autodesk, Inc.
:license: MIT, see LICENSE for details.
"""
import os.path
import codecs
from ..pdfrw import PdfDict, PdfName, PdfString, PdfArray, IndirectPdfDict
from ..pdfttf import TTFFont
from .base import _make_border_dict
from .base import Annotation
from ..config.constants import DEFAULT_BASE_FONT
from ..config.constants import GRAPHICS_STATE_NAME
from ..config.constants import PDF_ANNOTATOR_FONT
from ..graphics import BeginText
from ..graphics import ContentStream
from ..graphics import EndText
from ..graphics import FillColor
from ..graphics import Font
from ..graphics import GraphicsState as CSGraphicsState
from ..graphics import Restore
from ..graphics import Save
from ..graphics import Text
from ..graphics import TextMatrix
from ..util.geometry import translate
from ..util.text import get_wrapped_lines
# Location of Helvetica.ttf in the sibling ``fonts`` directory, resolved
# relative to this module's file. FreeText loads its font from here.
HELVETICA_PATH = os.path.join(os.path.dirname(__file__), "..", "fonts", "Helvetica.ttf")
class FreeText(Annotation):
    """FreeText annotation.

    Only the Helvetica font is supported for now; proper font handling is
    left for later.
    """

    subtype = "FreeText"

    def __init__(self, location, appearance, metadata=None):
        super(FreeText, self).__init__(location, appearance, metadata)
        self.ttffont = TTFFont(HELVETICA_PATH)

    def make_rect(self):
        """Return the annotation rectangle as [x1, y1, x2, y2]."""
        location = self._location
        return [location.x1, location.y1, location.x2, location.y2]

    def make_default_appearance(self):
        """Return the DA string for the text object, e.g.
        '1 0 0 rg /Helv 12 Tf'.
        """
        appearance = self._appearance
        commands = [
            FillColor(*appearance.fill[:3]),
            Font(PDF_ANNOTATOR_FONT, appearance.font_size),
        ]
        return ContentStream(commands).resolve()

    def add_additional_pdf_object_data(self, obj):
        """Write the FreeText-specific entries onto the annotation dict."""
        obj[PdfName("Contents")] = self._appearance.content
        obj[PdfName("DA")] = self.make_default_appearance()
        obj[PdfName("C")] = []
        # TODO allow setting border on free text boxes
        obj[PdfName("BS")] = _make_border_dict(width=0, style="S")
        # TODO DS is required to have BB not redraw the annotation in their own
        # style when you edit it.

    def add_additional_resources(self, resources):
        """Register the annotation font in the /Font resource dictionary."""
        fonts = PdfDict()
        fonts[PDF_ANNOTATOR_FONT] = self.ttffont.get_font()
        resources[PdfName("Font")] = fonts

    def make_appearance_stream(self):
        """Return the content stream that draws the text inside its box."""
        appearance = self._appearance
        location = self._location

        stream = ContentStream(
            [
                Save(),
                BeginText(),
                FillColor(*appearance.fill[:3]),
                Font(PDF_ANNOTATOR_FONT, appearance.font_size),
            ]
        )

        if appearance.get_graphics_state().has_content():
            stream.add(CSGraphicsState(GRAPHICS_STATE_NAME))

        # Actually draw the text inside the rectangle
        text_commands = get_text_commands(
            location.x1,
            location.y1,
            location.x2,
            location.y2,
            text=appearance.content,
            font_size=appearance.font_size,
            wrap_text=appearance.wrap_text,
            align=appearance.text_align,
            baseline=appearance.text_baseline,
            line_spacing=appearance.line_spacing,
            font=self.ttffont,
        )
        stream.extend(text_commands)
        stream.extend([EndText(), Restore()])
        return stream
def get_text_commands(
    x1, y1, x2, y2, text, font_size, wrap_text, align, baseline, line_spacing, font
):
    """Build the content-stream commands that draw ``text`` inside a box.

    The text is wrapped to the box width when requested, then each line is
    positioned horizontally according to ``align`` and vertically according
    to ``baseline``.

    :param number x1: bounding box lower left x
    :param number y1: bounding box lower left y
    :param number x2: bounding box upper right x
    :param number y2: bounding box upper right y
    :param str text: text to add to annotation
    :param number font_size: font size
    :param bool wrap_text: whether to wrap the text
    :param str align: 'left'|'center'|'right'
    :param str baseline: 'top'|'middle'|'bottom'
    :param number line_spacing: multiplier to determine line spacing
    :param TTFFont font: TTF font helper
    :returns list: alternating TextMatrix and Text commands, one pair per line
    """
    font.set_size(font_size)
    font.set_text(text)

    if wrap_text:
        raw_lines = get_wrapped_lines(
            text=text, measure=font.measure_text, max_length=x2 - x1
        )
    else:
        raw_lines = [text]

    # Wrapping needs the whitespace, but layout must measure each line
    # without leading/trailing blanks.
    trimmed = [piece.strip() for piece in raw_lines]

    ys = _get_vertical_coordinates(
        trimmed, y1, y2, font_size, line_spacing, baseline
    )
    xs = _get_horizontal_coordinates(trimmed, x1, x2, font.measure_text, align)

    commands = []
    for content, x, y in zip(trimmed, xs, ys):
        encoded = content.encode("utf-16be").decode("latin1")
        commands.append(TextMatrix(translate(x, y)))
        commands.append(Text(encoded))
    return commands
def _get_vertical_coordinates(lines, y1, y2, font_size, line_spacing, baseline):
"""Calculate vertical coordinates for all the lines at once, honoring the
text baseline property.
"""
line_spacing = font_size * line_spacing
if baseline == "top":
first_y = y2 - line_spacing
elif baseline == "middle":
midpoint = (y2 + y1) / 2.0
# For the first line of vertically centered text, go up half the # of
# lines, then go back down half the font size.
first_y = (
midpoint
- (line_spacing - font_size)
+ (((len(lines) - 1) / 2.0) * line_spacing)
)
else: # bottom
first_y = y1 + (line_spacing - font_size) + (line_spacing * (len(lines) - 1))
return [first_y - (i * line_spacing) for i in range(len(lines))]
def _get_horizontal_coordinates(lines, x1, x2, measure, align):
# NOTE: this padding is to keep text annotations as they are from cutting
# off text at the edges in certain conditions. The annotation rectangle
# and how PDFs draw text needs to be revisited, as this padding shouldn't
# be necessary.
PADDING = 1
if align == "left":
return [x1 + PADDING for _ in range(len(lines))]
elif align == "center":
widths = [measure(line) for line in lines]
max_width = x2 - x1
return [x1 + ((max_width - width) / 2.0) - PADDING for width in widths]
else: # right
widths = [measure(line) for line in lines]
max_width = x2 - x1
return [x1 + (max_width - width) - PADDING for width in widths]
endesive-2.19.1/endesive/pdf/PyPDF2_annotate/annotator.py 0000664 0000000 0000000 00000027167 15042366745 0023266 0 ustar 00root root 0000000 0000000 # -*- coding: utf-8 -*-
"""
PdfAnnotator
~~~~~~~~~~~~
The core annotator class.
:copyright: Copyright 2019 Autodesk, Inc.
:license: MIT, see LICENSE for details.
"""
import warnings
#from pdfrw import PdfReader
#from pdfrw import PdfWriter
from .annotations.image import Image
from .annotations.points import Ink
from .annotations.points import Line
from .annotations.points import Polygon
from .annotations.points import Polyline
from .annotations.rect import Circle
from .annotations.rect import Square
from .annotations.text import FreeText
from .config.metadata import Metadata
from .config.metadata import UNSET
from .graphics import ContentStream
from .util.geometry import identity
from .util.geometry import matrix_multiply
from .util.geometry import normalize_rotation
from .util.geometry import rotate
from .util.geometry import scale
from .util.geometry import translate
from .util.validation import NUMERIC_TYPES
# Maps the annotation type names accepted by PdfAnnotator.get_annotation
# (and therefore add_annotation) to the classes implementing them. Names not
# listed here are rejected with a ValueError.
NAME_TO_ANNOTATION = {
    'square': Square,
    'circle': Circle,
    'line': Line,
    'polygon': Polygon,
    'polyline': Polyline,
    'ink': Ink,
    'text': FreeText,
    'image': Image,
}
class PDF(object):
    """Small wrapper around a PDF reader giving page and rotation lookup."""

    def __init__(self, pdf_reader):
        self._reader = pdf_reader
        self.pdf_version = pdf_reader.private.pdfdict.version

    def get_page(self, page_number):
        """Return the page at ``page_number``.

        :raises ValueError: if ``page_number`` is past the last page
        """
        page_count = len(self._reader.pages)
        if page_number <= page_count - 1:
            return self._reader.pages[page_number]
        message = 'Page number {} out of bounds ({} pages)'.format(
            page_number,
            page_count,
        )
        raise ValueError(message)

    def get_rotation(self, page_number):
        """Returns the rotation of a specified page."""
        declared = self.get_page(page_number).inheritable.Rotate
        return normalize_rotation(int(declared or 0))
class PdfAnnotator(object):
    """Adds annotations to the pages of a PDF."""

    def __init__(self, file_or_reader, scale=None, compress=True):
        """Draw annotations directly on PDFs. Annotations are always drawn on
        as if you're drawing them in a viewer, i.e. they take into account page
        rotation and weird, translated coordinate spaces.

        :param str|PdfReader file_or_reader: filename of PDF or pdfrw.PdfReader
        :param number|tuple|None scale: number by which to scale coordinates
            to get to default user space. Use this if, for example, your points
            are in the coordinate space of the PDF viewed at a given dpi. In
            this case, scale would be 72/dpi. Can also specify a 2-tuple of x
            and y scale.
        :param bool compress: whether to output flate-compressed PDFs
        """
        # Remember the source path so write(overwrite=True) knows where to
        # write. Stays None when the caller passes a reader object.
        self._filename = None
        if isinstance(file_or_reader, str):
            self._filename = file_or_reader
            # NOTE(review): PdfReader is not imported in this module (the
            # pdfrw imports at the top of the file are commented out), so
            # this branch raises NameError until an import is restored.
            file_or_reader = PdfReader(file_or_reader)
        self._pdf = PDF(file_or_reader)
        self._scale = self._expand_scale(scale)
        self._dimensions = {}
        self._compress = compress

    def _expand_scale(self, scale):
        """Normalize ``scale`` to an (x_scale, y_scale) pair."""
        if scale is None:
            return 1, 1
        elif isinstance(scale, NUMERIC_TYPES):
            return (scale, scale)
        return scale

    def set_page_dimensions(self, dimensions, page_number):
        """Set dimensions for a given page number. If set, the dimensions for
        this page override the document-wide scale setting.

        :param tuple|None dimensions: As a convenient alternative to scale,
            you can pass in the dimensions of your sheet when viewed in a
            certain setting. For example, an 8.5"x11" PDF, rotated at 90° and
            rastered at 150 dpi, would produce dimensions of (1650, 1275). If
            you pass this in, you can then specify your coordinates in this
            coordinate space.
        :param int page_number:
        """
        self._dimensions[page_number] = dimensions

    def get_page_bounding_box(self, page_number):
        """Return the page's CropBox, or its MediaBox if there is no CropBox,
        as a list of floats.
        """
        page = self._pdf.get_page(page_number)
        # PDF bounding boxes are complicated. We choose to use the CropBox, if
        # it's available, because that's what Acrobat uses to display the
        # actual PDF, and what pdfinfo uses to determine the PDF's dimensions.
        # If CropBox isn't available, we use MediaBox. We ignore the TrimBox
        # because Acrobat also ignores this when displaying the PDF.
        crop_box = page.inheritable.CropBox
        if crop_box is not None:
            return [float(n) for n in crop_box]
        return [float(n) for n in page.inheritable.MediaBox]

    def get_size(self, page_number):
        """Returns the size of the specified page's bounding box (pts),
        accounting for page rotation.

        :param int page_number:
        :returns tuple: If page is rotated 90° or 270°, the returned value will
            be (height, width) in PDF user space. Otherwise the returned value
            will be (width, height).
        """
        x1, y1, x2, y2 = self.get_page_bounding_box(page_number)
        rotation = self._pdf.get_rotation(page_number)
        if rotation in (0, 180):
            return (x2 - x1, y2 - y1)
        return (y2 - y1, x2 - x1)

    def add_annotation(
        self,
        annotation_type,
        location,
        appearance,
        metadata=None,
    ):
        """Add an annotation of the given type, with the given parameters, to
        the given location of the PDF.

        :param str annotation_type: E.g. 'square'
        :param Location location: Annotation's Location object, specified in
            the coordinate system of the client. Coordinates will be
            transformed to PDF user space via get_transform.
        :param Appearance appearance:
        :param Metadata|None|UNSET metadata: Metadata object. If UNSET, no
            metadata is written on the entire annotation. If None, default
            metadata is used.
        """
        self._before_add(location)
        metadata = self._resolve_metadata(metadata)
        self._validate_appearance_stream(appearance)
        annotation = self.get_annotation(
            annotation_type,
            location,
            appearance,
            metadata,
        )
        self._add_annotation(annotation)

    @staticmethod
    def _resolve_metadata(metadata):
        """Map the public ``metadata`` argument to what annotations expect.

        :raises ValueError: if ``metadata`` is not a Metadata, None or UNSET
        """
        if isinstance(metadata, Metadata):
            return metadata
        elif metadata is None:
            return Metadata()
        elif metadata is UNSET:
            return None
        else:
            raise ValueError('Invalid metadata')

    @staticmethod
    def _validate_appearance_stream(appearance):
        """Reject an explicit appearance stream that is not a ContentStream.

        :raises ValueError: if the stream is set and of the wrong type
        """
        stream = appearance.appearance_stream
        if stream is not None and not isinstance(stream, ContentStream):
            raise ValueError(
                'Invalid appearance stream format: {}'.format(type(stream)))

    def _before_add(self, location):
        """Warn when the target page uses a UserUnit other than 1."""
        # Steps to take before trying to add an annotation to `location`
        page = self._pdf.get_page(location.page)
        user_unit = page.inheritable.UserUnit
        if user_unit not in (1, None):
            warnings.warn(
                'Unsupported UserUnit (value: {})'.format(user_unit)
            )

    def get_annotation(self, annotation_type, location, appearance, metadata):
        """Instantiate and validate the annotation for ``annotation_type``.

        :raises ValueError: if the type is not in NAME_TO_ANNOTATION
        """
        # TODO filter on valid PDF versions, by type
        annotation_cls = NAME_TO_ANNOTATION.get(annotation_type)
        if annotation_cls is None:
            raise ValueError('Invalid/unsupported annotation type: {}'.format(
                annotation_type
            ))

        annotation = annotation_cls(location, appearance, metadata)
        annotation.validate(self._pdf.pdf_version)
        return annotation

    def get_scale(self, page_number):
        """Public API to get the x and y scales of the given page.

        :param int page_number:
        :returns 2-tuple: (x_scale, y_scale)
        """
        rotation = self._pdf.get_rotation(page_number)
        bounding_box = self.get_page_bounding_box(page_number)
        return self._get_scale(page_number, bounding_box, rotation)

    def get_rotation(self, page_number):
        """Public API to get the rotation of the given page.

        :param int page_number:
        :returns int: integer where i % 90 == 0
        """
        return self._pdf.get_rotation(page_number)

    def _get_scale(self, page_number, bounding_box, rotation):
        """Return (x_scale, y_scale) for the page: derived from the page's
        user-specified dimensions if any, otherwise the document-wide scale.
        """
        W = bounding_box[2] - bounding_box[0]
        H = bounding_box[3] - bounding_box[1]

        dimensions = self._dimensions.get(page_number)
        if dimensions is not None:
            # User-specified dimensions for a particular page just give us the
            # scaling factor to use for that page.
            width_d, height_d = dimensions
            width_pts, height_pts = W, H
            if rotation in (90, 270):
                width_pts, height_pts = H, W
            x_scale = (width_pts / float(width_d))
            y_scale = (height_pts / float(height_d))
        else:
            x_scale, y_scale = self._scale

        return x_scale, y_scale

    def get_transform(self, page_number, rotation):
        """Return the client-space to PDF-user-space transform for a page."""
        bounding_box = self.get_page_bounding_box(page_number)
        _scale = self._get_scale(page_number, bounding_box, rotation)
        return self._get_transform(bounding_box, rotation, _scale)

    @staticmethod
    def _get_transform(bounding_box, rotation, _scale):
        """Get the transformation required to go from the user's desired
        coordinate space to PDF user space, taking into account rotation,
        scaling, translation (for things like weird media boxes).
        """
        # Unrotated width and height, in pts
        W = bounding_box[2] - bounding_box[0]
        H = bounding_box[3] - bounding_box[1]

        scale_matrix = scale(*_scale)

        x_translate = 0 + bounding_box[0]
        y_translate = 0 + bounding_box[1]
        mb_translate = translate(x_translate, y_translate)

        # Because of how rotation works the point isn't rotated around an axis,
        # but the axis itself shifts. So we have to represent the rotation as
        # rotation + translation.
        rotation_matrix = rotate(rotation)

        translate_matrix = identity()
        if rotation == 90:
            translate_matrix = translate(W, 0)
        elif rotation == 180:
            translate_matrix = translate(W, H)
        elif rotation == 270:
            translate_matrix = translate(0, H)

        # Order matters here - the transformation matrices are applied in
        # reverse order. So first we scale to get the points in PDF user space,
        # since all other operations are in that space. Then we rotate and
        # translate to capture page rotation, then finally we translate to
        # account for offset media boxes.
        transform = matrix_multiply(
            mb_translate,
            translate_matrix,
            rotation_matrix,
            scale_matrix,
        )
        return transform

    def _add_annotation(self, annotation):
        """Add the annotation to the PDF document, transforming annotation
        metadata and content stream to PDF user space.
        """
        page = self._pdf.get_page(annotation.page)
        transform = self.get_transform(
            annotation.page,
            self._pdf.get_rotation(annotation.page),
        )
        annotation_obj = annotation.as_pdf_object(transform, page)

        if page.Annots:
            page.Annots.append(annotation_obj)
        else:
            page.Annots = [annotation_obj]

    def write(self, filename=None, overwrite=False):
        """Write the annotated PDF.

        :param str|None filename: output path; ignored when ``overwrite``
        :param bool overwrite: write back to the file the annotator was
            created from
        :raises ValueError: if neither ``filename`` nor ``overwrite`` is
            given, or if ``overwrite`` is requested but the annotator was
            created from a reader object rather than a filename
        """
        if filename is None and not overwrite:
            raise ValueError(
                'Must specify either output filename or overwrite flag'
            )
        if overwrite:
            if self._filename is None:
                raise ValueError(
                    'Cannot overwrite: annotator was not created from a '
                    'filename'
                )
            filename = self._filename

        # NOTE(review): PdfWriter is not imported in this module (the pdfrw
        # imports at the top of the file are commented out), so this raises
        # NameError until an import is restored.
        writer = PdfWriter(
            version=self._pdf.pdf_version,
            compress=self._compress,
        )
        writer.write(fname=filename, trailer=self._pdf._reader)
endesive-2.19.1/endesive/pdf/PyPDF2_annotate/config/ 0000775 0000000 0000000 00000000000 15042366745 0022137 5 ustar 00root root 0000000 0000000 endesive-2.19.1/endesive/pdf/PyPDF2_annotate/config/__init__.py 0000664 0000000 0000000 00000000000 15042366745 0024236 0 ustar 00root root 0000000 0000000 endesive-2.19.1/endesive/pdf/PyPDF2_annotate/config/appearance.py 0000664 0000000 0000000 00000012121 15042366745 0024605 0 ustar 00root root 0000000 0000000 # -*- coding: utf-8 -*-
"""
Appearance Config
~~~~~~~~~~~~~~~~~
Configuration for an annotation's appearance.
:copyright: Copyright 2019 Autodesk, Inc.
:license: MIT, see LICENSE for details.
"""
import attr
from .constants import ALLOWED_ALIGNS
from .constants import ALLOWED_BASELINES
from .constants import ALLOWED_LINE_CAPS
from .constants import ALLOWED_LINE_JOINS
from .constants import BLACK
from .constants import DEFAULT_BORDER_STYLE
from .constants import DEFAULT_CONTENT
from .constants import DEFAULT_FONT_SIZE
from .constants import DEFAULT_LINE_SPACING
from .constants import DEFAULT_STROKE_WIDTH
from .constants import GRAPHICS_STATE_NAME
from .constants import TEXT_ALIGN_LEFT
from .constants import TEXT_BASELINE_MIDDLE
from .graphics_state import GraphicsState
from ..graphics import ContentStream
from ..graphics import FillColor
from ..graphics import GraphicsState as CSGraphicsState
from ..graphics import Stroke
from ..graphics import StrokeAndFill
from ..graphics import StrokeColor
from ..graphics import StrokeWidth
from ..util.validation import between
from ..util.validation import Boolean
from ..util.validation import Color
from ..util.validation import Enum
from ..util.validation import Field
from ..util.validation import Number
from ..util.validation import positive
from ..util.validation import String
from ..util.validation import validate_dash_array
def is_transparent(color):
    """Tell whether a color carries an alpha component below full opacity.

    :param color: a color sequence, or None. E.g. a soothing gray:
        ``[0, 0, 0, 0.5]``
    :returns: False when `color` is None or does not have exactly four
        components; otherwise whether its last component is less than 1
    """
    if color is not None and len(color) == 4:
        return color[-1] < 1
    return False
@attr.s
class Appearance(object):
    """Bundle of the visual settings that describe how an annotation looks.

    The attributes are declared with the validation helpers from
    ``util.validation``; every attribute has a default, so an Appearance can
    be built from any subset of keyword arguments.
    """

    # Stroke attributes
    stroke_color = Color(default=BLACK)
    stroke_width = Number(default=DEFAULT_STROKE_WIDTH, validator=positive)
    border_style = String(default=DEFAULT_BORDER_STYLE)
    dash_array = Field(list, default=None, validator=validate_dash_array)
    line_cap = Enum(ALLOWED_LINE_CAPS, default=None)
    line_join = Enum(ALLOWED_LINE_JOINS, default=None)
    miter_limit = Number(default=None, validator=positive)
    stroke_transparency = Number(default=None, validator=between(0, 1))

    # Fill attributes
    fill = Color(default=None)
    fill_transparency = Number(default=None, validator=between(0, 1))

    # Text attributes
    content = String(default=DEFAULT_CONTENT)
    font_size = Number(default=DEFAULT_FONT_SIZE, validator=positive)
    text_align = Enum(ALLOWED_ALIGNS, default=TEXT_ALIGN_LEFT)
    text_baseline = Enum(ALLOWED_BASELINES, default=TEXT_BASELINE_MIDDLE)
    line_spacing = Number(default=DEFAULT_LINE_SPACING, validator=positive)
    wrap_text = Boolean(default=True)

    # Image attributes
    image = String(default=None)

    # Advanced attributes
    appearance_stream = Field(ContentStream, default=None)
    xobjects = Field(dict, default=None)
    graphics_states = Field(dict, default=None)
    fonts = Field(dict, default=None)

    def copy(self, **kwargs):
        """Return a new Appearance based on this one.

        :param kwargs: attribute overrides, passed to the constructor
        :returns Appearance: a new object; every attribute not named in
            `kwargs` is assigned from this instance (the values themselves
            are shared, not duplicated)
        """
        duplicate = Appearance(**kwargs)
        for name, value in self.__dict__.items():
            if name in kwargs:
                # The caller's override already went through the constructor.
                continue
            setattr(duplicate, name, value)
        return duplicate

    def _get_stroke_transparency(self):
        """Resolve the effective stroke transparency.

        An explicitly set `stroke_transparency` wins; otherwise the alpha
        component of a four-component `stroke_color` below 1 is used.

        :returns: the transparency value, or None if neither source applies
        """
        color = self.stroke_color
        from_color = color[-1] if is_transparent(color) else None
        explicit = self.stroke_transparency
        return from_color if explicit is None else explicit

    def _get_fill_transparency(self):
        """Resolve the effective fill transparency.

        An explicitly set `fill_transparency` wins; otherwise the alpha
        component of a four-component `fill` color below 1 is used.

        :returns: the transparency value, or None if neither source applies
        """
        color = self.fill
        from_color = color[-1] if is_transparent(color) else None
        explicit = self.fill_transparency
        return from_color if explicit is None else explicit

    def get_graphics_state(self):
        """Build a GraphicsState config from the graphics-state-applicable
        parameters of this appearance.

        :returns GraphicsState:
        """
        params = dict(
            dash_array=self.dash_array,
            line_cap=self.line_cap,
            line_join=self.line_join,
            miter_limit=self.miter_limit,
            stroke_transparency=self._get_stroke_transparency(),
            fill_transparency=self._get_fill_transparency(),
        )
        return GraphicsState(**params)
def set_appearance_state(stream, A):
    """Append the commands that make the stream reflect an appearance.

    :param ContentStream stream: current content stream
    :param Appearance A: appearance object
    """
    # The `gs` command executes the named graphics state from the Resources
    # dict and sets CA and/or ca values. It is only emitted when the
    # appearance actually has graphics state content. The annotations
    # themselves need to ensure that the proper ExtGState object is present
    # in the Resources dict.
    if A.get_graphics_state().has_content():
        stream.add(CSGraphicsState(GRAPHICS_STATE_NAME))

    # Only the first three color components are passed on; a fourth (alpha)
    # component is not part of the color operator.
    stroke_components = A.stroke_color[:3]
    stream.extend([
        StrokeColor(*stroke_components),
        StrokeWidth(A.stroke_width),
    ])

    # TODO support more color spaces - CMYK and GrayScale
    fill = A.fill
    if fill is not None:
        stream.add(FillColor(*fill[:3]))
def stroke_or_fill(stream, A):
    """Append the painting command matching the appearance to the stream.

    :param ContentStream stream: current content stream
    :param Appearance A: appearance object; a non-None `fill` selects
        StrokeAndFill, otherwise a plain Stroke is added
    """
    paint_command = Stroke if A.fill is None else StrokeAndFill
    stream.add(paint_command())
endesive-2.19.1/endesive/pdf/PyPDF2_annotate/config/constants.py 0000664 0000000 0000000 00000002315 15042366745 0024526 0 ustar 00root root 0000000 0000000 # -*- coding: utf-8 -*-
"""
Constants
~~~~~~~~~
:copyright: Copyright 2019 Autodesk, Inc.
:license: MIT, see LICENSE for details.
"""
# Horizontal text alignment.
TEXT_ALIGN_LEFT = 'left'
TEXT_ALIGN_CENTER = 'center'
TEXT_ALIGN_RIGHT = 'right'
ALLOWED_ALIGNS = (TEXT_ALIGN_LEFT, TEXT_ALIGN_RIGHT, TEXT_ALIGN_CENTER)

# Vertical text baseline.
TEXT_BASELINE_TOP = 'top'
TEXT_BASELINE_MIDDLE = 'middle'
TEXT_BASELINE_BOTTOM = 'bottom'
ALLOWED_BASELINES = (
    TEXT_BASELINE_TOP, TEXT_BASELINE_MIDDLE, TEXT_BASELINE_BOTTOM,
)

# Defaults applied when an appearance does not specify a value.
DEFAULT_CONTENT = ''
DEFAULT_FONT_SIZE = 12
DEFAULT_LINE_SPACING = 1.2
DEFAULT_STROKE_WIDTH = 1
DEFAULT_BORDER_STYLE = 'S'

# Line cap styles.
LINE_CAP_BUTT = 0
LINE_CAP_ROUND = 1
LINE_CAP_SQUARE = 2
ALLOWED_LINE_CAPS = (LINE_CAP_BUTT, LINE_CAP_ROUND, LINE_CAP_SQUARE)

# Line join styles.
LINE_JOIN_MITER = 0
LINE_JOIN_ROUND = 1
LINE_JOIN_BEVEL = 2
ALLOWED_LINE_JOINS = (LINE_JOIN_MITER, LINE_JOIN_ROUND, LINE_JOIN_BEVEL)

# Colors.
BLACK = (0, 0, 0)
TRANSPARENT = ()

# Names used for the graphics state and font resources.
GRAPHICS_STATE_NAME = 'PdfAnnotatorGS'
DEFAULT_BASE_FONT = 'Helvetica'
PDF_ANNOTATOR_FONT = 'PDFANNOTATORFONT1'

# Image mode identifiers.
# NOTE(review): these look like Pillow image mode strings - confirm.
CMYK_MODE = 'CMYK'
RGB_MODE = 'RGB'
RGBA_MODE = 'RGBA'
PALETTE_MODE = 'P'
GRAYSCALE_MODE = 'L'
GRAYSCALE_ALPHA_MODE = 'LA'
SINGLE_CHANNEL_MODE = '1'
endesive-2.19.1/endesive/pdf/PyPDF2_annotate/config/graphics_state.py 0000664 0000000 0000000 00000004371 15042366745 0025516 0 ustar 00root root 0000000 0000000 # -*- coding: utf-8 -*-
"""
GraphicsState
~~~~~~~~~~~~~
Configuration for an annotation's graphics state.
:copyright: Copyright 2019 Autodesk, Inc.
:license: MIT, see LICENSE for details.
"""
import attr
from ..pdfrw import PdfDict, PdfName
from ..config.constants import ALLOWED_LINE_CAPS
from ..config.constants import ALLOWED_LINE_JOINS
from ..util.validation import between
from ..util.validation import Enum
from ..util.validation import Field
from ..util.validation import Number
from ..util.validation import positive
from ..util.validation import validate_dash_array
# Maps each GraphicsState attribute name to the key written into the
# ExtGState dictionary by GraphicsState.as_pdf_dict().
NAME_TO_PDF_ATTR = {
    "line_width": "LW",            # line width
    "line_cap": "LC",              # line cap style
    "line_join": "LJ",             # line join style
    "miter_limit": "ML",           # miter limit
    "dash_array": "D",             # dash pattern
    "stroke_transparency": "CA",   # applies to stroking operations
    "fill_transparency": "ca",     # applies to fill operations
}
@attr.s
class GraphicsState(object):
    """Config object for an external graphics state, usable together with
    explicit content streams to control how an annotation is drawn.

    Several of these values also have dedicated content stream operators;
    line_width, for example, can equally be set with the StrokeWidth (w)
    operator.

    Consult the full PDF spec for the constraints on and meaning of each
    value. Many more graphics state options exist, but they are highly
    technical and beyond the scope of this library.
    """

    line_width = Number(default=None, validator=positive)
    line_cap = Enum(ALLOWED_LINE_CAPS, default=None)
    line_join = Enum(ALLOWED_LINE_JOINS, default=None)
    miter_limit = Number(default=None)
    dash_array = Field(list, validator=validate_dash_array, default=None)
    stroke_transparency = Number(default=None, validator=between(0, 1))
    fill_transparency = Number(default=None, validator=between(0, 1))

    def as_pdf_dict(self):
        """Render the attributes that are set as an ExtGState dictionary.

        :returns PdfDict: dict of /Type /ExtGState holding one entry per
            non-None attribute, keyed through NAME_TO_PDF_ATTR
        """
        ext_gstate = PdfDict(Type=PdfName("ExtGState"))
        for field_name, field_value in self.__dict__.items():
            if field_value is None:
                # Unset attributes are left out of the dictionary.
                continue
            key = PdfName(NAME_TO_PDF_ATTR[field_name])
            ext_gstate[key] = field_value
        return ext_gstate

    def has_content(self):
        """Returns True if any of the attributes is non-null."""
        for field_value in self.__dict__.values():
            if field_value is not None:
                return True
        return False
endesive-2.19.1/endesive/pdf/PyPDF2_annotate/config/location.py 0000664 0000000 0000000 00000001343 15042366745 0024322 0 ustar 00root root 0000000 0000000 # -*- coding: utf-8 -*-
"""
Location
~~~~~~~~~~~~
Configuration for an annotation's location.
:copyright: Copyright 2019 Autodesk, Inc.
:license: MIT, see LICENSE for details.
"""
import attr
from ..util.validation import Integer
from ..util.validation import Number
from ..util.validation import Points
from ..util.validation import positive
@attr.s
class Location(object):
    """Where an annotation sits: a page plus either a list of points or the
    x1/y1/x2/y2 values.

    Only `page` is required; every other attribute defaults to None.
    """

    page = Integer(validator=positive)
    points = Points(default=None)
    x1 = Number(default=None)
    y1 = Number(default=None)
    x2 = Number(default=None)
    y2 = Number(default=None)

    def copy(self):
        """Return a new Location carrying the same attribute values.

        :returns Location: a new object; the attribute values themselves
            are shared with this instance, not duplicated
        """
        duplicate = Location(page=self.page)
        for name, value in vars(self).items():
            setattr(duplicate, name, value)
        return duplicate
endesive-2.19.1/endesive/pdf/PyPDF2_annotate/config/metadata.py 0000664 0000000 0000000 00000006400 15042366745 0024271 0 ustar 00root root 0000000 0000000 # -*- coding: utf-8 -*-
"""
Metadata
~~~~~~~~
Configuration for an annotation's metadata.
:copyright: Copyright 2019 Autodesk, Inc.
:license: MIT, see LICENSE for details.
"""
from datetime import datetime
from datetime import timedelta
from datetime import tzinfo
from uuid import uuid4
# Sentinel for Metadata arguments: passing UNSET leaves the entry off the
# annotation entirely, whereas None selects the entry's default value.
UNSET = object()
class UTC(tzinfo):
    """Time zone with a fixed zero offset and no daylight saving time."""

    # Shared zero offset; timedelta objects are immutable.
    _ZERO = timedelta(0)

    def utcoffset(self, dt):
        """Return the offset from UTC, which is always zero."""
        return self._ZERO

    def dst(self, dt):
        """Return the daylight saving adjustment, which is always zero."""
        return self._ZERO

    def tzname(self, dt):
        """Return the zone name, 'UTC'."""
        return 'UTC'
class Flags(object):
    """Bit values for the annotation flags (F) entry.

    Each flag is a single bit, so several can be combined with bitwise or,
    e.g. ``Flags.Invisible | Flags.Hidden``.
    """

    Invisible = 1 << 0
    Hidden = 1 << 1
    Print = 1 << 2
    NoZoom = 1 << 3
    NoRotate = 1 << 4
    NoView = 1 << 5
    ReadOnly = 1 << 6
    Locked = 1 << 7
    ToggleNoView = 1 << 8
    LockedContents = 1 << 9
class Metadata(object):
    """PDF annotation metadata class.

    By default, new annotations get the following properties:
        * CreationDate - defaults to UTC now
        * M (modified date) - defaults to UTC now
        * NM (unique name) - defaults to uuid4()
        * F (flags) - defaults to 4, just Flags.Print

    Datetime objects should be timezone-aware. If they're not, UTC is used.

    To leave any of these entries off the created annotation, pass kwarg=UNSET.

    Any additional kwargs will be set on the annotation object as /Name value.
    So for instance if you use Metadata(Subj='hi'), the annotation object in
    the PDF will have an attribute of `/Subj (hi)`. Acceptable value types are
    str, int, float, datetime, and lists of str/int/float. Other values may
    work, but may appear oddly formatted in the PDF, or break it entirely.
    """

    def __init__(
        self,
        creation_date=None,
        modified_date=None,
        name=None,
        flags=None,
        **kwargs
    ):
        """
        :param datetime|None|UNSET creation_date:
        :param datetime|None|UNSET modified_date:
        :param str|None|UNSET name:
        :param int|None|UNSET flags: if specified, a bunch of bitwise or-ed
            `Flags` values. For instance, to specify both Hidden and Invisible,
            use `Flags.Invisible | Flags.Hidden`. If flags are specified, the
            default `Print` flag is no longer set; it must be set explicitly.
        :param kwargs: additional entries, stored under their keyword name
        :raises ValueError: if any additional kwarg is None
        """
        self.metadata = {}
        self.set('CreationDate', creation_date, self.now)
        self.set('M', modified_date, self.now)
        self.set('NM', name, lambda: str(uuid4()))
        self.set('F', flags, lambda: Flags.Print)
        for k, v in kwargs.items():
            # Extra entries have no default to fall back on, so None is
            # rejected before it reaches set().
            if v is None:
                raise ValueError("Can't write Nones to PDF")
            self.set(k, v, None)

    def set(self, attr, value, default_func):
        """Store one metadata entry.

        :param str attr: entry name
        :param value: value to store; UNSET leaves the entry untouched, None
            stores the result of `default_func()` instead
        :param default_func: zero-argument callable producing the default;
            only called when `value` is None
        """
        if value is UNSET:
            return
        self.metadata[attr] = default_func() if value is None else value

    def iter(self):
        """Yield (name, value) pairs for every stored metadata entry."""
        for name, value in self.metadata.items():
            yield name, value

    @staticmethod
    def now():
        """Return the current time as a timezone-aware UTC datetime."""
        # datetime.utcnow() is deprecated since Python 3.12. Asking for the
        # current time directly in the zero-offset zone gives the same
        # aware value.
        return datetime.now(UTC())
def serialize_value(value):
    """Convert a metadata value to the form that gets written out.

    :param value: any metadata value
    :returns: the result of serialize_datetime for datetime instances;
        every other value is returned unchanged
    """
    return serialize_datetime(value) if isinstance(value, datetime) else value
def serialize_datetime(d):
    """Format a datetime as a date string of the form
    ``D:YYYYMMDDHHMMSS+HH'MM``.

    :param datetime d: the datetime to format; a naive value is treated as
        UTC
    :returns str: the formatted date string
    """
    aware = d if d.tzinfo is not None else d.replace(tzinfo=UTC())
    raw_offset = aware.strftime('%z')
    # '%z' yields e.g. '+0530'; an apostrophe goes between hours and minutes.
    hours_part, minutes_part = raw_offset[:3], raw_offset[3:]
    pdf_offset = "{}'{}".format(hours_part, minutes_part)
    return aware.strftime('D:%Y%m%d%H%M%S{}'.format(pdf_offset))
endesive-2.19.1/endesive/pdf/PyPDF2_annotate/fonts/ 0000775 0000000 0000000 00000000000 15042366745 0022023 5 ustar 00root root 0000000 0000000 endesive-2.19.1/endesive/pdf/PyPDF2_annotate/fonts/Helvetica.ttf 0000664 0000000 0000000 00001143670 15042366745 0024462 0 ustar 00root root 0000000 0000000 FFTM = ǜ GDEF. GPOS;H7 <