first commit

This commit is contained in:
aschwarz
2022-11-28 10:27:30 +01:00
commit 9015dd2102
2720 changed files with 605111 additions and 0 deletions

View File

@ -0,0 +1,96 @@
<?php
/**
* This file is part of FPDI
*
* @package setasign\Fpdi
* @copyright Copyright (c) 2020 Setasign GmbH & Co. KG (https://www.setasign.com)
* @license http://opensource.org/licenses/mit-license The MIT License
*/
namespace setasign\Fpdi\PdfParser\CrossReference;
use setasign\Fpdi\PdfParser\PdfParser;
use setasign\Fpdi\PdfParser\Type\PdfDictionary;
use setasign\Fpdi\PdfParser\Type\PdfToken;
use setasign\Fpdi\PdfParser\Type\PdfTypeException;
/**
* Abstract class for cross-reference reader classes.
*
* @package setasign\Fpdi\PdfParser\CrossReference
*/
abstract class AbstractReader
{
    /**
     * Parser instance the trailer is read from.
     *
     * @var PdfParser
     */
    protected $parser;

    /**
     * Trailer dictionary that follows the cross-reference data.
     *
     * @var PdfDictionary
     */
    protected $trailer;

    /**
     * Keeps the parser and reads the trailer dictionary right away.
     *
     * @param PdfParser $parser
     * @throws CrossReferenceException
     * @throws PdfTypeException
     */
    public function __construct(PdfParser $parser)
    {
        $this->parser = $parser;
        $this->readTrailer();
    }

    /**
     * Returns the trailer dictionary read during construction.
     *
     * @return PdfDictionary
     */
    public function getTrailer()
    {
        return $this->trailer;
    }

    /**
     * Reads the "trailer" keyword and the dictionary that follows it from the parser.
     *
     * Type errors reported by the parser are translated into CrossReferenceException
     * instances with the UNEXPECTED_END code.
     *
     * @throws CrossReferenceException
     * @throws PdfTypeException
     */
    protected function readTrailer()
    {
        // Step 1: the next value has to be the token "trailer".
        try {
            $keyword = $this->parser->readValue(null, PdfToken::class);
            if ($keyword->value !== 'trailer') {
                $message = \sprintf(
                    'Unexpected end of cross reference. "trailer"-keyword expected, got: %s.',
                    $keyword->value
                );

                throw new CrossReferenceException($message, CrossReferenceException::UNEXPECTED_END);
            }
        } catch (PdfTypeException $typeError) {
            throw new CrossReferenceException(
                'Unexpected end of cross reference. "trailer"-keyword expected, got an invalid object type.',
                CrossReferenceException::UNEXPECTED_END,
                $typeError
            );
        }

        // Step 2: the keyword has to be followed by the trailer dictionary itself.
        try {
            $this->trailer = $this->parser->readValue(null, PdfDictionary::class);
        } catch (PdfTypeException $typeError) {
            throw new CrossReferenceException(
                'Unexpected end of cross reference. Trailer not found.',
                CrossReferenceException::UNEXPECTED_END,
                $typeError
            );
        }
    }
}

View File

@ -0,0 +1,320 @@
<?php
/**
* This file is part of FPDI
*
* @package setasign\Fpdi
* @copyright Copyright (c) 2020 Setasign GmbH & Co. KG (https://www.setasign.com)
* @license http://opensource.org/licenses/mit-license The MIT License
*/
namespace setasign\Fpdi\PdfParser\CrossReference;
use setasign\Fpdi\PdfParser\PdfParser;
use setasign\Fpdi\PdfParser\Type\PdfDictionary;
use setasign\Fpdi\PdfParser\Type\PdfIndirectObject;
use setasign\Fpdi\PdfParser\Type\PdfNumeric;
use setasign\Fpdi\PdfParser\Type\PdfStream;
use setasign\Fpdi\PdfParser\Type\PdfToken;
use setasign\Fpdi\PdfParser\Type\PdfTypeException;
/**
* Class CrossReference
*
* This class processes the standard cross reference of a PDF document.
*
* @package setasign\Fpdi\PdfParser\CrossReference
*/
class CrossReference
{
    /**
     * The byte length in which the "startxref" keyword should be searched.
     *
     * @var int
     */
    public static $trailerSearchLength = 5500;

    /**
     * Byte offset that is added to every offset read from the cross-reference data.
     *
     * @var int
     */
    protected $fileHeaderOffset = 0;

    /**
     * @var PdfParser
     */
    protected $parser;

    /**
     * The readers of all cross-reference sections, in the order in which they were followed
     * (the one referenced by "startxref" first, then the ones referenced through /Prev).
     *
     * @var ReaderInterface[]
     */
    protected $readers = [];

    /**
     * CrossReference constructor.
     *
     * Locates the "startxref" pointer and follows the chain of cross-reference sections through
     * the /Prev entries of their trailers. The chain is left as soon as a section would be read
     * a second time, so a cyclic /Prev chain cannot cause an endless loop.
     *
     * @param PdfParser $parser
     * @param int $fileHeaderOffset Byte offset of the file header; added to all offsets.
     * @throws CrossReferenceException
     * @throws PdfTypeException
     */
    public function __construct(PdfParser $parser, $fileHeaderOffset = 0)
    {
        $this->parser = $parser;
        $this->fileHeaderOffset = $fileHeaderOffset;

        $offset = $this->findStartXref();
        $reader = null;
        // absolute positions of all sections read so far (guards against cyclic /Prev chains)
        $visitedPositions = [];

        /** @noinspection TypeUnsafeComparisonInspection */
        while ($offset != false) { // By doing an unsafe comparison we ignore faulty references to byte offset 0
            try {
                $current = $this->readXref($offset + $this->fileHeaderOffset);
            } catch (CrossReferenceException $e) {
                // sometimes the file header offset is part of the byte offsets, so let's retry by resetting it to zero.
                if ($e->getCode() === CrossReferenceException::INVALID_DATA && $this->fileHeaderOffset !== 0) {
                    $this->fileHeaderOffset = 0;
                    $current = $this->readXref($offset + $this->fileHeaderOffset);
                } else {
                    throw $e;
                }
            }

            // $fileHeaderOffset now holds the value that was used for the successful read
            $position = (int) ($offset + $this->fileHeaderOffset);
            if (isset($visitedPositions[$position])) {
                // this section is already part of the chain: stop instead of looping forever
                break;
            }
            $visitedPositions[$position] = true;

            $reader = $current;
            $trailer = $reader->getTrailer();
            $this->checkForEncryption($trailer);
            $this->readers[] = $reader;

            if (isset($trailer->value['Prev'])) {
                $offset = $trailer->value['Prev']->value;
            } else {
                $offset = false;
            }
        }

        if ($reader === null) {
            throw new CrossReferenceException('No cross-reference found.', CrossReferenceException::NO_XREF_FOUND);
        }

        // fix faulty sub-section header (only done for the last section of the chain)
        if ($reader instanceof FixedReader) {
            /**
             * @var FixedReader $reader
             */
            $reader->fixFaultySubSectionShift();
        }
    }

    /**
     * Get the size of the cross reference.
     *
     * The value is taken from the /Size entry of the trailer returned by getTrailer().
     *
     * @return integer
     */
    public function getSize()
    {
        return $this->getTrailer()->value['Size']->value;
    }

    /**
     * Get the trailer dictionary.
     *
     * This is the trailer of the first reader, i.e. of the section referenced by "startxref".
     *
     * @return PdfDictionary
     */
    public function getTrailer()
    {
        return $this->readers[0]->getTrailer();
    }

    /**
     * Get the cross reference reader instances.
     *
     * @return ReaderInterface[]
     */
    public function getReaders()
    {
        return $this->readers;
    }

    /**
     * Get the offset by an object number.
     *
     * The readers are asked in their stored order; the first offset found wins.
     *
     * @param int $objectNumber
     * @return integer|bool The offset or false if no reader knows the object.
     */
    public function getOffsetFor($objectNumber)
    {
        foreach ($this->getReaders() as $reader) {
            $offset = $reader->getOffsetFor($objectNumber);
            if ($offset !== false) {
                return $offset;
            }
        }

        return false;
    }

    /**
     * Get an indirect object by its object number.
     *
     * @param int $objectNumber
     * @return PdfIndirectObject
     * @throws CrossReferenceException If no offset is known, no indirect object is found at the offset
     *                                 or the object found there has a different object number.
     */
    public function getIndirectObject($objectNumber)
    {
        $offset = $this->getOffsetFor($objectNumber);
        if ($offset === false) {
            throw new CrossReferenceException(
                \sprintf('Object (id:%s) not found.', $objectNumber),
                CrossReferenceException::OBJECT_NOT_FOUND
            );
        }

        $parser = $this->parser;

        // start with a clean tokenizer at the position of the object
        $parser->getTokenizer()->clearStack();
        $parser->getStreamReader()->reset($offset + $this->fileHeaderOffset);

        try {
            /** @var PdfIndirectObject $object */
            $object = $parser->readValue(null, PdfIndirectObject::class);
        } catch (PdfTypeException $e) {
            throw new CrossReferenceException(
                \sprintf('Object (id:%s) not found at location (%s).', $objectNumber, $offset),
                CrossReferenceException::OBJECT_NOT_FOUND,
                $e
            );
        }

        if ($object->objectNumber !== $objectNumber) {
            throw new CrossReferenceException(
                \sprintf('Wrong object found, got %s while %s was expected.', $object->objectNumber, $objectNumber),
                CrossReferenceException::OBJECT_NOT_FOUND
            );
        }

        return $object;
    }

    /**
     * Read the cross-reference table at a given offset.
     *
     * Internally the method will try to evaluate the best reader for this cross-reference.
     *
     * @param int $offset
     * @return ReaderInterface
     * @throws CrossReferenceException
     * @throws PdfTypeException
     */
    protected function readXref($offset)
    {
        $this->parser->getStreamReader()->reset($offset);
        $this->parser->getTokenizer()->clearStack();
        $initValue = $this->parser->readValue();

        return $this->initReaderInstance($initValue);
    }

    /**
     * Get a cross-reference reader instance.
     *
     * An "xref" token is handled by a FixedReader and, if that one fails, by a LineReader.
     * An indirect object (cross-reference stream) always ends in an exception.
     *
     * @param PdfToken|PdfIndirectObject $initValue
     * @return ReaderInterface
     * @throws CrossReferenceException
     * @throws PdfTypeException
     */
    protected function initReaderInstance($initValue)
    {
        // remember the position behind the init value, so that the LineReader can start over from there
        $position = $this->parser->getStreamReader()->getPosition()
            + $this->parser->getStreamReader()->getOffset() + $this->fileHeaderOffset;

        if ($initValue instanceof PdfToken && $initValue->value === 'xref') {
            try {
                return new FixedReader($this->parser);
            } catch (CrossReferenceException $e) {
                $this->parser->getStreamReader()->reset($position);
                $this->parser->getTokenizer()->clearStack();

                return new LineReader($this->parser);
            }
        }

        if ($initValue instanceof PdfIndirectObject) {
            // the object has to be a stream of /Type /XRef
            $stream = PdfStream::ensure($initValue->value);

            $type = PdfDictionary::get($stream->value, 'Type');
            if ($type->value !== 'XRef') {
                throw new CrossReferenceException(
                    'The xref position points to an incorrect object type.',
                    CrossReferenceException::INVALID_DATA
                );
            }

            // check for encryption (reported in preference to the missing compression support)
            $this->checkForEncryption($stream->value);

            throw new CrossReferenceException(
                'This PDF document probably uses a compression technique which is not supported by the ' .
                'free parser shipped with FPDI. (See https://www.setasign.com/fpdi-pdf-parser for more details)',
                CrossReferenceException::COMPRESSED_XREF
            );
        }

        throw new CrossReferenceException(
            'The xref position points to an incorrect object type.',
            CrossReferenceException::INVALID_DATA
        );
    }

    /**
     * Check for encryption.
     *
     * @param PdfDictionary $dictionary
     * @throws CrossReferenceException If the dictionary has an /Encrypt entry.
     */
    protected function checkForEncryption(PdfDictionary $dictionary)
    {
        if (isset($dictionary->value['Encrypt'])) {
            throw new CrossReferenceException(
                'This PDF document is encrypted and cannot be processed with FPDI.',
                CrossReferenceException::ENCRYPTED
            );
        }
    }

    /**
     * Find the start position for the first cross-reference.
     *
     * The last self::$trailerSearchLength bytes are searched for the last "startxref" keyword
     * (or "startref" as a fallback); the numeric value behind it is returned.
     *
     * @return int The byte-offset position of the first cross-reference.
     * @throws CrossReferenceException
     */
    protected function findStartXref()
    {
        $reader = $this->parser->getStreamReader();
        $reader->reset(-self::$trailerSearchLength, self::$trailerSearchLength);

        $buffer = $reader->getBuffer(false);
        $pos = \strrpos($buffer, 'startxref');
        $addOffset = 9; // length of "startxref"
        if ($pos === false) {
            // Some corrupted documents use startref instead of startxref
            $pos = \strrpos($buffer, 'startref');
            if ($pos === false) {
                throw new CrossReferenceException(
                    'Unable to find pointer to xref table',
                    CrossReferenceException::NO_STARTXREF_FOUND
                );
            }
            $addOffset = 8; // length of "startref"
        }

        $reader->setOffset($pos + $addOffset);

        try {
            $value = $this->parser->readValue(null, PdfNumeric::class);
        } catch (PdfTypeException $e) {
            throw new CrossReferenceException(
                'Invalid data after startxref keyword.',
                CrossReferenceException::INVALID_DATA,
                $e
            );
        }

        return $value->value;
    }
}

View File

@ -0,0 +1,80 @@
<?php
/**
* This file is part of FPDI
*
* @package setasign\Fpdi
* @copyright Copyright (c) 2020 Setasign GmbH & Co. KG (https://www.setasign.com)
* @license http://opensource.org/licenses/mit-license The MIT License
*/
namespace setasign\Fpdi\PdfParser\CrossReference;
use setasign\Fpdi\PdfParser\PdfParserException;
/**
* Exception used by the CrossReference and Reader classes.
*
* @package setasign\Fpdi\PdfParser\CrossReference
*/
class CrossReferenceException extends PdfParserException
{
/**
* The data found is not valid cross-reference data (e.g. the xref position points to an incorrect
* object type, the value after "startxref" is not numeric or the table contains unexpected data).
*
* @var int
*/
const INVALID_DATA = 0x0101;
/**
* NOTE(review): not used by the classes in this package folder as far as visible here.
*
* @var int
*/
const XREF_MISSING = 0x0102;
/**
* Used by FixedReader if the entries of a cross-reference table are larger than 20 bytes.
*
* @var int
*/
const ENTRIES_TOO_LARGE = 0x0103;
/**
* Used by FixedReader if the entries of a cross-reference table are shorter than 20 bytes.
*
* @var int
*/
const ENTRIES_TOO_SHORT = 0x0104;
/**
* Used by FixedReader if no subsection was found in a cross-reference table.
*
* @var int
*/
const NO_ENTRIES = 0x0105;
/**
* Used by LineReader if the "trailer" keyword cannot be found behind the cross-reference data.
*
* @var int
*/
const NO_TRAILER_FOUND = 0x0106;
/**
* Used by CrossReference if neither "startxref" nor "startref" is found at the end of the document.
*
* @var int
*/
const NO_STARTXREF_FOUND = 0x0107;
/**
* Used by CrossReference if not a single cross-reference could be read.
*
* @var int
*/
const NO_XREF_FOUND = 0x0108;
/**
* Used by AbstractReader if the "trailer" keyword or the trailer dictionary is missing.
*
* @var int
*/
const UNEXPECTED_END = 0x0109;
/**
* Used by CrossReference if an object is unknown, not located at its offset or has a different number.
*
* @var int
*/
const OBJECT_NOT_FOUND = 0x010A;
/**
* Used by CrossReference if the cross-reference is a stream of type /XRef, which it does not read.
*
* @var int
*/
const COMPRESSED_XREF = 0x010B;
/**
* Used by CrossReference if a trailer or cross-reference stream dictionary has an /Encrypt entry.
*
* @var int
*/
const ENCRYPTED = 0x010C;
}

View File

@ -0,0 +1,196 @@
<?php
/**
* This file is part of FPDI
*
* @package setasign\Fpdi
* @copyright Copyright (c) 2020 Setasign GmbH & Co. KG (https://www.setasign.com)
* @license http://opensource.org/licenses/mit-license The MIT License
*/
namespace setasign\Fpdi\PdfParser\CrossReference;
use setasign\Fpdi\PdfParser\PdfParser;
use setasign\Fpdi\PdfParser\StreamReader;
/**
* Class FixedReader
*
* This reader parses single entries of the cross-reference with very little overhead, because the main entries
* are only read when needed and not in a single run.
*
* @package setasign\Fpdi\PdfParser\CrossReference
*/
class FixedReader extends AbstractReader implements ReaderInterface
{
    /**
     * The stream reader the entries are read from on demand.
     *
     * @var StreamReader
     */
    protected $reader;

    /**
     * Data of subsections.
     *
     * The keys are the byte positions of the first entry of a subsection, the values are
     * arrays of the form [first object number, entry count].
     *
     * @var array
     */
    protected $subSections;

    /**
     * FixedReader constructor.
     *
     * @param PdfParser $parser
     * @throws CrossReferenceException
     */
    public function __construct(PdfParser $parser)
    {
        $this->reader = $parser->getStreamReader();
        // the subsections have to be read first: read() leaves the reader in front of the "trailer"
        // keyword, which is what the parent constructor reads next
        $this->read();
        parent::__construct($parser);
    }

    /**
     * Get all subsection data.
     *
     * @return array
     */
    public function getSubSections()
    {
        return $this->subSections;
    }

    /**
     * @inheritdoc
     */
    public function getOffsetFor($objectNumber)
    {
        foreach ($this->subSections as $offset => list($startObject, $objectCount)) {
            if ($objectNumber >= $startObject && $objectNumber < ($startObject + $objectCount)) {
                // every entry is exactly 20 bytes long, so its position can be calculated
                $position = $offset + 20 * ($objectNumber - $startObject);
                $this->reader->ensure($position, 20);
                $line = $this->reader->readBytes(20);

                // The entry may not be readable completely (e.g. truncated document). Without this guard
                // $line[17] would raise an offset notice/warning and a bogus offset of 0 would be returned.
                // NOTE(review): assumes readBytes() returns false or a shorter string in that case - confirm
                // against StreamReader.
                if (!\is_string($line) || \strlen($line) < 18) {
                    return false;
                }

                // byte 17 is the entry type: "f" marks a free entry, which has no offset
                if ($line[17] === 'f') {
                    return false;
                }

                // the first 10 bytes are the byte offset
                return (int) \substr($line, 0, 10);
            }
        }

        return false;
    }

    /**
     * Read the cross-reference.
     *
     * This reader will only read the subsections in this method. The offsets are resolved individually by
     * this information.
     *
     * @throws CrossReferenceException
     */
    protected function read()
    {
        $subSections = [];

        $startObject = $entryCount = $lastLineStart = null;
        $validityChecked = false;
        while (($line = $this->reader->readLine(20)) !== false) {
            if (\strpos($line, 'trailer') !== false) {
                // go back in front of the line with the trailer keyword
                $this->reader->reset($lastLineStart);
                break;
            }

            // jump over if line content doesn't match the expected string
            if (\sscanf($line, '%d %d', $startObject, $entryCount) !== 2) {
                continue;
            }

            // position of the first entry of this subsection
            $position = $this->reader->getPosition() + $this->reader->getOffset();

            // the length of the entries is verified only once, with the first entry found
            if (!$validityChecked && $entryCount > 0) {
                $nextLine = $this->reader->readBytes(21);
                /* Check the next line for maximum of 20 bytes and not longer
                 * By catching 21 bytes and trimming the length should be still 21.
                 */
                if (\strlen(\trim($nextLine)) !== 21) {
                    throw new CrossReferenceException(
                        'Cross-reference entries are larger than 20 bytes.',
                        CrossReferenceException::ENTRIES_TOO_LARGE
                    );
                }

                /* Check for less than 20 bytes: cut the line to 20 bytes and trim; have to result in exactly 18 bytes.
                 * If it would have less bytes the substring would get the first bytes of the next line which would
                 * evaluate to a 20 bytes long string after trimming.
                 */
                if (\strlen(\trim(\substr($nextLine, 0, 20))) !== 18) {
                    throw new CrossReferenceException(
                        'Cross-reference entries are less than 20 bytes.',
                        CrossReferenceException::ENTRIES_TOO_SHORT
                    );
                }

                $validityChecked = true;
            }

            $subSections[$position] = [$startObject, $entryCount];

            // skip all entries of this subsection and continue with the next subsection header
            $lastLineStart = $position + $entryCount * 20;
            $this->reader->reset($lastLineStart);
        }

        // reset after the last correct parsed line
        $this->reader->reset($lastLineStart);

        if (\count($subSections) === 0) {
            throw new CrossReferenceException(
                'No entries found in cross-reference.',
                CrossReferenceException::NO_ENTRIES
            );
        }

        $this->subSections = $subSections;
    }

    /**
     * Fixes an invalid object number shift.
     *
     * This method can be used to repair documents with an invalid subsection header:
     *
     * <code>
     * xref
     * 1 7
     * 0000000000 65535 f
     * 0000000009 00000 n
     * 0000412075 00000 n
     * 0000412172 00000 n
     * 0000412359 00000 n
     * 0000412417 00000 n
     * 0000412468 00000 n
     * </code>
     *
     * It shall only be called on the first table.
     *
     * @return bool True if the subsection was shifted by one, false if nothing was changed.
     */
    public function fixFaultySubSectionShift()
    {
        $subSections = $this->getSubSections();
        if (\count($subSections) > 1) {
            return false;
        }

        $subSection = \current($subSections);
        if ($subSection[0] != 1) {
            return false;
        }

        // a free first entry in a table starting with object number 1 indicates the shifted header
        if ($this->getOffsetFor(1) === false) {
            foreach ($subSections as $offset => list($startObject, $objectCount)) {
                $this->subSections[$offset] = [$startObject - 1, $objectCount];
            }

            return true;
        }

        return false;
    }
}

View File

@ -0,0 +1,173 @@
<?php
/**
* This file is part of FPDI
*
* @package setasign\Fpdi
* @copyright Copyright (c) 2020 Setasign GmbH & Co. KG (https://www.setasign.com)
* @license http://opensource.org/licenses/mit-license The MIT License
*/
namespace setasign\Fpdi\PdfParser\CrossReference;
use setasign\Fpdi\PdfParser\PdfParser;
use setasign\Fpdi\PdfParser\StreamReader;
/**
* Class LineReader
*
* This reader class reads all cross-reference entries in a single run.
* It supports reading cross-references with e.g. invalid data (e.g. entries with a length < or > 20 bytes).
*
* @package setasign\Fpdi\PdfParser\CrossReference
*/
class LineReader extends AbstractReader implements ReaderInterface
{
    /**
     * Offsets of all in-use objects, indexed by object number.
     *
     * Each value is an array of the first two numeric columns of the entry.
     *
     * @var array
     */
    protected $offsets;

    /**
     * Reads the complete cross-reference table first and the trailer afterwards.
     *
     * @param PdfParser $parser
     * @throws CrossReferenceException
     */
    public function __construct(PdfParser $parser)
    {
        $xrefContent = $this->extract($parser->getStreamReader());
        $this->read($xrefContent);

        parent::__construct($parser);
    }

    /**
     * @inheritdoc
     */
    public function getOffsetFor($objectNumber)
    {
        if (!isset($this->offsets[$objectNumber])) {
            return false;
        }

        return $this->offsets[$objectNumber][0];
    }

    /**
     * Returns everything that was collected while reading the table.
     *
     * @return array
     */
    public function getOffsets()
    {
        return $this->offsets;
    }

    /**
     * Pulls the raw table data (everything in front of the "trailer" keyword) out of the stream reader.
     *
     * The buffer is enlarged in steps of 100 bytes until the keyword shows up. The reader is
     * positioned at the keyword afterwards.
     *
     * @param StreamReader $reader
     * @return string
     * @throws CrossReferenceException
     */
    protected function extract(StreamReader $reader)
    {
        $chunkSize = 100;
        $round = -1;

        $reader->reset(null, $chunkSize);

        do {
            // The search start lags one chunk behind the buffer growth, so that a keyword
            // spanning two chunks is still found.
            $searchFrom = \max($chunkSize * $round, 0);
            $round++;
            $trailerPos = \strpos($reader->getBuffer(false), 'trailer', $searchFrom);
        } while ($trailerPos === false && $reader->increaseLength($chunkSize) !== false);

        if ($trailerPos === false) {
            throw new CrossReferenceException(
                'Unexpected end of cross reference. "trailer"-keyword not found.',
                CrossReferenceException::NO_TRAILER_FOUND
            );
        }

        $tableData = \substr($reader->getBuffer(false), 0, $trailerPos);
        $reader->reset($reader->getPosition() + $trailerPos);

        return $tableData;
    }

    /**
     * Parses the raw table data line by line.
     *
     * Lines with two pieces are subsection headers, lines with three pieces ending in "n" or "f"
     * are entries. Anything else is rejected.
     *
     * @param string $xrefContent
     * @throws CrossReferenceException
     */
    protected function read($xrefContent)
    {
        $eolPattern = "/(\r\n|\n|\r)/";

        // the kind of line ending is detected in the first 100 bytes
        \preg_match_all($eolPattern, \substr($xrefContent, 0, 100), $eolMatches);
        $foundEols = $eolMatches[0];

        if (\count($foundEols) === 0) {
            throw new CrossReferenceException(
                'No data found in cross-reference.',
                CrossReferenceException::INVALID_DATA
            );
        }

        // count(array_count_values()) is faster than count(array_unique())
        // @see https://github.com/symfony/symfony/pull/23731
        // can be reverted for php7.2
        $hasMixedEols = \count(\array_count_values($foundEols)) > 1;
        $lines = $hasMixedEols
            ? \preg_split($eolPattern, $xrefContent, -1, PREG_SPLIT_NO_EMPTY)
            : \explode($foundEols[0], $xrefContent);

        $objectNumber = null;
        $found = [];

        foreach ($lines as $rawLine) {
            $line = \trim($rawLine);
            if (!$line) {
                continue;
            }

            $pieces = \explode(' ', $line);
            $pieceCount = \count($pieces);

            // subsection header: "<first object number> <entry count>"
            if ($pieceCount === 2) {
                $objectNumber = (int) $pieces[0];
                continue;
            }

            if ($pieceCount === 3) {
                // entry of an object in use
                if ($pieces[2] === 'n') {
                    $found[$objectNumber] = [(int) $pieces[0], (int) $pieces[1]];
                    $objectNumber++;
                    continue;
                }

                // free entry: only the object number moves on
                if ($pieces[2] === 'f') {
                    $objectNumber++;
                    continue;
                }
            }

            throw new CrossReferenceException(
                \sprintf('Unexpected data in xref table (%s)', \implode(' ', $pieces)),
                CrossReferenceException::INVALID_DATA
            );
        }

        $this->offsets = $found;
    }
}

View File

@ -0,0 +1,35 @@
<?php
/**
* This file is part of FPDI
*
* @package setasign\Fpdi
* @copyright Copyright (c) 2020 Setasign GmbH & Co. KG (https://www.setasign.com)
* @license http://opensource.org/licenses/mit-license The MIT License
*/
namespace setasign\Fpdi\PdfParser\CrossReference;
use setasign\Fpdi\PdfParser\Type\PdfDictionary;
/**
* ReaderInterface for cross-reference readers.
*
* @package setasign\Fpdi\PdfParser\CrossReference
*/
interface ReaderInterface
{
/**
* Get the byte offset of an object by its object number.
*
* The readers in this package also return false for entries that are marked as free.
*
* @param int $objectNumber
* @return int|bool False if the offset was not found.
*/
public function getOffsetFor($objectNumber);
/**
* Get the trailer dictionary related to this cross reference.
*
* @return PdfDictionary
*/
public function getTrailer();
}

View File

@ -0,0 +1,105 @@
<?php
/**
* This file is part of FPDI
*
* @package setasign\Fpdi
* @copyright Copyright (c) 2020 Setasign GmbH & Co. KG (https://www.setasign.com)
* @license http://opensource.org/licenses/mit-license The MIT License
*/
namespace setasign\Fpdi\PdfParser\Filter;
/**
* Class for handling ASCII base-85 encoded data
*
* @package setasign\Fpdi\PdfParser\Filter
*/
class Ascii85 implements FilterInterface
{
    /**
     * Decode ASCII85 encoded string.
     *
     * Whitespace is ignored, a leading "<~" is skipped and decoding stops at "~>". A "z" in front of
     * a group stands for four zero bytes. An incomplete final group of 2-4 characters yields 1-3 bytes.
     *
     * @param string $data The input string
     * @return string
     * @throws Ascii85Exception If an illegal character is found or the final group has a single character.
     */
    public function decode($data)
    {
        $out = '';
        $state = 0; // number of characters collected for the current group
        $chn = [];  // base-85 digits of the current group

        $data = \preg_replace('/\s/', '', $data);

        $l = \strlen($data);

        /** @noinspection ForeachInvariantsInspection */
        for ($k = 0; $k < $l; ++$k) {
            $ch = \ord($data[$k]);

            //Start <~
            if ($k === 0 && $ch === 60 && isset($data[$k + 1]) && \ord($data[$k + 1]) === 126) {
                $k++;
                continue;
            }

            //End ~>
            if ($ch === 126 && isset($data[$k + 1]) && \ord($data[$k + 1]) === 62) {
                break;
            }

            if ($ch === 122 /* z */ && $state === 0) {
                $out .= "\x00\x00\x00\x00";
                continue;
            }

            if ($ch < 33 /* ! */ || $ch > 117 /* u */) {
                throw new Ascii85Exception(
                    'Illegal character found while ASCII85 decode.',
                    Ascii85Exception::ILLEGAL_CHAR_FOUND
                );
            }

            $chn[$state] = $ch - 33;/* ! */
            $state++;

            if ($state === 5) {
                $state = 0;
                $r = 0;
                for ($j = 0; $j < 5; ++$j) {
                    /** @noinspection UnnecessaryCastingInspection */
                    $r = (int)($r * 85 + $chn[$j]);
                }

                $out .= self::highBytes($r, 4);
            }
        }

        if ($state === 1) {
            throw new Ascii85Exception(
                'Illegal length while ASCII85 decode.',
                Ascii85Exception::ILLEGAL_LENGTH
            );
        }

        if ($state >= 2) {
            // Incomplete final group: the last digit found is increased by one and the missing digits
            // count as zero; a group of n characters results in n - 1 bytes.
            $chn[$state - 1]++;
            $r = 0;
            for ($j = 0; $j < 5; ++$j) {
                $r = $r * 85 + ($j < $state ? $chn[$j] : 0);
            }

            $out .= self::highBytes($r, $state - 1);
        }

        return $out;
    }

    /**
     * Returns the $count most significant bytes of the lower 32 bits of a value.
     *
     * Every byte is masked explicitly so that chr() only ever receives values from 0 to 255.
     *
     * @param int|float $value
     * @param int $count Number of bytes (1-4)
     * @return string
     */
    private static function highBytes($value, $count)
    {
        $bytes = '';
        for ($i = 0, $shift = 24; $i < $count; $i++, $shift -= 8) {
            $bytes .= \chr(($value >> $shift) & 0xFF);
        }

        return $bytes;
    }
}

View File

@ -0,0 +1,28 @@
<?php
/**
* This file is part of FPDI
*
* @package setasign\Fpdi
* @copyright Copyright (c) 2020 Setasign GmbH & Co. KG (https://www.setasign.com)
* @license http://opensource.org/licenses/mit-license The MIT License
*/
namespace setasign\Fpdi\PdfParser\Filter;
/**
* Exception for Ascii85 filter class
*
* @package setasign\Fpdi\PdfParser\Filter
*/
class Ascii85Exception extends FilterException
{
/**
* Used by Ascii85::decode() if a character outside of the range "!" to "u" is found.
*
* @var int
*/
const ILLEGAL_CHAR_FOUND = 0x0301;
/**
* Used by Ascii85::decode() if the data ends with a group of a single character.
*
* @var int
*/
const ILLEGAL_LENGTH = 0x0302;
}

View File

@ -0,0 +1,48 @@
<?php
/**
* This file is part of FPDI
*
* @package setasign\Fpdi
* @copyright Copyright (c) 2020 Setasign GmbH & Co. KG (https://www.setasign.com)
* @license http://opensource.org/licenses/mit-license The MIT License
*/
namespace setasign\Fpdi\PdfParser\Filter;
/**
* Class for handling ASCII hexadecimal encoded data
*
* @package setasign\Fpdi\PdfParser\Filter
*/
class AsciiHex implements FilterInterface
{
    /**
     * Turns an ASCII hexadecimal encoded string into binary data.
     *
     * Trailing ">" characters are cut off and every character that is no hexadecimal digit is dropped.
     * An odd number of digits is completed with a "0".
     *
     * @param string $data The input string
     * @return string
     */
    public function decode($data)
    {
        $withoutEod = \rtrim($data, '>');
        $hexDigits = \preg_replace('/[^0-9A-Fa-f]/', '', $withoutEod);

        $isOdd = (\strlen($hexDigits) % 2) === 1;
        if ($isOdd) {
            $hexDigits .= '0';
        }

        return \pack('H*', $hexDigits);
    }

    /**
     * Turns binary data into its ASCII hexadecimal representation.
     *
     * @param string $data The input string
     * @param boolean $leaveEOD Whether the trailing ">" should be left out
     * @return string
     */
    public function encode($data, $leaveEOD = false)
    {
        $eod = '>';
        if ($leaveEOD) {
            $eod = '';
        }

        $unpacked = \unpack('H*', $data);

        return \current($unpacked) . $eod;
    }
}

View File

@ -0,0 +1,24 @@
<?php
/**
* This file is part of FPDI
*
* @package setasign\Fpdi
* @copyright Copyright (c) 2020 Setasign GmbH & Co. KG (https://www.setasign.com)
* @license http://opensource.org/licenses/mit-license The MIT License
*/
namespace setasign\Fpdi\PdfParser\Filter;
use setasign\Fpdi\PdfParser\PdfParserException;
/**
* Exception for filters
*
* @package setasign\Fpdi\PdfParser\Filter
*/
class FilterException extends PdfParserException
{
/**
* Error code for an unsupported filter.
* NOTE(review): the code raising it is not part of the filter classes themselves - confirm usage.
*
* @var int
*/
const UNSUPPORTED_FILTER = 0x0201;
/**
* Error code for functionality that is not implemented.
* NOTE(review): the code raising it is not part of the filter classes themselves - confirm usage.
*
* @var int
*/
const NOT_IMPLEMENTED = 0x0202;
}

View File

@ -0,0 +1,26 @@
<?php
/**
* This file is part of FPDI
*
* @package setasign\Fpdi
* @copyright Copyright (c) 2020 Setasign GmbH & Co. KG (https://www.setasign.com)
* @license http://opensource.org/licenses/mit-license The MIT License
*/
namespace setasign\Fpdi\PdfParser\Filter;
/**
* Interface for filters
*
* @package setasign\Fpdi\PdfParser\Filter
*/
interface FilterInterface
{
/**
* Decode a string.
*
* The filters of this package report errors through exceptions extending FilterException.
*
* @param string $data The input string
* @return string The decoded data
*/
public function decode($data);
}

View File

@ -0,0 +1,87 @@
<?php
/**
* This file is part of FPDI
*
* @package setasign\Fpdi
* @copyright Copyright (c) 2020 Setasign GmbH & Co. KG (https://www.setasign.com)
* @license http://opensource.org/licenses/mit-license The MIT License
*/
namespace setasign\Fpdi\PdfParser\Filter;
/**
* Class for handling zlib/deflate encoded data
*
* @package setasign\Fpdi\PdfParser\Filter
*/
class Flate implements FilterInterface
{
    /**
     * Checks whether the zlib extension is loaded.
     *
     * Used for testing purpose.
     *
     * @return boolean
     * @internal
     */
    protected function extensionLoaded()
    {
        return \extension_loaded('zlib');
    }

    /**
     * Decodes a flate compressed string.
     *
     * The data is passed to gzuncompress() first. If this fails, two fallbacks are tried:
     * inflating the data behind a fake gzip header through a stream filter, and gzinflate()
     * on the data with up to six leading bytes skipped.
     *
     * @param string $data The input string
     * @return string
     * @throws FlateException If the zlib extension is missing or the data cannot be decompressed.
     */
    public function decode($data)
    {
        if (!$this->extensionLoaded()) {
            throw new FlateException(
                'To handle FlateDecode filter, enable zlib support in PHP.',
                FlateException::NO_ZLIB
            );
        }

        $oData = $data;
        $data = @((\strlen($data) > 0) ? \gzuncompress($data) : '');
        if ($data !== false) {
            return $data;
        }

        // let's try if the checksum is CRC32
        $fh = \fopen('php://temp', 'w+b');
        \fwrite($fh, "\x1f\x8b\x08\x00\x00\x00\x00\x00" . $oData);
        \stream_filter_append($fh, 'zlib.inflate', \STREAM_FILTER_READ, ['window' => 30]);
        \fseek($fh, 0);
        $data = \stream_get_contents($fh);
        \fclose($fh);

        if ($data) {
            return $data;
        }

        // Try this fallback: inflate the raw data and skip one more leading byte with every try.
        $tries = 0;

        // The byte length of the original data. The results have to be compared to this length
        // (and not to the number of its digits, as a former strlen($oDataLen) did).
        $oDataLen = \strlen($oData);
        while ($tries < 6 && ($data === false || (\strlen($data) < ($oDataLen - $tries - 1)))) {
            $data = @(\gzinflate(\substr($oData, $tries)));
            $tries++;
        }

        // let's use this fallback only if the $data is longer than the original data
        // (a failed inflate must never be returned as a result, so false is excluded here)
        if ($data !== false && \strlen($data) > ($oDataLen - $tries - 1)) {
            return $data;
        }

        if (!$data) {
            throw new FlateException(
                'Error while decompressing stream.',
                FlateException::DECOMPRESS_ERROR
            );
        }

        return $data;
    }
}

View File

@ -0,0 +1,28 @@
<?php
/**
* This file is part of FPDI
*
* @package setasign\Fpdi
* @copyright Copyright (c) 2020 Setasign GmbH & Co. KG (https://www.setasign.com)
* @license http://opensource.org/licenses/mit-license The MIT License
*/
namespace setasign\Fpdi\PdfParser\Filter;
/**
* Exception for flate filter class
*
* @package setasign\Fpdi\PdfParser\Filter
*/
class FlateException extends FilterException
{
/**
* Used by Flate::decode() if the zlib extension is not loaded.
*
* @var int
*/
const NO_ZLIB = 0x0401;
/**
* Used by Flate::decode() if none of its decompression attempts delivers data.
*
* @var int
*/
const DECOMPRESS_ERROR = 0x0402;
}

View File

@ -0,0 +1,189 @@
<?php
/**
* This file is part of FPDI
*
* @package setasign\Fpdi
* @copyright Copyright (c) 2020 Setasign GmbH & Co. KG (https://www.setasign.com)
* @license http://opensource.org/licenses/mit-license The MIT License
*/
namespace setasign\Fpdi\PdfParser\Filter;
/**
* Class for handling LZW encoded data
*
* @package setasign\Fpdi\PdfParser\Filter
*/
class Lzw implements FilterInterface
{
    /**
     * The compressed data that is currently decoded.
     *
     * @var null|string
     */
    protected $data;

    /**
     * The string table (code => byte sequence).
     *
     * @var array
     */
    protected $sTable = [];

    /**
     * Byte length of $data.
     *
     * @var int
     */
    protected $dataLength = 0;

    /**
     * The next free index in the string table.
     *
     * @var int
     */
    protected $tIdx;

    /**
     * The current code length in bits (9-12).
     *
     * @var int
     */
    protected $bitsToGet = 9;

    /**
     * Index of the next byte to read from $data.
     *
     * @var int
     */
    protected $bytePointer;

    /**
     * Bit buffer holding the bytes read so far.
     *
     * @var int
     */
    protected $nextData = 0;

    /**
     * Number of bits in the bit buffer that are not consumed yet.
     *
     * @var int
     */
    protected $nextBits = 0;

    /**
     * Bit masks for codes of 9, 10, 11 and 12 bits.
     *
     * @var array
     */
    protected $andTable = [511, 1023, 2047, 4095];

    /**
     * Method to decode LZW compressed data.
     *
     * Decoding ends with the end-of-data code (257) or at the end of the data.
     *
     * @param string $data The compressed data
     * @return string The uncompressed data
     * @throws LzwException If the data starts with the bytes 0x00 0x01.
     */
    public function decode($data)
    {
        // strncmp() instead of $data[0]/$data[1]: inputs shorter than two bytes must not
        // trigger "uninitialized string offset" warnings
        if (\strncmp($data, "\x00\x01", 2) === 0) {
            throw new LzwException(
                'LZW flavour not supported.',
                LzwException::LZW_FLAVOUR_NOT_SUPPORTED
            );
        }

        $this->initsTable();

        $this->data = $data;
        $this->dataLength = \strlen($data);

        // Initialize pointers
        $this->bytePointer = 0;

        $this->nextData = 0;
        $this->nextBits = 0;

        $oldCode = 0;

        $uncompData = '';

        while (($code = $this->getNextCode()) !== 257) {
            if ($code === 256) {
                // clear-table code: start over with a fresh table
                $this->initsTable();
                $code = $this->getNextCode();

                if ($code === 257) {
                    break;
                }

                $uncompData .= $this->sTable[$code];
                $oldCode = $code;
            } else {
                if ($code < $this->tIdx) {
                    // known code: output its string and register "previous string + its first byte"
                    $string = $this->sTable[$code];
                    $uncompData .= $string;

                    $this->addStringToTable($this->sTable[$oldCode], $string[0]);
                    $oldCode = $code;
                } else {
                    // code not in the table yet: it is "previous string + first byte of previous string"
                    $string = $this->sTable[$oldCode];
                    $string .= $string[0];
                    $uncompData .= $string;

                    $this->addStringToTable($string);
                    $oldCode = $code;
                }
            }
        }

        return $uncompData;
    }

    /**
     * Initialize the string table.
     *
     * The table gets the 256 single byte entries, the next free index is 258 and the
     * code length is reset to 9 bits.
     */
    protected function initsTable()
    {
        $this->sTable = [];

        for ($i = 0; $i < 256; $i++) {
            $this->sTable[$i] = \chr($i);
        }

        $this->tIdx = 258;
        $this->bitsToGet = 9;
    }

    /**
     * Add a new string to the string table.
     *
     * The code length is increased when the table reaches 511, 1023 or 2047 entries.
     *
     * @param string $oldString
     * @param string $newString
     */
    protected function addStringToTable($oldString, $newString = '')
    {
        $string = $oldString . $newString;

        // Add this new String to the table
        $this->sTable[$this->tIdx++] = $string;

        if ($this->tIdx === 511) {
            $this->bitsToGet = 10;
        } elseif ($this->tIdx === 1023) {
            $this->bitsToGet = 11;
        } elseif ($this->tIdx === 2047) {
            $this->bitsToGet = 12;
        }
    }

    /**
     * Returns the next 9, 10, 11 or 12 bits.
     *
     * The end-of-data code (257) is returned if no data is left or if the data ends in the
     * middle of a code. Reading behind the data would otherwise move the byte pointer past the
     * data length, the end would never be detected again and decode() would never terminate.
     *
     * @return integer
     */
    protected function getNextCode()
    {
        if ($this->bytePointer >= $this->dataLength) {
            return 257;
        }

        $this->nextData = ($this->nextData << 8) | \ord($this->data[$this->bytePointer++]);
        $this->nextBits += 8;

        if ($this->nextBits < $this->bitsToGet) {
            if ($this->bytePointer >= $this->dataLength) {
                // truncated data: the last code is incomplete
                return 257;
            }

            $this->nextData = ($this->nextData << 8) | \ord($this->data[$this->bytePointer++]);
            $this->nextBits += 8;
        }

        $code = ($this->nextData >> ($this->nextBits - $this->bitsToGet)) & $this->andTable[$this->bitsToGet - 9];
        $this->nextBits -= $this->bitsToGet;

        return $code;
    }
}

View File

@ -0,0 +1,23 @@
<?php
/**
* This file is part of FPDI
*
* @package setasign\Fpdi
* @copyright Copyright (c) 2020 Setasign GmbH & Co. KG (https://www.setasign.com)
* @license http://opensource.org/licenses/mit-license The MIT License
*/
namespace setasign\Fpdi\PdfParser\Filter;
/**
* Exception for LZW filter class
*
* @package setasign\Fpdi\PdfParser\Filter
*/
class LzwException extends FilterException
{
/**
* Used by Lzw::decode() if the data starts with the bytes 0x00 0x01.
*
* @var int
*/
const LZW_FLAVOUR_NOT_SUPPORTED = 0x0501;
}

View File

@ -0,0 +1,378 @@
<?php
/**
* This file is part of FPDI
*
* @package setasign\Fpdi
* @copyright Copyright (c) 2020 Setasign GmbH & Co. KG (https://www.setasign.com)
* @license http://opensource.org/licenses/mit-license The MIT License
*/
namespace setasign\Fpdi\PdfParser;
use setasign\Fpdi\PdfParser\CrossReference\CrossReference;
use setasign\Fpdi\PdfParser\CrossReference\CrossReferenceException;
use setasign\Fpdi\PdfParser\Type\PdfArray;
use setasign\Fpdi\PdfParser\Type\PdfBoolean;
use setasign\Fpdi\PdfParser\Type\PdfDictionary;
use setasign\Fpdi\PdfParser\Type\PdfHexString;
use setasign\Fpdi\PdfParser\Type\PdfIndirectObject;
use setasign\Fpdi\PdfParser\Type\PdfIndirectObjectReference;
use setasign\Fpdi\PdfParser\Type\PdfName;
use setasign\Fpdi\PdfParser\Type\PdfNull;
use setasign\Fpdi\PdfParser\Type\PdfNumeric;
use setasign\Fpdi\PdfParser\Type\PdfString;
use setasign\Fpdi\PdfParser\Type\PdfToken;
use setasign\Fpdi\PdfParser\Type\PdfType;
/**
 * A PDF parser class.
 *
 * It reads PDF values through a tokenizer/stream reader pair, locates the file header
 * and gives access to the cross reference, the catalog and indirect objects.
 *
 * @package setasign\Fpdi\PdfParser
 */
class PdfParser
{
    /**
     * The stream reader all values are read from.
     *
     * @var StreamReader
     */
    protected $streamReader;

    /**
     * The tokenizer working on the same stream reader.
     *
     * @var Tokenizer
     */
    protected $tokenizer;

    /**
     * The file header line (trimmed), set by resolveFileHeader().
     *
     * @var string
     */
    protected $fileHeader;

    /**
     * The offset of the file header, set by resolveFileHeader().
     *
     * @var int
     */
    protected $fileHeaderOffset;

    /**
     * The lazily created cross reference instance.
     *
     * @var CrossReference
     */
    protected $xref;

    /**
     * Indirect objects cached by getIndirectObject(), indexed by object number.
     *
     * @var array
     */
    protected $objects = [];

    /**
     * PdfParser constructor.
     *
     * @param StreamReader $streamReader
     */
    public function __construct(StreamReader $streamReader)
    {
        $this->tokenizer = new Tokenizer($streamReader);
        $this->streamReader = $streamReader;
    }

    /**
     * Releases the cross reference instance (it is created with a reference to this parser).
     *
     * @internal
     */
    public function cleanUp()
    {
        $this->xref = null;
    }

    /**
     * Get the stream reader instance.
     *
     * @return StreamReader
     */
    public function getStreamReader()
    {
        return $this->streamReader;
    }

    /**
     * Get the tokenizer instance.
     *
     * @return Tokenizer
     */
    public function getTokenizer()
    {
        return $this->tokenizer;
    }

    /**
     * Locates the "%PDF-" marker, remembers the header line and returns the marker's offset.
     *
     * The result is remembered: once a header was found, later calls return the stored offset.
     *
     * @throws PdfParserException If the marker cannot be found.
     * @return int
     */
    protected function resolveFileHeader()
    {
        if ($this->fileHeader) {
            return $this->fileHeaderOffset;
        }

        $reader = $this->streamReader;
        $reader->reset(0);

        // The buffer is enlarged step by step; give up at the end of the stream or
        // after a fixed number of enlargements.
        $attemptsLeft = 1000;
        while (($headerPos = \strpos($reader->getBuffer(false), '%PDF-')) === false) {
            if (!$reader->increaseLength(100) || (--$attemptsLeft === 0)) {
                throw new PdfParserException(
                    'Unable to find PDF file header.',
                    PdfParserException::FILE_HEADER_NOT_FOUND
                );
            }
        }

        $this->fileHeaderOffset = $headerPos;
        $reader->setOffset($headerPos);
        $this->fileHeader = \trim($reader->readLine());

        return $this->fileHeaderOffset;
    }

    /**
     * Get the cross reference instance (created on first use).
     *
     * @return CrossReference
     * @throws CrossReferenceException
     * @throws PdfParserException
     */
    public function getCrossReference()
    {
        if ($this->xref !== null) {
            return $this->xref;
        }

        $headerOffset = $this->resolveFileHeader();
        $this->xref = new CrossReference($this, $headerOffset);

        return $this->xref;
    }

    /**
     * Get the PDF version.
     *
     * The version is taken from the file header; a two-part "Version" entry in the
     * catalog replaces it.
     *
     * @return int[] An array of major and minor version.
     * @throws PdfParserException
     */
    public function getPdfVersion()
    {
        $this->resolveFileHeader();

        $found = \preg_match('/%PDF-(\d)\.(\d)/', $this->fileHeader, $headerMatch);
        if ($found === 0) {
            throw new PdfParserException(
                'Unable to extract PDF version from file header.',
                PdfParserException::PDF_VERSION_NOT_FOUND
            );
        }
        list(, $major, $minor) = $headerMatch;

        $catalog = $this->getCatalog();
        if (isset($catalog->value['Version'])) {
            $versionEntry = PdfType::resolve($catalog->value['Version'], $this);
            $parts = \explode('.', PdfName::unescape($versionEntry->value));
            if (\count($parts) === 2) {
                list($major, $minor) = $parts;
            }
        }

        return [(int) $major, (int) $minor];
    }

    /**
     * Get the catalog dictionary (the resolved "Root" entry of the trailer).
     *
     * @return PdfDictionary
     * @throws Type\PdfTypeException
     * @throws CrossReferenceException
     * @throws PdfParserException
     */
    public function getCatalog()
    {
        $trailer = $this->getCrossReference()->getTrailer();
        $root = PdfDictionary::get($trailer, 'Root');

        return PdfDictionary::ensure(PdfType::resolve($root, $this));
    }

    /**
     * Get an indirect object by its object number.
     *
     * @param int $objectNumber
     * @param bool $cache Whether to keep the object for later calls.
     * @return PdfIndirectObject
     * @throws CrossReferenceException
     * @throws PdfParserException
     */
    public function getIndirectObject($objectNumber, $cache = false)
    {
        $number = (int) $objectNumber;
        if (isset($this->objects[$number])) {
            return $this->objects[$number];
        }

        $object = $this->getCrossReference()->getIndirectObject($number);
        if ($cache) {
            $this->objects[$number] = $object;
        }

        return $object;
    }

    /**
     * Read a PDF value.
     *
     * If no token is passed, the next token is taken from the tokenizer. If an expected
     * type is passed, any other type (including the end of the data) raises an exception.
     *
     * @param null|bool|string $token
     * @param null|string $expectedType Class name of the expected type.
     * @return bool|PdfArray|PdfBoolean|PdfDictionary|PdfHexString|PdfIndirectObject|PdfName|PdfNull|PdfNumeric|PdfString|PdfToken|PdfIndirectObjectReference
     * @throws Type\PdfTypeException
     */
    public function readValue($token = null, $expectedType = null)
    {
        if ($token === null) {
            $token = $this->tokenizer->getNextToken();
        }

        if ($token === false) {
            if ($expectedType === null) {
                return false;
            }

            throw new Type\PdfTypeException('Got unexpected token type.', Type\PdfTypeException::INVALID_DATA_TYPE);
        }

        // Every case below returns (or throws), so the code after the switch is the default path.
        switch ($token) {
            case '(':
                $this->ensureExpectedType($token, $expectedType);
                return PdfString::parse($this->streamReader);

            case '<':
                if ($this->streamReader->getByte() !== '<') {
                    $this->ensureExpectedType($token, $expectedType);
                    return PdfHexString::parse($this->streamReader);
                }

                // a second "<" follows: this is the start of a dictionary
                $this->ensureExpectedType('<<', $expectedType);
                $this->streamReader->addOffset(1);
                return PdfDictionary::parse($this->tokenizer, $this->streamReader, $this);

            case '/':
                $this->ensureExpectedType($token, $expectedType);
                return PdfName::parse($this->tokenizer, $this->streamReader);

            case '[':
                $this->ensureExpectedType($token, $expectedType);
                return PdfArray::parse($this->tokenizer, $this);
        }

        if (\is_numeric($token)) {
            return $this->readNumericValue($token, $expectedType);
        }

        if ($token === 'true' || $token === 'false') {
            $this->ensureExpectedType($token, $expectedType);
            return PdfBoolean::create($token === 'true');
        }

        if ($token === 'null') {
            $this->ensureExpectedType($token, $expectedType);
            return new PdfNull();
        }

        $this->requireType($expectedType, PdfToken::class);

        $plainToken = new PdfToken();
        $plainToken->value = $token;

        return $plainToken;
    }

    /**
     * Handles a numeric token: it is either the start of "N G obj", of "N G R" or a plain number.
     *
     * Tokens that were read ahead but do not belong to the value are pushed back onto the
     * tokenizer stack (third token first, then the second one).
     *
     * @param string $token The numeric token.
     * @param null|string $expectedType
     * @return bool|PdfIndirectObject|PdfIndirectObjectReference|PdfNumeric
     * @throws Type\PdfTypeException
     */
    private function readNumericValue($token, $expectedType)
    {
        $second = $this->tokenizer->getNextToken();
        if ($second !== false) {
            if (\is_numeric($second)) {
                $third = $this->tokenizer->getNextToken();
                if ($third !== false) {
                    switch ($third) {
                        case 'obj':
                            $this->requireType($expectedType, PdfIndirectObject::class);
                            return PdfIndirectObject::parse(
                                $token,
                                $second,
                                $this,
                                $this->tokenizer,
                                $this->streamReader
                            );

                        case 'R':
                            $this->requireType($expectedType, PdfIndirectObjectReference::class);
                            return PdfIndirectObjectReference::create($token, $second);
                    }

                    $this->tokenizer->pushStack($third);
                }
            }

            $this->tokenizer->pushStack($second);
        }

        $this->requireType($expectedType, PdfNumeric::class);

        return PdfNumeric::create($token);
    }

    /**
     * Throws if an expected type is given and differs from the actual type.
     *
     * @param null|string $expectedType
     * @param string $actualType
     * @throws Type\PdfTypeException
     */
    private function requireType($expectedType, $actualType)
    {
        if ($expectedType !== null && $expectedType !== $actualType) {
            throw new Type\PdfTypeException(
                'Got unexpected token type.', Type\PdfTypeException::INVALID_DATA_TYPE
            );
        }
    }

    /**
     * Ensures that the token will evaluate to an expected object type (or not).
     *
     * @param string $token
     * @param string|null $expectedType
     * @return bool Always true; a mismatch raises an exception instead.
     * @throws Type\PdfTypeException
     */
    private function ensureExpectedType($token, $expectedType)
    {
        static $mapping = [
            '(' => PdfString::class,
            '<' => PdfHexString::class,
            '<<' => PdfDictionary::class,
            '/' => PdfName::class,
            '[' => PdfArray::class,
            'true' => PdfBoolean::class,
            'false' => PdfBoolean::class,
            'null' => PdfNull::class
        ];

        // The mapping is only consulted if a type is expected at all.
        if ($expectedType !== null && $mapping[$token] !== $expectedType) {
            throw new Type\PdfTypeException('Got unexpected token type.', Type\PdfTypeException::INVALID_DATA_TYPE);
        }

        return true;
    }
}

View File

@ -0,0 +1,50 @@
<?php
/**
* This file is part of FPDI
*
* @package setasign\Fpdi
* @copyright Copyright (c) 2020 Setasign GmbH & Co. KG (https://www.setasign.com)
* @license http://opensource.org/licenses/mit-license The MIT License
*/
namespace setasign\Fpdi\PdfParser;
use setasign\Fpdi\FpdiException;
/**
* Exception for the pdf parser class.
*
* The class only declares error code constants.
*
* @package setasign\Fpdi\PdfParser
*/
class PdfParserException extends FpdiException
{
/**
* Error code: not implemented.
*
* NOTE(review): not used in the parser code next to this class; meaning taken from the name.
*
* @var int
*/
const NOT_IMPLEMENTED = 0x0001;
/**
* Error code: implemented in the FPDI PDF parser.
*
* NOTE(review): presumably marks features only available in a separate parser
* implementation - confirm against the throwing code.
*
* @var int
*/
const IMPLEMENTED_IN_FPDI_PDF_PARSER = 0x0002;
/**
* Error code: invalid data type.
*
* @var int
*/
const INVALID_DATA_TYPE = 0x0003;
/**
* Error code used by PdfParser::resolveFileHeader() when no "%PDF-" marker can be found.
*
* @var int
*/
const FILE_HEADER_NOT_FOUND = 0x0004;
/**
* Error code used by PdfParser::getPdfVersion() when the version cannot be extracted
* from the file header.
*
* @var int
*/
const PDF_VERSION_NOT_FOUND = 0x0005;
/**
* Error code: invalid data size.
*
* @var int
*/
const INVALID_DATA_SIZE = 0x0006;
}

View File

@ -0,0 +1,468 @@
<?php
/**
* This file is part of FPDI
*
* @package setasign\Fpdi
* @copyright Copyright (c) 2020 Setasign GmbH & Co. KG (https://www.setasign.com)
* @license http://opensource.org/licenses/mit-license The MIT License
*/
namespace setasign\Fpdi\PdfParser;
/**
 * A stream reader class.
 *
 * It keeps a buffer (a window of the stream starting at $position) and an offset
 * into that buffer, and enlarges or re-reads the buffer on demand.
 *
 * @package setasign\Fpdi\PdfParser
 */
class StreamReader
{
    /**
     * Creates a stream reader instance by a string value.
     *
     * The content is written into a "php://temp" stream which is closed together with the instance.
     *
     * @param string $content
     * @param int $maxMemory Passed as "maxmemory" option to the temp stream.
     * @return StreamReader
     */
    public static function createByString($content, $maxMemory = 2097152)
    {
        $handle = \fopen('php://temp/maxmemory:' . ((int) $maxMemory), 'r+b');
        \fwrite($handle, $content);
        \rewind($handle);

        return new self($handle, true);
    }

    /**
     * Creates a stream reader instance by a filename.
     *
     * The file is opened in binary read mode and closed together with the instance.
     *
     * @param string $filename
     * @return StreamReader
     */
    public static function createByFile($filename)
    {
        return new self(\fopen($filename, 'rb'), true);
    }

    /**
     * Defines whether the stream should be closed when the stream reader instance is deconstructed or not.
     *
     * @var bool
     */
    protected $closeStream;

    /**
     * The stream resource.
     *
     * @var resource
     */
    protected $stream;

    /**
     * The byte-offset position in the stream (where the buffer starts).
     *
     * @var int
     */
    protected $position;

    /**
     * The byte-offset position in the buffer.
     *
     * @var int
     */
    protected $offset;

    /**
     * The buffer length.
     *
     * @var int
     */
    protected $bufferLength;

    /**
     * The total length of the stream (resolved lazily by getTotalLength()).
     *
     * @var int
     */
    protected $totalLength;

    /**
     * The buffer.
     *
     * @var string
     */
    protected $buffer;

    /**
     * StreamReader constructor.
     *
     * @param resource $stream A seekable stream resource.
     * @param bool $closeStream Defines whether to close the stream resource if the instance is destructed or not.
     * @throws \InvalidArgumentException If no resource or a non-seekable stream is passed.
     */
    public function __construct($stream, $closeStream = false)
    {
        if (!\is_resource($stream)) {
            throw new \InvalidArgumentException(
                'No stream given.'
            );
        }

        $seekable = \stream_get_meta_data($stream)['seekable'];
        if (!$seekable) {
            throw new \InvalidArgumentException(
                'Given stream is not seekable!'
            );
        }

        $this->stream = $stream;
        $this->closeStream = $closeStream;
        $this->reset();
    }

    /**
     * The destructor.
     */
    public function __destruct()
    {
        $this->cleanUp();
    }

    /**
     * Closes the file handle if this instance is responsible for it and it is still open.
     */
    public function cleanUp()
    {
        if (!$this->closeStream) {
            return;
        }

        if (\is_resource($this->stream)) {
            \fclose($this->stream);
        }
    }

    /**
     * Returns the byte length of the buffer.
     *
     * @param bool $atOffset If not false, only the bytes from the current offset on are counted.
     * @return int
     */
    public function getBufferLength($atOffset = false)
    {
        return $atOffset === false
            ? $this->bufferLength
            : $this->bufferLength - $this->offset;
    }

    /**
     * Get the current position in the stream.
     *
     * @return int
     */
    public function getPosition()
    {
        return $this->position;
    }

    /**
     * Returns the current buffer.
     *
     * @param bool $atOffset If false, the whole buffer is returned, otherwise the part from the current offset on.
     * @return string
     */
    public function getBuffer($atOffset = true)
    {
        return $atOffset === false
            ? $this->buffer
            : (string) \substr($this->buffer, $this->offset);
    }

    /**
     * Gets a byte at a specific position in the buffer.
     *
     * If the position is invalid the method will return false.
     *
     * If the $position parameter is set to null the value of $this->offset will be used.
     *
     * @param int|null $position
     * @return string|bool
     */
    public function getByte($position = null)
    {
        $index = (int) ($position === null ? $this->offset : $position);

        if ($index >= $this->bufferLength) {
            // try to load more data once; the index may still lie behind the buffer afterwards
            if (!$this->increaseLength() || $index >= $this->bufferLength) {
                return false;
            }
        }

        return $this->buffer[$index];
    }

    /**
     * Returns a byte at a specific position, and set the offset to the next byte position.
     *
     * If the position is invalid the method will return false.
     *
     * If the $position parameter is set to null the value of $this->offset will be used.
     *
     * @param int|null $position Position in the stream (not in the buffer).
     * @return string|bool
     */
    public function readByte($position = null)
    {
        if ($position === null) {
            $index = $this->offset;
        } else {
            $position = (int) $position;
            if ($position >= $this->position && $position < $this->position + $this->bufferLength) {
                // the byte is available in the current buffer
                $index = $position - $this->position;
            } else {
                $this->reset($position);
                $index = $this->offset;
            }
        }

        if ($index >= $this->bufferLength) {
            if (!$this->increaseLength() || $index >= $this->bufferLength) {
                return false;
            }
        }

        $this->offset = $index + 1;

        return $this->buffer[$index];
    }

    /**
     * Read bytes from the current or a specific offset position and set the internal pointer to the next byte.
     *
     * If the requested bytes are not available the method will return false.
     *
     * If the $position parameter is set to null the value of $this->offset will be used.
     *
     * @param int $length
     * @param int|null $position Position in the stream (not in the buffer).
     * @return string|bool
     */
    public function readBytes($length, $position = null)
    {
        $length = (int) $length;

        if ($position === null) {
            $start = $this->offset;
        } elseif ($position >= $this->position && $position < $this->position + $this->bufferLength) {
            // the start is available in the current buffer
            $start = $position - $this->position;
        } else {
            $this->reset($position, $length);
            $start = $this->offset;
        }

        if (($start + $length) > $this->bufferLength) {
            if (!$this->increaseLength($length) || ($start + $length) > $this->bufferLength) {
                return false;
            }
        }

        $this->offset = $start + $length;

        return \substr($this->buffer, $start, $length);
    }

    /**
     * Read a line from the current position.
     *
     * The line ends at "\n", "\r", "\r\n", at the end of the data or as soon as $length bytes were collected.
     * The line ending itself is not part of the result.
     *
     * @param int $length
     * @return string|bool False if there is no content left at all.
     */
    public function readLine($length = 1024)
    {
        if ($this->ensureContent() === false) {
            return false;
        }

        $line = '';
        while ($this->ensureContent()) {
            $byte = $this->readByte();

            // swallow the "\n" of a "\r\n" sequence
            if ($byte === "\r" && $this->getByte() === "\n") {
                $this->addOffset(1);
            }

            if ($byte === "\r" || $byte === "\n") {
                break;
            }

            $line .= $byte;
            if (\strlen($line) >= $length) {
                break;
            }
        }

        return $line;
    }

    /**
     * Set the offset position in the current buffer.
     *
     * @param int $offset
     * @throws \OutOfRangeException If the offset is negative or greater than the buffer length.
     */
    public function setOffset($offset)
    {
        if ($offset < 0 || $offset > $this->bufferLength) {
            throw new \OutOfRangeException(
                \sprintf('Offset (%s) out of range (length: %s)', $offset, $this->bufferLength)
            );
        }

        $this->offset = (int) $offset;
    }

    /**
     * Returns the current offset in the current buffer.
     *
     * @return int
     */
    public function getOffset()
    {
        return $this->offset;
    }

    /**
     * Add an offset to the current offset.
     *
     * @param int $offset
     */
    public function addOffset($offset)
    {
        $this->setOffset($this->offset + $offset);
    }

    /**
     * Make sure that there is at least one character beyond the current offset in the buffer.
     *
     * @return bool False if the buffer cannot be enlarged any more.
     */
    public function ensureContent()
    {
        for (;;) {
            if ($this->offset < $this->bufferLength) {
                return true;
            }

            if (!$this->increaseLength()) {
                return false;
            }
        }
    }

    /**
     * Returns the stream.
     *
     * @return resource
     */
    public function getStream()
    {
        return $this->stream;
    }

    /**
     * Gets the total available length (the "size" reported by fstat()).
     *
     * @return int
     */
    public function getTotalLength()
    {
        if ($this->totalLength !== null) {
            return $this->totalLength;
        }

        $stat = \fstat($this->stream);
        $this->totalLength = $stat['size'];

        return $this->totalLength;
    }

    /**
     * Resets the buffer to a position and re-read the buffer with the given length.
     *
     * If the $pos parameter is null the buffer starts at the current read position
     * (buffer position plus offset).
     *
     * If the $pos parameter is negative the start buffer position will be the $pos'th position from
     * the end of the file.
     *
     * If the $pos parameter is negative and the absolute value is bigger then the totalLength of
     * the file $pos will set to zero.
     *
     * @param int|null $pos Start position of the new buffer
     * @param int $length Length of the new buffer. Mustn't be negative
     */
    public function reset($pos = 0, $length = 200)
    {
        if ($pos === null) {
            $pos = $this->position + $this->offset;
        } elseif ($pos < 0) {
            $pos = \max(0, $this->getTotalLength() + $pos);
        }

        \fseek($this->stream, $pos);
        $this->position = $pos;

        if ($length > 0) {
            $this->buffer = \fread($this->stream, $length);
        } else {
            $this->buffer = '';
        }
        $this->bufferLength = \strlen($this->buffer);
        $this->offset = 0;

        // A single read may deliver less than requested (e.g. if a stream wrapper is in use),
        // so the missing part is requested through increaseLength().
        $missing = $this->bufferLength < $length;
        if ($missing && $this->increaseLength($length - $this->bufferLength)) {
            // increaseLength() takes a minimum length, so cut the buffer back to the required bytes
            $this->buffer = \substr($this->buffer, 0, $length);
            $this->bufferLength = \strlen($this->buffer);
        }
    }

    /**
     * Ensures bytes in the buffer with a specific length and location in the file.
     *
     * If the requested range is completely inside the current buffer only the offset is moved,
     * otherwise the buffer is re-read.
     *
     * @param int $pos
     * @param int $length
     * @see reset()
     */
    public function ensure($pos, $length)
    {
        $bufferEnd = $this->position + $this->bufferLength;

        $startsInBuffer = $pos >= $this->position && $pos < $bufferEnd;
        if ($startsInBuffer && $bufferEnd >= ($pos + $length)) {
            $this->offset = $pos - $this->position;
            return;
        }

        $this->reset($pos, $length);
    }

    /**
     * Forcefully read more data into the buffer.
     *
     * At least 100 bytes are requested, however large $minLength is.
     *
     * @param int $minLength
     * @return bool Returns false if the stream reaches the end
     */
    public function increaseLength($minLength = 100)
    {
        if (\feof($this->stream) || $this->getTotalLength() === $this->position + $this->bufferLength) {
            return false;
        }

        $targetLength = $this->bufferLength + \max($minLength, 100);

        // read until the target length is reached or the stream is exhausted
        do {
            $this->buffer .= \fread($this->stream, $targetLength - $this->bufferLength);
            $this->bufferLength = \strlen($this->buffer);
        } while (($this->bufferLength !== $targetLength) && !\feof($this->stream));

        return true;
    }
}

View File

@ -0,0 +1,161 @@
<?php
/**
* This file is part of FPDI
*
* @package setasign\Fpdi
* @copyright Copyright (c) 2020 Setasign GmbH & Co. KG (https://www.setasign.com)
* @license http://opensource.org/licenses/mit-license The MIT License
*/
namespace setasign\Fpdi\PdfParser;
/**
 * A tokenizer class.
 *
 * It splits the data of a stream reader into tokens: single delimiter characters
 * or runs of regular characters. Tokens can be pushed back onto a stack.
 *
 * @package setasign\Fpdi\PdfParser
 */
class Tokenizer
{
    /**
     * The stream reader the tokens are read from.
     *
     * @var StreamReader
     */
    protected $streamReader;

    /**
     * A token stack. Tokens on it are returned (last pushed first) before the stream is read.
     *
     * @var string[]
     */
    protected $stack = [];

    /**
     * Tokenizer constructor.
     *
     * @param StreamReader $streamReader
     */
    public function __construct(StreamReader $streamReader)
    {
        $this->streamReader = $streamReader;
    }

    /**
     * Get the stream reader instance.
     *
     * @return StreamReader
     */
    public function getStreamReader()
    {
        return $this->streamReader;
    }

    /**
     * Clear the token stack.
     */
    public function clearStack()
    {
        $this->stack = [];
    }

    /**
     * Push a token onto the stack.
     *
     * @param string $token
     */
    public function pushStack($token)
    {
        $this->stack[] = $token;
    }

    /**
     * Get next token.
     *
     * Tokens on the stack are served first. Leading white space and comments ("%" up to the
     * end of the line) are skipped.
     *
     * @return bool|string False if no further byte can be read.
     */
    public function getNextToken()
    {
        $stacked = \array_pop($this->stack);
        if ($stacked !== null) {
            return $stacked;
        }

        $reader = $this->streamReader;

        $byte = $reader->readByte();
        if ($byte === false) {
            return false;
        }

        if (\in_array($byte, ["\x20", "\x0A", "\x0D", "\x0C", "\x09", "\x00"], true)) {
            if ($this->leapWhiteSpaces() === false) {
                return false;
            }
            $byte = $reader->readByte();
        }

        // delimiter characters are tokens on their own
        if (\in_array($byte, ['/', '[', ']', '(', ')', '{', '}', '<', '>'], true)) {
            return $byte;
        }

        if ($byte === '%') {
            // a comment: drop the rest of the line and start over
            $reader->readLine();
            return $this->getNextToken();
        }

        // The first byte of the token is already consumed, so the scan starts behind it.
        // Scanning the buffer with strcspn() is faster than checking single bytes.
        $scanStart = $reader->getOffset();
        do {
            $buffer = $reader->getBuffer(false);
            $span = \strcspn(
                $buffer,
                "\x00\x09\x0A\x0C\x0D\x20()<>[]{}/%",
                $scanStart
            );
            // Repeat only if the token reaches the end of the buffer and more data could be loaded.
        } while (
            $buffer !== false &&
            (
                $scanStart + $span === \strlen($buffer) &&
                $reader->increaseLength()
            )
        );

        $tokenString = \substr($buffer, $scanStart - 1, $span + 1);
        $reader->setOffset($scanStart + $span);

        return $tokenString;
    }

    /**
     * Leap white spaces.
     *
     * Moves the offset of the stream reader behind all white space characters
     * starting at the current offset.
     *
     * @return boolean False if the end of the data is reached.
     */
    public function leapWhiteSpaces()
    {
        $reader = $this->streamReader;

        do {
            if (!$reader->ensureContent()) {
                return false;
            }

            $skipped = \strspn($reader->getBuffer(false), "\x20\x0A\x0C\x0D\x09\x00", $reader->getOffset());
            if ($skipped > 0) {
                $reader->addOffset($skipped);
            }
            // If the whole rest of the buffer was white space, continue with fresh data.
        } while ($reader->getOffset() >= $reader->getBufferLength());

        return true;
    }
}

View File

@ -0,0 +1,85 @@
<?php
/**
* This file is part of FPDI
*
* @package setasign\Fpdi
* @copyright Copyright (c) 2020 Setasign GmbH & Co. KG (https://www.setasign.com)
* @license http://opensource.org/licenses/mit-license The MIT License
*/
namespace setasign\Fpdi\PdfParser\Type;
use setasign\Fpdi\PdfParser\PdfParser;
use setasign\Fpdi\PdfParser\Tokenizer;
/**
 * Class representing a PDF array object
 *
 * @package setasign\Fpdi\PdfParser\Type
 * @property array $value The value of the PDF type.
 */
class PdfArray extends PdfType
{
    /**
     * Parses an array of the passed tokenizer and parser.
     *
     * Values are read until the closing "]" token is found.
     *
     * @param Tokenizer $tokenizer
     * @param PdfParser $parser
     * @return bool|self False if the data ends before the array is closed.
     * @throws PdfTypeException
     */
    public static function parse(Tokenizer $tokenizer, PdfParser $parser)
    {
        $items = [];

        for (;;) {
            $token = $tokenizer->getNextToken();
            if ($token === ']') {
                break;
            }

            if ($token === false) {
                return false;
            }

            $item = $parser->readValue($token);
            if ($item === false) {
                return false;
            }

            $items[] = $item;
        }

        $array = new self;
        $array->value = $items;

        return $array;
    }

    /**
     * Helper method to create an instance.
     *
     * @param PdfType[] $values
     * @return self
     */
    public static function create(array $values = [])
    {
        $array = new self;
        $array->value = $values;

        return $array;
    }

    /**
     * Ensures that the passed array is a PdfArray instance with a (optional) specific size.
     *
     * @param mixed $array
     * @param null|int $size The required number of entries or null to accept any size.
     * @return self
     * @throws PdfTypeException
     */
    public static function ensure($array, $size = null)
    {
        $ensured = PdfType::ensureType(self::class, $array, 'Array value expected.');

        if ($size === null) {
            return $ensured;
        }

        if (\count($array->value) !== $size) {
            throw new PdfTypeException(
                \sprintf('Array with %s entries expected.', $size),
                PdfTypeException::INVALID_DATA_SIZE
            );
        }

        return $ensured;
    }
}

View File

@ -0,0 +1,43 @@
<?php
/**
* This file is part of FPDI
*
* @package setasign\Fpdi
* @copyright Copyright (c) 2020 Setasign GmbH & Co. KG (https://www.setasign.com)
* @license http://opensource.org/licenses/mit-license The MIT License
*/
namespace setasign\Fpdi\PdfParser\Type;
/**
 * Class representing a boolean PDF object
 *
 * @package setasign\Fpdi\PdfParser\Type
 */
class PdfBoolean extends PdfType
{
    /**
     * Helper method to create an instance.
     *
     * @param bool $value The value; it is cast to a boolean.
     * @return self
     */
    public static function create($value)
    {
        $boolean = new self;
        $boolean->value = (bool) $value;

        return $boolean;
    }

    /**
     * Ensures that the passed value is a PdfBoolean instance.
     *
     * @param mixed $value
     * @return self
     * @throws PdfTypeException
     */
    public static function ensure($value)
    {
        $ensured = PdfType::ensureType(self::class, $value, 'Boolean value expected.');

        return $ensured;
    }
}

View File

@ -0,0 +1,135 @@
<?php
/**
* This file is part of FPDI
*
* @package setasign\Fpdi
* @copyright Copyright (c) 2020 Setasign GmbH & Co. KG (https://www.setasign.com)
* @license http://opensource.org/licenses/mit-license The MIT License
*/
namespace setasign\Fpdi\PdfParser\Type;
use setasign\Fpdi\PdfParser\PdfParser;
use setasign\Fpdi\PdfParser\StreamReader;
use setasign\Fpdi\PdfParser\Tokenizer;
/**
 * Class representing a PDF dictionary object
 *
 * @package setasign\Fpdi\PdfParser\Type
 */
class PdfDictionary extends PdfType
{
    /**
     * Parses a dictionary of the passed tokenizer, stream-reader and parser.
     *
     * The opening "<<" has to be consumed already. Key/value pairs are read until the closing
     * ">>" is found. Entries with a null value are left out. If a key is not a name object
     * the remaining content up to the closing ">>" is skipped and the entries read so far are kept.
     *
     * @param Tokenizer $tokenizer
     * @param StreamReader $streamReader
     * @param PdfParser $parser
     * @return bool|self False if the data ends before the dictionary is closed.
     * @throws PdfTypeException
     */
    public static function parse(Tokenizer $tokenizer, StreamReader $streamReader, PdfParser $parser)
    {
        $entries = [];

        while (true) {
            $token = $tokenizer->getNextToken();
            if ($token === '>' && $streamReader->getByte() === '>') {
                $streamReader->addOffset(1);
                break;
            }

            $key = $parser->readValue($token);
            if ($key === false) {
                return false;
            }

            // ensure the first value to be a Name object
            if (!($key instanceof PdfName)) {
                $lastToken = null;
                // Ignore all other entries and search for the closing brackets. The dictionary
                // ends with two consecutive ">" tokens; stopping at the first single ">" would
                // leave the second one in the stream for the caller.
                while (($token = $tokenizer->getNextToken()) !== '>' || $lastToken !== '>') {
                    if ($token === false) {
                        return false;
                    }
                    $lastToken = $token;
                }

                break;
            }

            $value = $parser->readValue();
            if ($value === false) {
                return false;
            }

            if ($value instanceof PdfNull) {
                continue;
            }

            // catch missing value
            if ($value instanceof PdfToken && $value->value === '>' && $streamReader->getByte() === '>') {
                $streamReader->addOffset(1);
                break;
            }

            $entries[$key->value] = $value;
        }

        $v = new self;
        $v->value = $entries;

        return $v;
    }

    /**
     * Helper method to create an instance.
     *
     * @param PdfType[] $entries The keys are the name entries of the dictionary.
     * @return self
     */
    public static function create(array $entries = [])
    {
        $v = new self;
        $v->value = $entries;

        return $v;
    }

    /**
     * Get a value by its key from a dictionary or a default value.
     *
     * @param mixed $dictionary Has to be a PdfDictionary instance (checked through ensure()).
     * @param string $key
     * @param PdfType|mixed|null $default Returned if the key is not set; null results in a PdfNull instance.
     * @return PdfNull|PdfType
     * @throws PdfTypeException
     */
    public static function get($dictionary, $key, PdfType $default = null)
    {
        $dictionary = self::ensure($dictionary);

        if (isset($dictionary->value[$key])) {
            return $dictionary->value[$key];
        }

        return $default === null
            ? new PdfNull()
            : $default;
    }

    /**
     * Ensures that the passed value is a PdfDictionary instance.
     *
     * @param mixed $dictionary
     * @return self
     * @throws PdfTypeException
     */
    public static function ensure($dictionary)
    {
        return PdfType::ensureType(self::class, $dictionary, 'Dictionary value expected.');
    }
}

View File

@ -0,0 +1,82 @@
<?php
/**
* This file is part of FPDI
*
* @package setasign\Fpdi
* @copyright Copyright (c) 2020 Setasign GmbH & Co. KG (https://www.setasign.com)
* @license http://opensource.org/licenses/mit-license The MIT License
*/
namespace setasign\Fpdi\PdfParser\Type;
use setasign\Fpdi\PdfParser\StreamReader;
/**
 * Class representing a hexadecimal encoded PDF string object
 *
 * @package setasign\Fpdi\PdfParser\Type
 */
class PdfHexString extends PdfType
{
    /**
     * Parses a hexadecimal string object from the stream reader.
     *
     * Everything from the current offset up to (but without) the next ">" becomes the value;
     * the offset is moved behind the ">".
     *
     * @param StreamReader $streamReader
     * @return bool|self False if no ">" is found before the end of the data.
     */
    public static function parse(StreamReader $streamReader)
    {
        $start = $streamReader->getOffset();

        do {
            $buffer = $streamReader->getBuffer(false);
            $end = \strpos($buffer, '>', $start);

            // not found yet: load more data or give up at the end of the stream
            if ($end === false && !$streamReader->increaseLength()) {
                return false;
            }
        } while ($end === false);

        $hex = \substr($buffer, $start, $end - $start);
        $streamReader->setOffset($end + 1);

        $hexString = new self;
        $hexString->value = $hex;

        return $hexString;
    }

    /**
     * Helper method to create an instance.
     *
     * @param string $string The hex encoded string.
     * @return self
     */
    public static function create($string)
    {
        $hexString = new self;
        $hexString->value = $string;

        return $hexString;
    }

    /**
     * Ensures that the passed value is a PdfHexString instance.
     *
     * @param mixed $hexString
     * @return self
     * @throws PdfTypeException
     */
    public static function ensure($hexString)
    {
        $ensured = PdfType::ensureType(self::class, $hexString, 'Hex string value expected.');

        return $ensured;
    }
}

View File

@ -0,0 +1,104 @@
<?php
/**
* This file is part of FPDI
*
* @package setasign\Fpdi
* @copyright Copyright (c) 2020 Setasign GmbH & Co. KG (https://www.setasign.com)
* @license http://opensource.org/licenses/mit-license The MIT License
*/
namespace setasign\Fpdi\PdfParser\Type;
use setasign\Fpdi\PdfParser\PdfParser;
use setasign\Fpdi\PdfParser\StreamReader;
use setasign\Fpdi\PdfParser\Tokenizer;
/**
 * Class representing an indirect object
 *
 * @package setasign\Fpdi\PdfParser\Type
 */
class PdfIndirectObject extends PdfType
{
    /**
     * The object number.
     *
     * @var int
     */
    public $objectNumber;

    /**
     * The generation number.
     *
     * @var int
     */
    public $generationNumber;

    /**
     * Parses an indirect object from a tokenizer, parser and stream-reader.
     *
     * The value of the object is read first. If it is followed by the "stream" keyword
     * the value is replaced by the parsed stream; any other following token is pushed back.
     *
     * @param int $objectNumberToken
     * @param int $objectGenerationNumberToken
     * @param PdfParser $parser
     * @param Tokenizer $tokenizer
     * @param StreamReader $reader
     * @return bool|self False if no value can be read.
     * @throws PdfTypeException
     */
    public static function parse(
        $objectNumberToken,
        $objectGenerationNumberToken,
        PdfParser $parser,
        Tokenizer $tokenizer,
        StreamReader $reader
    ) {
        $content = $parser->readValue();
        if ($content === false) {
            return false;
        }

        $following = $tokenizer->getNextToken();
        if ($following === 'stream') {
            $content = PdfStream::parse($content, $reader, $parser);
        } elseif ($following !== false) {
            // not part of this object's value: hand it back to the tokenizer
            $tokenizer->pushStack($following);
        }

        $object = new self;
        $object->objectNumber = (int) $objectNumberToken;
        $object->generationNumber = (int) $objectGenerationNumberToken;
        $object->value = $content;

        return $object;
    }

    /**
     * Helper method to create an instance.
     *
     * @param int $objectNumber
     * @param int $generationNumber
     * @param PdfType $value
     * @return self
     */
    public static function create($objectNumber, $generationNumber, PdfType $value)
    {
        $object = new self;
        $object->objectNumber = (int) $objectNumber;
        $object->generationNumber = (int) $generationNumber;
        $object->value = $value;

        return $object;
    }

    /**
     * Ensures that the passed value is a PdfIndirectObject instance.
     *
     * @param mixed $indirectObject
     * @return self
     * @throws PdfTypeException
     */
    public static function ensure($indirectObject)
    {
        $ensured = PdfType::ensureType(self::class, $indirectObject, 'Indirect object expected.');

        return $ensured;
    }
}

View File

@ -0,0 +1,53 @@
<?php
/**
* This file is part of FPDI
*
* @package setasign\Fpdi
* @copyright Copyright (c) 2020 Setasign GmbH & Co. KG (https://www.setasign.com)
* @license http://opensource.org/licenses/mit-license The MIT License
*/
namespace setasign\Fpdi\PdfParser\Type;
/**
 * Class representing an indirect object reference
 *
 * The referenced object number is kept in $value, the generation number in $generationNumber.
 *
 * @package setasign\Fpdi\PdfParser\Type
 */
class PdfIndirectObjectReference extends PdfType
{
    /**
     * The generation number.
     *
     * @var int
     */
    public $generationNumber;

    /**
     * Helper method to create an instance.
     *
     * @param int $objectNumber
     * @param int $generationNumber
     * @return self
     */
    public static function create($objectNumber, $generationNumber)
    {
        $reference = new self;
        $reference->value = (int) $objectNumber;
        $reference->generationNumber = (int) $generationNumber;

        return $reference;
    }

    /**
     * Ensures that the passed value is a PdfIndirectObjectReference instance.
     *
     * @param mixed $value
     * @return self
     * @throws PdfTypeException
     */
    public static function ensure($value)
    {
        $ensured = PdfType::ensureType(self::class, $value, 'Indirect reference value expected.');

        return $ensured;
    }
}

View File

@ -0,0 +1,82 @@
<?php
/**
* This file is part of FPDI
*
* @package setasign\Fpdi
* @copyright Copyright (c) 2020 Setasign GmbH & Co. KG (https://www.setasign.com)
* @license http://opensource.org/licenses/mit-license The MIT License
*/
namespace setasign\Fpdi\PdfParser\Type;
use setasign\Fpdi\PdfParser\StreamReader;
use setasign\Fpdi\PdfParser\Tokenizer;
/**
 * Class representing a PDF name object
 *
 * @package setasign\Fpdi\PdfParser\Type
 */
class PdfName extends PdfType
{
    /**
     * Parses a name object from the passed tokenizer and stream-reader.
     *
     * If the byte at the current offset is a white space or delimiter character the name is
     * empty, otherwise the next token is used as the name.
     *
     * @param Tokenizer $tokenizer
     * @param StreamReader $streamReader
     * @return self
     */
    public static function parse(Tokenizer $tokenizer, StreamReader $streamReader)
    {
        $name = new self;

        $isEmptyName = \strspn($streamReader->getByte(), "\x00\x09\x0A\x0C\x0D\x20()<>[]{}/%") !== 0;
        $name->value = $isEmptyName ? '' : (string) $tokenizer->getNextToken();

        return $name;
    }

    /**
     * Unescapes a name string.
     *
     * Every "#" followed by two hexadecimal digits is replaced by the byte with that code.
     *
     * @param string $value
     * @return string
     */
    static public function unescape($value)
    {
        // nothing to do without a number sign
        if (\strpos($value, '#') === false) {
            return $value;
        }

        $decodeHexPair = function ($hit) {
            return \chr(\hexdec($hit[1]));
        };

        return \preg_replace_callback('/#([a-fA-F\d]{2})/', $decodeHexPair, $value);
    }

    /**
     * Helper method to create an instance.
     *
     * @param string $string
     * @return self
     */
    public static function create($string)
    {
        $name = new self;
        $name->value = $string;

        return $name;
    }

    /**
     * Ensures that the passed value is a PdfName instance.
     *
     * @param mixed $name
     * @return self
     * @throws PdfTypeException
     */
    public static function ensure($name)
    {
        $ensured = PdfType::ensureType(self::class, $name, 'Name value expected.');

        return $ensured;
    }
}

View File

@ -0,0 +1,20 @@
<?php
/**
* This file is part of FPDI
*
* @package setasign\Fpdi
* @copyright Copyright (c) 2020 Setasign GmbH & Co. KG (https://www.setasign.com)
* @license http://opensource.org/licenses/mit-license The MIT License
*/
namespace setasign\Fpdi\PdfParser\Type;
/**
* Class representing a PDF null object.
*
* It declares no members of its own; it only exists as a distinct type derived from PdfType.
*
* @package setasign\Fpdi\PdfParser\Type
*/
class PdfNull extends PdfType
{
// empty body
}

View File

@ -0,0 +1,44 @@
<?php
/**
* This file is part of FPDI
*
* @package setasign\Fpdi
* @copyright Copyright (c) 2020 Setasign GmbH & Co. KG (https://www.setasign.com)
* @license http://opensource.org/licenses/mit-license The MIT License
*/
namespace setasign\Fpdi\PdfParser\Type;
/**
 * Class representing a numeric PDF object
 *
 * @package setasign\Fpdi\PdfParser\Type
 */
class PdfNumeric extends PdfType
{
    /**
     * Helper method to create an instance.
     *
     * The value is converted into a number by an addition with zero.
     *
     * @param int|float $value
     * @return PdfNumeric
     */
    public static function create($value)
    {
        $numeric = new self;
        $numeric->value = 0 + $value;

        return $numeric;
    }

    /**
     * Ensures that the passed value is a PdfNumeric instance.
     *
     * @param mixed $value
     * @return self
     * @throws PdfTypeException
     */
    public static function ensure($value)
    {
        $ensured = PdfType::ensureType(self::class, $value, 'Numeric value expected.');

        return $ensured;
    }
}

View File

@ -0,0 +1,320 @@
<?php
/**
* This file is part of FPDI
*
* @package setasign\Fpdi
* @copyright Copyright (c) 2020 Setasign GmbH & Co. KG (https://www.setasign.com)
* @license http://opensource.org/licenses/mit-license The MIT License
*/
namespace setasign\Fpdi\PdfParser\Type;
use setasign\Fpdi\PdfParser\CrossReference\CrossReferenceException;
use setasign\Fpdi\PdfParser\Filter\Ascii85;
use setasign\Fpdi\PdfParser\Filter\AsciiHex;
use setasign\Fpdi\PdfParser\Filter\FilterException;
use setasign\Fpdi\PdfParser\Filter\Flate;
use setasign\Fpdi\PdfParser\Filter\Lzw;
use setasign\Fpdi\PdfParser\PdfParser;
use setasign\Fpdi\PdfParser\PdfParserException;
use setasign\Fpdi\PdfParser\StreamReader;
use setasign\FpdiPdfParser\PdfParser\Filter\Predictor;
/**
* Class representing a PDF stream object
*
* @package setasign\Fpdi\PdfParser\Type
*/
class PdfStream extends PdfType
{
/**
 * Creates a stream object whose data starts behind the next line break in the stream reader.
 *
 * The data itself is not read here: only the absolute byte position of its first byte is remembered, so that
 * getStream() can read it on demand.
 *
 * @param PdfDictionary $dictionary The stream dictionary.
 * @param StreamReader $reader
 * @param PdfParser $parser Optional to keep backwards compatibility
 * @return self
 * @throws PdfTypeException If no line break follows the current reader offset.
 */
public static function parse(PdfDictionary $dictionary, StreamReader $reader, PdfParser $parser = null)
{
    $instance = new self();
    $instance->value = $dictionary;
    $instance->reader = $reader;
    $instance->parser = $parser;

    // Walk forward until the first line-break byte (or the end of the data) is reached.
    $cursor = $reader->getOffset();
    $lineBreak = $reader->getByte($cursor);
    while ($lineBreak !== false && $lineBreak !== "\n" && $lineBreak !== "\r") {
        $lineBreak = $reader->getByte(++$cursor);
    }

    if ($lineBreak === false) {
        throw new PdfTypeException(
            'Unable to parse stream data. No newline after the stream keyword found.',
            PdfTypeException::NO_NEWLINE_AFTER_STREAM_KEYWORD
        );
    }

    $following = $reader->getByte($cursor + 1);

    // Step over the line break that was found ...
    $cursor++;
    // ... and over the second byte as well if the line break is a CR LF pair.
    if ($lineBreak === "\r" && $following === "\n") {
        $cursor++;
    }

    $reader->setOffset($cursor);

    // Only remember the byte offset; the stream is read when it is actually needed.
    $instance->stream = $reader->getPosition() + $reader->getOffset();

    return $instance;
}
/**
 * Creates an instance from a dictionary and stream data that is already available as a string.
 *
 * @param PdfDictionary $dictionary
 * @param string $stream
 * @return self
 */
public static function create(PdfDictionary $dictionary, $stream)
{
    $instance = new self();
    $instance->stream = (string) $stream;
    $instance->value = $dictionary;

    return $instance;
}
/**
 * Returns the passed value if it is a PdfStream instance and throws otherwise.
 *
 * @param mixed $stream
 * @return self
 * @throws PdfTypeException
 */
public static function ensure($stream)
{
    $checked = PdfType::ensureType(self::class, $stream, 'Stream value expected.');

    return $checked;
}
/**
 * The stream or its byte-offset position.
 *
 * parse() stores an integer here (reader position plus offset of the first data byte), create() stores the
 * data itself as a string. getStream() treats an integer as "not read yet" and, when caching is requested,
 * replaces it with the string that was read.
 *
 * @var int|string
 */
protected $stream;
/**
 * The stream reader instance.
 *
 * Assigned by parse() only. getStream() sets it to null after the stream data has been cached.
 *
 * @var StreamReader|null
 */
protected $reader;
/**
 * The PDF parser instance.
 *
 * Optional: parse() accepts null for it and create() does not set it at all, so it has to be checked
 * against null before use.
 *
 * @var PdfParser|null
 */
protected $parser;
/**
 * Returns the raw (still filtered) stream data.
 *
 * If only the byte offset of the stream is known, the data is read from the stream reader. The "Length" entry
 * of the dictionary is used for that if it is a non-zero number; otherwise, or if the data read that way is
 * not followed by the "endstream" keyword, the stream is extracted by searching for that keyword.
 *
 * @param bool $cache Whether cache the stream data or not.
 * @return bool|string
 * @throws PdfTypeException
 * @throws CrossReferenceException
 * @throws PdfParserException
 */
public function getStream($cache = false)
{
    // The data is already available as a string (created manually or cached by an earlier call).
    if (!\is_int($this->stream)) {
        return $this->stream;
    }

    $declaredLength = PdfDictionary::get($this->value, 'Length');
    if ($this->parser !== null) {
        $declaredLength = PdfType::resolve($declaredLength, $this->parser);
    }

    $lengthIsUsable = $declaredLength instanceof PdfNumeric && $declaredLength->value !== 0;
    if ($lengthIsUsable) {
        $this->reader->reset($this->stream, $declaredLength->value);
        $data = $this->reader->getBuffer(false);

        if ($this->parser !== null) {
            // Verify the declared length: the next token behind the data has to be "endstream".
            $this->reader->reset($this->stream + strlen($data));
            $this->parser->getTokenizer()->clearStack();
            $nextToken = $this->parser->readValue();

            $isFollowedByKeyword = $nextToken instanceof PdfToken && $nextToken->value === 'endstream';
            if (!$isFollowedByKeyword) {
                // The declared length is wrong, fall back to the manual extraction.
                $this->reader->reset($this->stream, 100000);
                $data = $this->extractStream();
                $this->reader->reset($this->stream + strlen($data));
            }
        }
    } else {
        $this->reader->reset($this->stream, 100000);
        $data = $this->extractStream();
    }

    if ($cache === false) {
        return $data;
    }

    // Keep the data and drop the reader, it is not needed anymore.
    $this->stream = $data;
    $this->reader = null;

    return $this->stream;
}
/**
 * Extracts the stream data by searching for the "endstream" keyword in the reader's buffer.
 *
 * The buffer is enlarged in steps of 100000 bytes until the keyword is found.
 *
 * @return string The data in front of the keyword, without a trailing LF or CR LF.
 * @throws PdfTypeException If the buffer cannot be enlarged any further and the keyword was not found.
 */
protected function extractStream()
{
    do {
        $data = $this->reader->getBuffer(false);
        $keywordPosition = \strpos($data, 'endstream');
        if ($keywordPosition === false && !$this->reader->increaseLength(100000)) {
            throw new PdfTypeException('Cannot extract stream.');
        }
    } while ($keywordPosition === false);

    $data = \substr($data, 0, $keywordPosition);

    // The end-of-line marker in front of the keyword is either CARRIAGE RETURN (\r) + LINE FEED (\n) or a
    // single LINE FEED (\n). A CARRIAGE RETURN (\r) alone is not treated as a marker and is kept.
    if (\substr($data, -1) === "\n") {
        $data = \substr($data, 0, -1);
        if (\substr($data, -1) === "\r") {
            $data = \substr($data, 0, -1);
        }
    }

    return $data;
}
/**
 * Get the unfiltered stream data.
 *
 * The raw data is read through getStream() and then decoded by every filter named in the "Filter" entry of
 * the stream dictionary, in the given order.
 *
 * The "Filter" and "DecodeParms" entries, the elements of their arrays and the predictor parameters may be
 * stored as indirect references. If a parser instance is available they are resolved before they are
 * evaluated, the same way getStream() resolves the "Length" entry. Without this an indirectly referenced
 * filter name would not be recognised as a name, the filter would be skipped and still encoded data would
 * be returned.
 *
 * @return string
 * @throws FilterException If a filter is not supported.
 * @throws CrossReferenceException
 * @throws PdfParserException If a predictor is needed but the "FPDI PDF-Parser" add-on is not available.
 */
public function getUnfilteredStream()
{
    $stream = $this->getStream();

    $filters = $this->resolveIfPossible(PdfDictionary::get($this->value, 'Filter'));
    if ($filters instanceof PdfNull) {
        return $stream;
    }

    // A single filter is handled like an array with one element.
    if ($filters instanceof PdfArray) {
        $filters = $filters->value;
    } else {
        $filters = [$filters];
    }

    $decodeParams = $this->resolveIfPossible(PdfDictionary::get($this->value, 'DecodeParms'));
    if ($decodeParams instanceof PdfArray) {
        $decodeParams = $decodeParams->value;
    } else {
        $decodeParams = [$decodeParams];
    }

    foreach ($filters as $key => $filter) {
        $filter = $this->resolveIfPossible($filter);
        if (!($filter instanceof PdfName)) {
            continue;
        }

        // The decode parameters belong to the filter at the same index.
        $decodeParam = null;
        if (isset($decodeParams[$key])) {
            $candidate = $this->resolveIfPossible($decodeParams[$key]);
            $decodeParam = ($candidate instanceof PdfDictionary ? $candidate : null);
        }

        switch ($filter->value) {
            case 'FlateDecode':
            case 'Fl':
            case 'LZWDecode':
            case 'LZW':
                if (\strpos($filter->value, 'LZW') === 0) {
                    $filterObject = new Lzw();
                } else {
                    $filterObject = new Flate();
                }

                $stream = $filterObject->decode($stream);

                if ($decodeParam instanceof PdfDictionary) {
                    $predictor = $this->resolveIfPossible(
                        PdfDictionary::get($decodeParam, 'Predictor', PdfNumeric::create(1))
                    );
                    // A predictor of 1 means that no prediction was applied.
                    if ($predictor->value !== 1) {
                        if (!\class_exists(Predictor::class)) {
                            throw new PdfParserException(
                                'This PDF document makes use of features which are only implemented in the ' .
                                'commercial "FPDI PDF-Parser" add-on (see https://www.setasign.com/fpdi-pdf-' .
                                'parser).',
                                PdfParserException::IMPLEMENTED_IN_FPDI_PDF_PARSER
                            );
                        }

                        $colors = $this->resolveIfPossible(
                            PdfDictionary::get($decodeParam, 'Colors', PdfNumeric::create(1))
                        );
                        $bitsPerComponent = $this->resolveIfPossible(
                            PdfDictionary::get($decodeParam, 'BitsPerComponent', PdfNumeric::create(8))
                        );
                        $columns = $this->resolveIfPossible(
                            PdfDictionary::get($decodeParam, 'Columns', PdfNumeric::create(1))
                        );

                        $filterObject = new Predictor(
                            $predictor->value,
                            $colors->value,
                            $bitsPerComponent->value,
                            $columns->value
                        );

                        $stream = $filterObject->decode($stream);
                    }
                }

                break;

            case 'ASCII85Decode':
            case 'A85':
                $filterObject = new Ascii85();
                $stream = $filterObject->decode($stream);
                break;

            case 'ASCIIHexDecode':
            case 'AHx':
                $filterObject = new AsciiHex();
                $stream = $filterObject->decode($stream);
                break;

            default:
                throw new FilterException(
                    \sprintf('Unsupported filter "%s".', $filter->value),
                    FilterException::UNSUPPORTED_FILTER
                );
        }
    }

    return $stream;
}

/**
 * Resolves indirect objects and references if a parser instance is available.
 *
 * @param mixed $value
 * @return mixed The resolved value, or the passed value if no parser is set or it is not a PdfType instance.
 * @throws CrossReferenceException
 * @throws PdfParserException
 */
private function resolveIfPossible($value)
{
    if ($this->parser !== null && $value instanceof PdfType) {
        return PdfType::resolve($value, $this->parser);
    }

    return $value;
}
}

View File

@ -0,0 +1,172 @@
<?php
/**
* This file is part of FPDI
*
* @package setasign\Fpdi
* @copyright Copyright (c) 2020 Setasign GmbH & Co. KG (https://www.setasign.com)
* @license http://opensource.org/licenses/mit-license The MIT License
*/
namespace setasign\Fpdi\PdfParser\Type;
use setasign\Fpdi\PdfParser\StreamReader;
/**
* Class representing a PDF string object
*
* @package setasign\Fpdi\PdfParser\Type
*/
class PdfString extends PdfType
{
/**
 * Reads a literal string from the stream reader.
 *
 * Scanning starts at the reader's current offset with a bracket depth of one and ends when the depth drops
 * to zero or no more data can be read. Nested brackets are balanced and the character behind a backslash is
 * skipped. The value is stored as found, i.e. escape sequences are not decoded.
 *
 * @param StreamReader $streamReader
 * @return self
 */
public static function parse(StreamReader $streamReader)
{
    $start = $streamReader->getOffset();
    $cursor = $start;
    $depth = 1;

    do {
        $buffer = $streamReader->getBuffer(false);
        $available = \strlen($buffer);

        while ($depth !== 0 && $cursor < $available) {
            $char = $buffer[$cursor];
            if ($char === '(') {
                $depth++;
            } elseif ($char === ')') {
                $depth--;
            } elseif ($char === '\\') {
                // Skip the escaped character so that an escaped bracket is not counted.
                $cursor++;
            }

            $cursor++;
        }
    } while ($depth !== 0 && $streamReader->increaseLength());

    // For a properly closed string the cursor is one behind the closing bracket, which is not part of the value.
    $valueLength = $depth + $cursor - $start - 1;
    $value = \substr($buffer, $start, $valueLength);
    $streamReader->setOffset($cursor);

    $string = new self();
    $string->value = $value;

    return $string;
}
/**
 * Creates an instance holding the passed string as its value.
 *
 * @param string $value The string needs to be escaped accordingly.
 * @return self
 */
public static function create($value)
{
    $string = new self();
    $string->value = $value;

    return $string;
}
/**
 * Returns the passed value if it is a PdfString instance and throws otherwise.
 *
 * @param mixed $string
 * @return self
 * @throws PdfTypeException
 */
public static function ensure($string)
{
    $checked = PdfType::ensureType(self::class, $string, 'String value expected.');

    return $checked;
}
/**
 * Unescapes escaped sequences in a PDF string according to the PDF specification.
 *
 * Handled sequences:
 * - "\(", "\)" and "\\" result in the character itself,
 * - "\n", "\r", "\t", "\b" and "\f" result in the corresponding control character,
 * - a backslash followed by an end-of-line marker (CR, LF or CR LF) is a line continuation and is removed,
 * - a backslash followed by one to three octal digits (0-7) results in the byte with that code; only the
 *   low-order eight bits of the code are used,
 * - for any other character the backslash is ignored and the character is kept. This includes the digits
 *   8 and 9, which are not octal digits.
 * A single backslash at the very end of the string is dropped.
 *
 * @param string $s
 * @return string
 */
public static function unescape($s)
{
    $out = '';
    $n = \strlen($s);

    for ($count = 0; $count < $n; $count++) {
        if ($s[$count] !== '\\') {
            $out .= $s[$count];
            continue;
        }

        // A backslash at the end of the string - ignore it
        if ($count === ($n - 1)) {
            break;
        }

        switch ($s[++$count]) {
            case ')':
            case '(':
            case '\\':
                $out .= $s[$count];
                break;

            case 'f':
                $out .= "\x0C";
                break;

            case 'b':
                $out .= "\x08";
                break;

            case 't':
                $out .= "\x09";
                break;

            case 'r':
                $out .= "\x0D";
                break;

            case 'n':
                $out .= "\x0A";
                break;

            case "\r":
                // Line continuation: a LINE FEED directly behind the CARRIAGE RETURN belongs to the marker.
                if ($count !== $n - 1 && $s[$count + 1] === "\n") {
                    $count++;
                }
                break;

            case "\n":
                // Line continuation: nothing is added to the result.
                break;

            default:
                // Number of octal digits (at most three) starting at the current position.
                $octalLength = \strspn($s, '01234567', $count, 3);
                if ($octalLength === 0) {
                    // If the character is not one of those defined, the backslash is ignored
                    $out .= $s[$count];
                    break;
                }

                // High-order overflow is ignored, so only the lowest eight bits are used.
                $out .= \chr(\octdec(\substr($s, $count, $octalLength)) & 0xFF);
                // The loop increments the position once more, so stop at the last consumed digit.
                $count += $octalLength - 1;
        }
    }

    return $out;
}
}

View File

@ -0,0 +1,44 @@
<?php
/**
* This file is part of FPDI
*
* @package setasign\Fpdi
* @copyright Copyright (c) 2020 Setasign GmbH & Co. KG (https://www.setasign.com)
* @license http://opensource.org/licenses/mit-license The MIT License
*/
namespace setasign\Fpdi\PdfParser\Type;
/**
 * Class representing a PDF token object.
 *
 * @package setasign\Fpdi\PdfParser\Type
 */
class PdfToken extends PdfType
{
    /**
     * Creates an instance holding the passed token as its value.
     *
     * @param string $token
     * @return self
     */
    public static function create($token)
    {
        $instance = new self();
        $instance->value = $token;

        return $instance;
    }

    /**
     * Returns the passed value if it is a PdfToken instance and throws otherwise.
     *
     * @param mixed $token
     * @return self
     * @throws PdfTypeException
     */
    public static function ensure($token)
    {
        $checked = PdfType::ensureType(self::class, $token, 'Token value expected.');

        return $checked;
    }
}

View File

@ -0,0 +1,79 @@
<?php
/**
* This file is part of FPDI
*
* @package setasign\Fpdi
* @copyright Copyright (c) 2020 Setasign GmbH & Co. KG (https://www.setasign.com)
* @license http://opensource.org/licenses/mit-license The MIT License
*/
namespace setasign\Fpdi\PdfParser\Type;
use setasign\Fpdi\PdfParser\CrossReference\CrossReferenceException;
use setasign\Fpdi\PdfParser\PdfParser;
use setasign\Fpdi\PdfParser\PdfParserException;
/**
 * Base class of the PDF data types.
 *
 * @package setasign\Fpdi\PdfParser\Type
 */
class PdfType
{
    /**
     * The value of the PDF type.
     *
     * @var mixed
     */
    public $value;

    /**
     * Follows indirect objects and indirect object references until a direct value is reached.
     *
     * An indirect object is replaced by its value and a reference by the object the parser returns for it;
     * the result is then resolved again. Any other value is returned unchanged.
     *
     * @param PdfType $value
     * @param PdfParser $parser
     * @param bool $stopAtIndirectObject If true, an indirect object is returned as it is instead of its value.
     * @return PdfType
     * @throws CrossReferenceException
     * @throws PdfParserException
     */
    public static function resolve(PdfType $value, PdfParser $parser, $stopAtIndirectObject = false)
    {
        $isIndirectObject = $value instanceof PdfIndirectObject;
        if ($isIndirectObject && $stopAtIndirectObject === true) {
            return $value;
        }

        if ($isIndirectObject) {
            $next = $value->value;
        } elseif ($value instanceof PdfIndirectObjectReference) {
            $next = $parser->getIndirectObject($value->value);
        } else {
            // A direct value: nothing left to resolve.
            return $value;
        }

        return self::resolve($next, $parser, $stopAtIndirectObject);
    }

    /**
     * Returns the passed value if it is an instance of the given type and throws otherwise.
     *
     * @param string $type The class name the value has to be an instance of.
     * @param PdfType $value
     * @param string $errorMessage The message of the exception thrown for a value of another type.
     * @return mixed
     * @throws PdfTypeException
     */
    protected static function ensureType($type, $value, $errorMessage)
    {
        if ($value instanceof $type) {
            return $value;
        }

        throw new PdfTypeException($errorMessage, PdfTypeException::INVALID_DATA_TYPE);
    }
}

View File

@ -0,0 +1,25 @@
<?php
/**
* This file is part of FPDI
*
* @package setasign\Fpdi
* @copyright Copyright (c) 2020 Setasign GmbH & Co. KG (https://www.setasign.com)
* @license http://opensource.org/licenses/mit-license The MIT License
*/
namespace setasign\Fpdi\PdfParser\Type;
use setasign\Fpdi\PdfParser\PdfParserException;
/**
 * Exception class for pdf type classes
 *
 * NOTE(review): PdfType::ensureType() throws this exception with the code PdfTypeException::INVALID_DATA_TYPE,
 * which is not declared in this class - presumably it is inherited from PdfParserException; confirm.
 *
 * @package setasign\Fpdi\PdfParser\Type
 */
class PdfTypeException extends PdfParserException
{
/**
 * Error code used by PdfStream::parse() when no line break is found behind the "stream" keyword.
 *
 * @var int
 */
const NO_NEWLINE_AFTER_STREAM_KEYWORD = 0x0601;
}