482 lines
		
	
	
		
			14 KiB
		
	
	
	
		
			PHP
		
	
	
		
			Executable File
		
	
	
	
	
			
		
		
	
	
			482 lines
		
	
	
		
			14 KiB
		
	
	
	
		
			PHP
		
	
	
		
			Executable File
		
	
	
	
	
| <?php
 | |
| //
 | |
| //  FPDI - Version 1.1
 | |
| //
 | |
| //    Copyright 2004,2005 Setasign - Jan Slabon
 | |
| //
 | |
| //  Licensed under the Apache License, Version 2.0 (the "License");
 | |
| //  you may not use this file except in compliance with the License.
 | |
| //  You may obtain a copy of the License at
 | |
| //
 | |
| //      http://www.apache.org/licenses/LICENSE-2.0
 | |
| //
 | |
| //  Unless required by applicable law or agreed to in writing, software
 | |
| //  distributed under the License is distributed on an "AS IS" BASIS,
 | |
| //  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 | |
| //  See the License for the specific language governing permissions and
 | |
| //  limitations under the License.
 | |
| //
 | |
| 
 | |
// Numeric identifiers for the PDF object types handled by the parser.
// Each constant is created only when it does not exist yet, so a definition
// made earlier by another included file is left untouched.
defined('PDF_TYPE_NULL')       || define('PDF_TYPE_NULL', 0);
defined('PDF_TYPE_NUMERIC')    || define('PDF_TYPE_NUMERIC', 1);
defined('PDF_TYPE_TOKEN')      || define('PDF_TYPE_TOKEN', 2);
defined('PDF_TYPE_HEX')        || define('PDF_TYPE_HEX', 3);
defined('PDF_TYPE_STRING')     || define('PDF_TYPE_STRING', 4);
defined('PDF_TYPE_DICTIONARY') || define('PDF_TYPE_DICTIONARY', 5);
defined('PDF_TYPE_ARRAY')      || define('PDF_TYPE_ARRAY', 6);
defined('PDF_TYPE_OBJDEC')     || define('PDF_TYPE_OBJDEC', 7);
defined('PDF_TYPE_OBJREF')     || define('PDF_TYPE_OBJREF', 8);
defined('PDF_TYPE_OBJECT')     || define('PDF_TYPE_OBJECT', 9);
defined('PDF_TYPE_STREAM')     || define('PDF_TYPE_STREAM', 10);
 | |
| 
 | |
| 
 | |
| require_once("wrapper_functions.php");
 | |
| require_once("pdf_parser.php");
 | |
| 
 | |
/**
 * PDF parser used by FPDI to read pages, resources, boxes and content
 * streams from a source document.
 *
 * Extends pdf_parser (loaded from pdf_parser.php) and reports every error
 * through the FPDI object handed to the constructor.
 *
 * The code deliberately sticks to PHP 4 compatible syntax (var, array(),
 * named callbacks) because the rest of the library is written that way.
 */
class fpdi_pdf_parser extends pdf_parser {

    /**
     * Pages of the source document in the order read_pages() found them.
     * The index begins at 0.
     *
     * @var array
     */
    var $pages;

    /**
     * Number of entries in $pages
     * @var integer
     */
    var $page_count;

    /**
     * Zero-based index of the currently selected page (see setPageno())
     * @var integer
     */
    var $pageno;

    /**
     * PDF version of the imported document
     * @var string
     */
    var $pdfVersion;

    /**
     * FPDI reference; used for error reporting and for the scale factor k
     * @var object
     */
    var $fpdi;

    /**
     * Document information entries collected by getInfo(),
     * keyed by entry name without the leading slash (e.g. "Title")
     * @var array
     */
    var $infos;

    /**
     * Unified constructor.
     *
     * PHP 8 no longer treats a method named like the class as constructor,
     * so without this method the parser would never be initialised there.
     * It is declared before the PHP 4 style constructor and simply forwards
     * to it; under PHP 4 it is an ordinary, unused method.
     *
     * @param string $filename  Source-Filename
     * @param object $fpdi      Object of type fpdi
     */
    function __construct($filename, &$fpdi) {
        $this->fpdi_pdf_parser($filename, $fpdi);
    }

    /**
     * Constructor (PHP 4 style)
     *
     * Stores the FPDI reference, lets the parent parser process the file,
     * collects the document information and builds the flat page list.
     *
     * @param string $filename  Source-Filename
     * @param object $fpdi      Object of type fpdi
     */
    function fpdi_pdf_parser($filename, &$fpdi) {
        $this->fpdi =& $fpdi;
        $this->filename = $filename;

        parent::pdf_parser($filename);

        // Get Info
        $this->getInfo();

        // resolve Pages-Dictionary
        $pages = $this->pdf_resolve_object($this->c, $this->root[1][1]['/Pages']);

        // Read pages. The list is initialised first so that count() below
        // always receives an array, even when no page was found
        // (count(null) raises a TypeError as of PHP 8).
        $this->pages = array();
        $this->read_pages($this->c, $pages, $this->pages);

        // count pages
        $this->page_count = count($this->pages);
    }

    /**
     * Overwrite parent::error()
     *
     * Forwards the message to the error() method of the FPDI object.
     *
     * @param string $msg  Error-Message
     */
    function error($msg) {
        $this->fpdi->error($msg);
    }

    /**
     * Get pagecount from sourcefile
     *
     * @return int
     */
    function getPageCount() {
        return $this->page_count;
    }

    /**
     * Set pageno
     *
     * The caller passes a 1-based page number; it is stored 0-based.
     * An out-of-range number is reported through $this->fpdi->error().
     * NOTE(review): the value is stored even after the error call - this
     * is only harmless if error() terminates the script; confirm.
     *
     * @param int $pageno Pagenumber to use
     */
    function setPageno($pageno) {
        $pageno -= 1;

        if ($pageno < 0 || $pageno >= $this->getPageCount()) {
            $this->fpdi->error("Pagenumber is wrong!");
        }

        $this->pageno = $pageno;
    }

    /**
     * Get page-resources from current page
     *
     * @return array|false see _getPageResources()
     */
    function getPageResources() {
        return $this->_getPageResources($this->pages[$this->pageno]);
    }

    /**
     * Get page-resources from /Page
     *
     * If the object has a /Resources entry it is resolved and used.
     * Otherwise the lookup continues with the /Parent entry. When neither
     * entry exists, false is returned.
     *
     * @param array $obj Array of pdf-data (a /Page or /Pages object or a reference to it)
     * @return array|false the resources value (unwrapped when it resolved to
     *                     a PDF_TYPE_OBJECT) or false if none was found
     */
    function _getPageResources ($obj) { // $obj = /Page
        $obj = $this->pdf_resolve_object($this->c, $obj);

        if (isset($obj[1][1]['/Resources'])) {
            $res = $this->pdf_resolve_object($this->c, $obj[1][1]['/Resources']);
        } else if (isset($obj[1][1]['/Parent'])) {
            $res = $this->_getPageResources($obj[1][1]['/Parent']);
        } else {
            return false;
        }

        // The recursive call may hand back false; only index into arrays.
        if (is_array($res) && isset($res[0]) && $res[0] == PDF_TYPE_OBJECT) {
            return $res[1];
        }

        return $res;
    }

    /**
     * Read the document information dictionary
     *
     * Looks up the /Info entry of the trailer and copies the known entries
     * into $this->infos. Only values of type PDF_TYPE_STRING and
     * PDF_TYPE_HEX are taken over; everything else is skipped. A document
     * without /Info entry results in an empty array.
     */
    function getInfo() {
        $avail_infos = array("Title", "Author", "Subject", "Keywords", "Creator", "Producer", "CreationDate", "ModDate", "Trapped");
        $infos = array();

        // /Info is an optional trailer entry - do not dereference it blindly.
        if (isset($this->xref['trailer'][1]['/Info'])) {
            $_infos = $this->pdf_resolve_object($this->c, $this->xref['trailer'][1]['/Info']);

            foreach ($avail_infos as $info) {
                $key = "/" . $info;
                if (!isset($_infos[1][1][$key])) {
                    continue;
                }

                $value = $_infos[1][1][$key];
                if ($value[0] == PDF_TYPE_STRING) {
                    $infos[$info] = $this->deescapeString($value[1]);
                } else if ($value[0] == PDF_TYPE_HEX) {
                    $infos[$info] = $this->hex2String($value[1]);
                }
            }
        }

        $this->infos = $infos;
    }

    /**
     * Rebuilds a hexstring to string
     *
     * Whitespace inside the hex data is ignored and a missing final digit
     * is treated as 0. If the data starts with a UTF-16 byte order mark
     * (FEFF or FFFE, in either letter case) it is read in 16 bit units:
     * units whose high byte is 00 yield the low byte as character, every
     * other unit yields "?". Without byte order mark each pair of digits
     * becomes one byte.
     *
     * @param string $hex hexstring
     * @return string
     */
    function hex2String($hex) {
        $hex = preg_replace('/\s+/', '', $hex);

        $endian = false;
        $i = 0;
        if (preg_match('/^FEFF/i', $hex)) { // is utf-16 aka big endian
            $endian = "big";
            $i = 4;
        } else if (preg_match('/^FFFE/i', $hex)) { // is utf-16 aka little endian
            $endian = "little";
            $i = 4;
        }

        $s = "";
        $l = strlen($hex);

        if (!$endian) {
            for (; $i < $l; $i += 2) {
                $s .= chr(hexdec(str_pad(substr($hex, $i, 2), 2, '0')));
            }
            return $s;
        }

        for (; $i < $l; $i += 4) {
            // Pad a truncated last unit instead of reading past the end.
            $unit = str_pad(substr($hex, $i, 4), 4, '0');

            if ($endian == "big") {
                $high = substr($unit, 0, 2);
                $low  = substr($unit, 2, 2);
            } else {
                $low  = substr($unit, 0, 2);
                $high = substr($unit, 2, 2);
            }

            $s .= ($high !== "00") ? "?" : chr(hexdec($low));
        }

        return $s;
    }

    /**
     * Resolve escape sequences of a literal PDF string
     *
     * Handled are octal character codes (backslash followed by one to
     * three octal digits) and escaped parentheses. All other text is
     * returned unchanged. The string is processed in a single pass, so a
     * character produced by one escape is never interpreted again.
     *
     * preg_replace_callback() is used because the /e modifier employed
     * previously does not exist anymore as of PHP 7.
     *
     * @param string $s
     * @return string
     */
    function deescapeString($s) {
        return preg_replace_callback(
            '/\\\\([0-7]{1,3}|[()])/',
            array($this, '_deescapeCallback'),
            $s
        );
    }

    /**
     * Callback of deescapeString()
     *
     * @param array $matches  $matches[1] holds the text after the backslash
     * @return string the replacement character
     */
    function _deescapeCallback($matches) {
        $seq = $matches[1];

        if ($seq === '(' || $seq === ')') {
            return $seq;
        }

        // Octal code; anything above one byte is cut off.
        return chr(octdec($seq) & 0xFF);
    }

    /**
     * Get content of current page
     *
     * If /Contents is an array, the decoded streams are concatenated,
     * separated by a single space so that the last token of one stream
     * cannot merge with the first token of the next one. A page without
     * /Contents entry yields an empty string.
     *
     * @return string
     */
    function getContent() {
        $parts = array();

        // /Contents is optional (empty page).
        if (isset($this->pages[$this->pageno][1][1]['/Contents'])) {
            $contents = $this->getPageContent($this->pages[$this->pageno][1][1]['/Contents']);
            foreach ($contents as $tmp_content) {
                $parts[] = $this->rebuildContentStream($tmp_content);
            }
        }

        return implode(' ', $parts);
    }

    /**
     * Resolve all content-objects
     *
     * A reference is resolved; if it points to an array, the array is
     * processed recursively, otherwise the resolved object is collected.
     * An array is walked entry by entry. Any other type is ignored.
     *
     * @param array $content_ref
     * @return array list of resolved content objects
     */
    function getPageContent($content_ref) {
        $contents = array();

        if ($content_ref[0] == PDF_TYPE_OBJREF) {
            $content = $this->pdf_resolve_object($this->c, $content_ref);
            if ($content[1][0] == PDF_TYPE_ARRAY) {
                $contents = $this->getPageContent($content[1]);
            } else {
                $contents[] = $content;
            }
        } else if ($content_ref[0] == PDF_TYPE_ARRAY) {
            foreach ($content_ref[1] as $tmp_content_ref) {
                $contents = array_merge($contents, $this->getPageContent($tmp_content_ref));
            }
        }

        return $contents;
    }

    /**
     * Rebuild content-streams
     *
     * Applies the filters named in the /Filter entry (a single name or an
     * array of names) in the given order. /FlateDecode needs zlib;
     * /LZWDecode and /ASCII85Decode need the optional decoder classes from
     * the decoders/ directory. Every other filter is reported as
     * unsupported through $this->fpdi->error().
     *
     * @param array $obj resolved stream object
     * @return string the decoded stream data
     */
    function rebuildContentStream($obj) {
        $filters = array();

        if (isset($obj[1][1]['/Filter'])) {
            $_filter = $obj[1][1]['/Filter'];

            if ($_filter[0] == PDF_TYPE_TOKEN) {
                $filters[] = $_filter;
            } else if ($_filter[0] == PDF_TYPE_ARRAY) {
                $filters = $_filter[1];
            }
        }

        $stream = $obj[2][1];

        foreach ($filters as $_filter) {
            switch ($_filter[1]) {
                case "/FlateDecode":
                    if (function_exists('gzuncompress')) {
                        // gzuncompress() fails on an empty string, but an
                        // empty stream is valid and simply stays empty.
                        $stream = (strlen($stream) > 0) ? @gzuncompress($stream) : '';
                    } else {
                        $this->fpdi->error(sprintf("To handle %s filter, please compile php with zlib support.",$_filter[1]));
                    }
                    if ($stream === false) {
                        $this->fpdi->error("Error while decompressing string.");
                    }
                break;
                case "/LZWDecode":
                    // The decoder is optional, a missing file is handled below.
                    @include_once("decoders/lzw.php");
                    if (class_exists("LZWDecode")) {
                        $lzwdec = new LZWDecode($this->fpdi);
                        $stream = $lzwdec->decode($stream);
                    } else {
                        $this->fpdi->error(sprintf("Unsupported Filter: %s",$_filter[1]));
                    }
                break;
                case "/ASCII85Decode":
                    // The decoder is optional, a missing file is handled below.
                    @include_once("decoders/ascii85.php");
                    if (class_exists("ASCII85Decode")) {
                        $ascii85 = new ASCII85Decode($this->fpdi);
                        $stream = $ascii85->decode(trim($stream));
                    } else {
                        $this->fpdi->error(sprintf("Unsupported Filter: %s",$_filter[1]));
                    }
                break;
                case null:
                    // no filter name - the data is used as it is
                break;
                default:
                    $this->fpdi->error(sprintf("Unsupported Filter: %s",$_filter[1]));
            }
        }

        return $stream;
    }

    /**
     * Get MediaBox
     *
     * gets an array that describes the size of a page.
     *
     * @param integer $pageno 1-based page number
     * @return array @see getPageBox()
     */
    function getPageMediaBox($pageno) {
        return $this->getPageBox($this->pages[$pageno-1],"/MediaBox");
    }

    /**
     * Get a Box from a page
     * Arrayformat is same as used by fpdf_tpl
     *
     * If the page has no such box, the lookup continues with its /Parent.
     * The four entries of the box array are divided by the scale factor
     * $this->fpdi->k and returned under the keys "x", "y", "w" and "h" in
     * that order ("w" and "h" are the third and fourth entry as they are,
     * no difference is calculated).
     *
     * @param array $page a /Page
     * @param string $box_index Type of Box @see getPageBoxes()
     * @return array|false false if neither the page nor one of its parents has the box
     */
    function getPageBox($page, $box_index) {
        $page = $this->pdf_resolve_object($this->c,$page);

        $box = null;
        if (isset($page[1][1][$box_index])) {
            $box = $page[1][1][$box_index];
        }

        if (!is_null($box) && $box[0] == PDF_TYPE_OBJREF) {
            $tmp_box = $this->pdf_resolve_object($this->c,$box);
            $box = $tmp_box[1];
        }

        if (!is_null($box) && $box[0] == PDF_TYPE_ARRAY) {
            $b = $box[1];
            $k = $this->fpdi->k;
            return array("x" => $b[0][1]/$k,
                         "y" => $b[1][1]/$k,
                         "w" => $b[2][1]/$k,
                         "h" => $b[3][1]/$k);
        }

        if (!isset($page[1][1]['/Parent'])) {
            return false;
        }

        return $this->getPageBox($this->pdf_resolve_object($this->c, $page[1][1]['/Parent']), $box_index);
    }

    /**
     * Get all Boxes from /Page
     *
     * @param array a /Page
     * @return array the boxes that could be found, keyed by box name (e.g. "/MediaBox")
     */
    function getPageBoxes($page) {
        $_boxes = array("/MediaBox","/CropBox","/BleedBox","/TrimBox","/ArtBox");
        $boxes = array();

        foreach ($_boxes as $box) {
            $_box = $this->getPageBox($page,$box);
            if ($_box) {
                $boxes[$box] = $_box;
            }
        }

        return $boxes;
    }

    /**
     * Read all /Page(es)
     *
     * Walks the /Kids of the given /Pages object. Kids of type /Pages are
     * processed recursively, all other kids are appended to the result.
     *
     * @param object pdf_context
     * @param array /Pages
     * @param array the result-array
     */
    function read_pages (&$c, &$pages, &$result) {
        // Get the kids array
        $kids = $this->pdf_resolve_object ($c, $pages[1][1]['/Kids']);

        if (!is_array($kids)) {
            $this->fpdi->error("Cannot find /Kids in current /Page-Dictionary");
            return;
        }

        // If /Kids was an indirect reference, the resolved value is still
        // wrapped in a PDF_TYPE_OBJECT - take the array out of it.
        if (isset($kids[0]) && $kids[0] == PDF_TYPE_OBJECT) {
            $kids = $kids[1];
        }

        if (!isset($kids[1]) || !is_array($kids[1])) {
            $this->fpdi->error("Cannot find /Kids in current /Page-Dictionary");
            return;
        }

        foreach ($kids[1] as $v) {
            $pg = $this->pdf_resolve_object ($c, $v);

            if (isset($pg[1][1]['/Type'][1]) && $pg[1][1]['/Type'][1] === '/Pages') {
                // If one of the kids is an embedded
                // /Pages array, resolve it as well.
                $this->read_pages ($c, $pg, $result);
            } else {
                $result[] = $pg;
            }
        }
    }

    /**
     * Get PDF-Version
     *
     * Calls parent::getPDFVersion() (presumably this fills
     * $this->pdfVersion - confirm in pdf_parser.php) and raises
     * $this->fpdi->importVersion if the imported document has a higher
     * version.
     */
    function getPDFVersion() {
        parent::getPDFVersion();

        if (isset($this->fpdi->importVersion) && $this->pdfVersion > $this->fpdi->importVersion) {
            $this->fpdi->importVersion = $this->pdfVersion;
        }
    }

}
 | |
| 
 | |
| ?>
 |