<?php
/*
 * Original mailing-list message, kept for context:
 *
 * Thank you for that huge code. I will try.
 * Greetings
 * Mirco Blitz
 *
 * -----Original Message-----
 * From: Matt M. [mailto:h.dudeness@xxxxxxxxx]
 * Sent: Wednesday, 9 February 2005 22:39
 * To: Mirco Blitz
 * Cc: php-general@xxxxxxxxxxxxx
 * Subject: Re: Parsing pdf file
 *
 * did you try this?
 */

// Demo invocation from the post: replace the placeholder with a real path.
$test = pdf2string("<pathtoPDFfile>");
echo "$test";

/**
 * Extract the text found in the zlib-compressed streams of a PDF file.
 *
 * Every "stream ... endstream" section is located, inflated with
 * gzuncompress() and passed through ExtractText(); the results are
 * concatenated in file order. Sections that cannot be inflated are skipped
 * and contribute nothing to the result.
 *
 * @param string $sourcefile Path of the PDF file to read.
 *
 * @return string|int The concatenated text, or -1 if the file cannot be read
 *                    or if it contains streams but none could be uncompressed.
 */
function pdf2string($sourcefile)
{
    if (!is_file($sourcefile) || !is_readable($sourcefile)) {
        return -1;
    }

    $content = file_get_contents($sourcefile);
    if ($content === false) {
        return -1;
    }

    $searchstart = 'stream';
    $searchend = 'endstream';
    $pdfdocument = '';
    $startpos = 0;
    $streamsSeen = 0;
    $streamsDecoded = 0;

    // Iterate through each stream block
    while (($pos = strpos($content, $searchstart, $startpos)) !== false) {
        // "stream" also occurs inside "endstream"; such a hit is a closing
        // tag, not the start of a new section.
        if ($pos >= 3 && substr($content, $pos - 3, 3) === 'end') {
            $startpos = $pos + strlen($searchstart);
            continue;
        }

        $dataStart = $pos + strlen($searchstart);
        $pos2 = strpos($content, $searchend, $dataStart);
        if ($pos2 === false) {
            break;
        }

        // The keyword is followed by CRLF or a lone LF; skip exactly that so
        // the first byte of the compressed data is not lost.
        if (substr($content, $dataStart, 2) === "\r\n") {
            $dataStart += 2;
        } elseif (substr($content, $dataStart, 1) === "\n") {
            $dataStart += 1;
        }

        // Bytes between the tags. The end-of-line marker in front of
        // "endstream" is left in place: it follows the end of the zlib data,
        // and stripping it could cut off a checksum byte that happens to be
        // CR or LF.
        $textsection = substr($content, $dataStart, $pos2 - $dataStart);
        $streamsSeen++;

        // Not every stream is zlib data, so a failure here is expected and
        // the warning is deliberately suppressed.
        $data = ($textsection === '') ? false : @gzuncompress($textsection);
        if ($data !== false) {
            $streamsDecoded++;
            $pdfdocument .= ExtractText($data);
        }

        // Continue after the section we just read
        $startpos = $pos2 + strlen($searchend);
    }

    if ($streamsSeen > 0 && $streamsDecoded === 0) {
        return -1;
    }

    return $pdfdocument;
}

/**
 * Collect the contents of all parenthesised strings in uncompressed stream data.
 *
 * A closing parenthesis preceded by an odd number of backslashes is treated
 * as escaped and does not end the string. A single space is appended after
 * every string that is immediately followed by "]". Backslashes are removed
 * from the result with stripslashes().
 *
 * @param string|false $postScriptData Uncompressed stream data; false (a
 *                                     failed gzuncompress()) yields ''.
 *
 * @return string The extracted text, '' when no string is found.
 */
function ExtractText($postScriptData)
{
    $postScriptData = (string) $postScriptData;
    $plainText = '';
    $offset = 0;

    // Compare against false explicitly: a "(" at offset 0 is a valid match.
    while (($textStart = strpos($postScriptData, '(', $offset)) !== false) {
        // Find the first closing parenthesis that is not escaped
        $textEnd = strpos($postScriptData, ')', $textStart + 1);
        while ($textEnd !== false) {
            $backslashes = 0;
            for ($i = $textEnd - 1; $i > $textStart && $postScriptData[$i] === '\\'; $i--) {
                $backslashes++;
            }
            if ($backslashes % 2 === 0) {
                break;
            }
            $textEnd = strpos($postScriptData, ')', $textEnd + 1);
        }

        if ($textEnd === false) {
            // Unterminated string: nothing more to extract
            break;
        }

        $plainText .= substr($postScriptData, $textStart + 1, $textEnd - $textStart - 1);

        // This adds quite some additional spaces between the words
        if (substr($postScriptData, $textEnd + 1, 1) === ']') {
            $plainText .= ' ';
        }

        $offset = $textEnd + 1;
    }

    return stripslashes($plainText);
}

// --
// PHP General Mailing List (http://www.php.net/)
// To unsubscribe, visit: http://www.php.net/unsub.php
// --
// PHP General Mailing List (http://www.php.net/)
// To unsubscribe, visit: http://www.php.net/unsub.php