User:Kevin pdf2wiki

<?php
/**
 * pdf2wiki - helper page that converts a PDF into rough wiki markup.
 *
 * Flow: show a form asking for a PDF URL (or a wiki file reference), resolve
 * a wiki reference to the real upload URL by scraping the Media: page,
 * download the PDF, run the external `pdftotext` program on it and pass the
 * resulting text through transformtowiki().
 *
 * NOTE(review): the copy of this script that was reviewed had its HTML and
 * heredoc markers stripped.  The form, the textarea and the page head/foot
 * markup below are minimal reconstructions around the surviving text and
 * should be compared against the original layout.
 */
global $gLbm, $gSource, $gWikiText, $gPdf;
// NOTE(review): $gLbm is printed as a line-break marker; it was probably an
// HTML line break before the markup was stripped - confirm.
$gLbm = " ";
// $gPdf may not be set yet; avoid reading an undefined variable.
$gSource = $gWikiText = isset($gPdf) ? $gPdf : null;

print head();
print <<<END
<form method="post" action="">
Enter a URL of a PDF file in the wiki or a wiki reference like "Image:turnips.pdf"
<input type="text" name="ifile" size="60" />
<input type="submit" value="Convert" />
</form>
END;

if (!isset($_REQUEST['ifile']) or !is_string($_REQUEST['ifile']) or $_REQUEST['ifile'] == '') {
    print " No file submitted! Please use the form above. ";
} else {
    print " Loading: ";
    $ifile = trim($_REQUEST['ifile']);
    if (strtolower(substr($ifile, -4)) != ".pdf") {
        die("Don't think file is a PDF");
    }

    // Wiki titles use underscores where the user typed spaces.
    $ifile = str_replace(' ', '_', $ifile);

    // Anything that is not already a URL is taken to be a wiki file name and
    // turned into the URL of its Media: page on this server.
    if (!preg_match('@^https?:@i', $ifile)) {
        // Drop a leading "Image:" / "Media:" so that "Image:turnips.pdf" does
        // not turn into ".../Media:Image:turnips.pdf".
        $title = preg_replace('@^(?:Image|Media):@i', '', $ifile);
        // substr(..., 0, -12) removes the last 12 characters of PHP_SELF
        // (presumably this script's own file name, "pdf2wiki.php" - confirm),
        // leaving the directory that also holds index.php.
        $ifile = "http://" . $_SERVER['SERVER_NAME'] . substr($_SERVER['PHP_SELF'], 0, -12)
               . "index.php/Media:" . $title;
    }

    // A wiki page reference: fetch the page and pull the exact PDF URL out of
    // the first link that points at a .pdf file.
    if (preg_match('@Image:|Media:@', $ifile)) {
        $page = file_get_contents($ifile);
        if ($page === false) {
            die("Can't open wikipage!");
        }
        if (!preg_match('@<a href="([^"]*\.pdf)" @i', $page, $href)) {
            die("Can't find the pdf link in the wikipage!");
        }
        // Server-relative links get this server's name in front; links that
        // are already absolute are used as they are.
        if (preg_match('@^https?://@i', $href[1])) {
            $ifile = $href[1];
        } else {
            $ifile = 'http://' . $_SERVER['SERVER_NAME'] . $href[1];
        }
    }

    // Now on with the show: load the PDF, convert it to text, then to markup.
    print htmlentities($ifile) . $gLbm;
    $pdf = file_get_contents($ifile);
    if ($pdf === false) {
        die("Can't download pdf file!");
    }

    print "Converting: ";
    $ifile = basename($ifile); // only the file name is needed from here on

    $itmpfname = tempnam("/tmp", "itmp");
    $otmpfname = tempnam("/tmp", "otmp");
    if ($itmpfname === false or $otmpfname === false) {
        if ($itmpfname !== false) {
            unlink($itmpfname);
        }
        if ($otmpfname !== false) {
            unlink($otmpfname);
        }
        die("Can't create temp file");
    }

    // The PDF goes into the input file.  The output file is pre-filled with
    // a warning so that something readable comes back if pdftotext writes
    // nothing at all.
    $handle = fopen($itmpfname, "w");
    if ($handle === false) {
        unlink($itmpfname);
        unlink($otmpfname);
        die("Can't write temp file");
    }
    fwrite($handle, $pdf);
    fclose($handle);
    $handle = fopen($otmpfname, "w");
    if ($handle === false) {
        unlink($itmpfname);
        unlink($otmpfname);
        die("Can't write temp file");
    }
    fwrite($handle, "Some unforseen problem occured");
    fclose($handle);
    print "############";

    // Heavy lifting is done by the external pdftotext program.
    $pdf2txt = "pdftotext -q -enc UTF-8 -layout -nopgbrk "
             . escapeshellarg($itmpfname) . " " . escapeshellarg($otmpfname);
    shell_exec($pdf2txt);

    // Read the result and remove the temp files straight away so that they
    // are not left behind if a later step stops the script.
    $text = file_get_contents($otmpfname);
    unlink($itmpfname);
    unlink($otmpfname);
    if ($text === false) {
        die("Cant re-open temp file");
    }

    $out = transformtowiki($text);
    print $gLbm;
    $t = date('r');

    // Escape for HTML so text from the PDF cannot break out of the textarea.
    // Plain byte replacement is used so that odd bytes in the PDF text can
    // never blank the whole output.
    $escFrom = array('&', '<', '>');
    $escTo   = array('&amp;', '&lt;', '&gt;');
    $safeOut  = str_replace($escFrom, $escTo, $out);
    $safeFile = str_replace($escFrom, $escTo, $ifile);

    // NOTE(review): the [[Media:...]] line is a reconstruction - the original
    // content between the heading and the "Converted from" line was lost.
    echo <<<END
<textarea rows="30" cols="100">$safeOut

=Original PDF of this page=

[[Media:$safeFile]]

Converted from PDF to wikimarkup at $t with pdf2wiki script.
</textarea>
END;
}
// completely finished
print foot();

////////////////////////////////////////////////////////////////////////////
// end of main

/**
 * Closing markup of the page.
 *
 * NOTE(review): reconstructed to match head(); the original markup was lost.
 *
 * @return string
 */
function foot()
{
    return "</body></html>";
}

/**
 * Opening markup of the page: title, heading and the "About" blurb.
 *
 * The charset is declared as UTF-8 because pdftotext is run with
 * "-enc UTF-8" and its output is echoed into this page.
 *
 * @return string
 */
function head()
{
    return <<<END
<html>
<head>
<meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
<title>pdf2wiki</title>
</head>
<body>
<h1>pdf2wiki</h1>
<p>About: This program is a helper program to convert Pdf's into suitable text ready marked up for insertion in the wiki. Copy the text in the box below and open a new page in the wiki and paste it in! You will most likely need to edit it well. Many assumptions are made about the text to try to make it easy for you to convert your pdf, not all of them are correct in every situation. Try it and see!</p>
END;
}

/**
 * Turn plain text (as produced by pdftotext) into rough wiki markup.
 *
 * - Splits the text into paragraphs on blank lines and drops empty ones.
 * - A single-line paragraph written entirely in capitals becomes a level 2
 *   heading.
 * - Bullet characters become "*" list items, "N. " at the start of a line
 *   becomes a "#" list item.
 * - Tabs and runs of spaces are squeezed, leading blanks on lines removed and
 *   runs of blank lines reduced to one.
 *
 * Side effect: prints one "#" per input paragraph as a progress indicator.
 *
 * @param string $input text to convert
 * @return string wiki markup; each paragraph is followed by a blank line
 */
function transformtowiki($input)
{
    $out = '';
    // Blank lines in the pdftotext output separate paragraphs.
    foreach (explode("\n\n", $input) as $para) {
        $para = ltrim($para);
        print "#";
        if ($para === '') {
            continue; // came from a run of blank lines, nothing to keep
        }
        // Level 2 heading from a capitalised paragraph.  It must fit on one
        // line (wiki headings cannot span lines) and contain at least one
        // letter, so that a bare number is not promoted to a heading.
        $title = rtrim($para);
        if (strpos($title, "\n") === false
            and $title === strtoupper($title)
            and preg_match('@[A-Z]@', $title)
        ) {
            $para = "==" . $title . "==";
        }
        $out .= $para . "\n\n"; // put back the blank line between paragraphs
    }

    // Capital I between whitespace at the start of a line becomes a bullet.
    // NOTE(review): the leading \s can also match the newline of a preceding
    // blank line, so a paragraph starting with "I " is turned into a bullet
    // too - confirm this is wanted.
    $out = preg_replace('@^(\sI\s)@m', '* ', $out);
    // Section signs and bullets at the start of a line become asterisks.
    $out = preg_replace('@^(§\s)@m', '* ', $out);
    $out = preg_replace('@^•@m', '*', $out);
    // A bullet in the middle of a line starts a new list item.
    $out = preg_replace('@•@', "\n*", $out);
    // NOTE(review): this pattern cannot match - after "[A-Z]{2,}\b" the next
    // character is never a capital letter, so the group cannot repeat.  It is
    // left unchanged because the intended rule is not clear.
    $out = preg_replace('@^([A-Z]{2,}\b){2,}@m', "==$1==\n", $out);
    // Squeeze runs of spaces and tabs down to a single space.
    $out = preg_replace("@ {2,}|\t@", ' ', $out);
    // Remove leading blanks on each line.  Only spaces and tabs are matched:
    // \s would also swallow the blank lines that separate paragraphs.
    $out = preg_replace("@^[ \t]+@m", '', $out);
    // Reduce runs of blank lines to a single blank line.
    $out = preg_replace("@\n{3,}@", "\n\n", $out);
    // "N. " at the start of a line (including the very first) becomes "# ".
    $out = preg_replace('@^\d+\.\s@m', "# ", $out);

    return $out;
}
?>
 * }