Willkommen in der Webstatt Zum Webstatt Blog und Stories
nuit nuit am 12.04.07 03:40

WikipediaSyntaxParser v.0.1
Ein BBCode Parser

<?php
/**
 * Converts a small subset of MediaWiki markup into HTML.
 *
 * Supported constructs: "{{Personendaten ...}}" templates, tables
 * ({| ... |}), bold ('''x'''), italic (''x''), headlines (== x ==),
 * horizontal rules (----), definition lists (;term / :definition) and
 * <poem> blocks.
 *
 * Markup content and attribute strings are copied into the generated HTML
 * without escaping, so the input has to be trusted or sanitised elsewhere.
 */
class MediaWikiParser {
    /** @var string Working copy of the text; every pass rewrites it in place. */
    private $text;

    /**
     * @param string $text Wiki markup to convert.
     */
    public function __construct($text) {
        $this->text = $text;
    }

    /**
     * Runs all conversion passes in a fixed order and returns the result.
     *
     * The passes operate on the same internal buffer, so calling this method
     * a second time re-processes the already converted text.
     *
     * @return string The text with the supported markup replaced by HTML.
     */
    public function parse() {
        $this->_persondata();
        $this->_parseTable();
        $this->_bold();
        $this->_italic();
        $this->_headlines();
        $this->_lines();
        //$this->_references();
        $this->_definitionlist();
        $this->_poem();

        return $this->text;
    }

    /**
     * Replaces '''text''' with <b>text</b>.
     */
    private function _bold() {
        $this->text = preg_replace("/'''(.*?)'''/", '<b>$1</b>', $this->text);
    }

    /**
     * Replaces ''text'' with <i>text</i>. Runs after _bold(), so the triple
     * quotes have already been consumed.
     */
    private function _italic() {
        $this->text = preg_replace("/''(.*?)''/", '<i>$1</i>', $this->text);
    }

    /**
     * Converts every line of the form "== Title ==" into an <hN> element.
     *
     * The pattern is anchored per line (modifier "m") so that headlines
     * anywhere in the text are found, not only one on the very first line.
     */
    private function _headlines() {
        $this->text = preg_replace_callback(
            '/^(=+)(.*?)(=*)$/m',
            array($this, '_renderHeadline'),
            $this->text
        );
    }

    /**
     * Callback for _headlines().
     *
     * @param array $match 0 = whole line, 1 = leading "=" run, 2 = title, 3 = trailing "=" run.
     * @return string The <hN> element, or the untouched line when there is no title.
     */
    private function _renderHeadline($match) {
        // trim() copes with both "== Title ==" and "==Title==".
        $title = trim($match[2]);
        if ($title === '') {
            return $match[0];
        }

        // HTML defines h1 to h6 only.
        $level = min(strlen($match[1]), 6);

        return '<h'.$level.'>'.$title.'</h'.$level.'>';
    }

    /**
     * Replaces every line that consists of four or more dashes with <hr />.
     */
    private function _lines() {
        $this->text = preg_replace('/^-{4,}$/m', '<hr />', $this->text);
    }

    /**
     * Converts definition lists into <dl> blocks.
     *
     * A list starts with a line beginning with ";" (the term, optionally
     * followed by ": definition" on the same line) and continues over all
     * directly following lines that begin with ":".
     */
    private function _definitionlist() {
        $this->text = preg_replace_callback(
            '/^;([^\n]*)\n?((?:^:[^\n]*\n?)*)/m',
            array($this, '_renderDefinitionList'),
            $this->text
        );
    }

    /**
     * Callback for _definitionlist().
     *
     * @param array $match 0 = whole block, 1 = term line without ";", 2 = following ":" lines.
     * @return string The <dl> block, or the untouched text when no definition is present.
     */
    private function _renderDefinitionList($match) {
        $term = $match[1];
        $definitions = array();

        // Inline form ";term: definition" - split at the first colon.
        $colon = strpos($term, ':');
        if ($colon !== false) {
            $definitions[] = trim(substr($term, $colon + 1));
            $term = substr($term, 0, $colon);
        }

        foreach (explode("\n", $match[2]) as $line) {
            if ($line !== '') {
                // Drop the leading ":" marker.
                $definitions[] = trim(substr($line, 1));
            }
        }

        // A term without any definition is not treated as a list.
        if (count($definitions) === 0) {
            return $match[0];
        }

        $html = "<dl>\n<dt>".trim($term)."</dt>\n";
        foreach ($definitions as $definition) {
            $html .= '<dd>'.$definition."</dd>\n";
        }

        return $html."</dl>\n";
    }

    /**
     * Wraps the content of <poem>...</poem> in a div/p pair.
     */
    private function _poem() {
        // The line breaks must come from double-quoted strings; inside single
        // quotes "\n" would be emitted as a literal backslash followed by "n".
        $replacement = '<div class="poem">'."\n".'<p>$1</p>'."\n".'</div>';
        $this->text = preg_replace('/<poem>(.*?)<\/poem>/is', $replacement, $this->text);
    }

    /**
     * Converts "{{Personendaten ... }}" templates into two-column tables.
     */
    private function _persondata() {
        $this->text = preg_replace_callback(
            '/\{\{Personendaten\n(.*?)\n\}\}/is',
            array($this, '_renderPersondata'),
            $this->text
        );
    }

    /**
     * Callback for _persondata().
     *
     * @param array $match 0 = whole template, 1 = the lines between the braces.
     * @return string A table with one row per "|key=value" line.
     */
    private function _renderPersondata($match) {
        $html = "<table>\n";
        foreach (explode("\n", $match[1]) as $line) {
            // Lines that are not of the form "|key=value" carry no data.
            if (!preg_match('/\|(.*?)=(.*)$/', $line, $field)) {
                continue;
            }
            $html .= "<tr>\n";
            $html .= '<td>'.$field[1]."</td>\n";
            $html .= '<td>'.$field[2]."</td>\n";
            $html .= "</tr>\n";
        }

        return $html."</table>\n";
    }

    /**
     * Converts wiki tables ({| ... |}), including nested ones, into HTML tables.
     *
     * The text is scanned line by line. While inside a table the original
     * lines are collected in $linebuffer and the generated HTML in $buffer;
     * when the outermost table closes, the collected source is replaced by
     * the generated HTML.
     */
    private function _parseTable() {
        $lines = explode("\n", $this->text);
        $layer = 0;        // current table nesting depth, 0 = outside any table
        $buffer = '';      // HTML generated for the table being processed
        $linebuffer = '';  // original source lines of that table

        foreach ($lines as $line) {
            $insideTable = $layer > 0;

            if (substr($line, 0, 1) == '|' && substr($line, 1, 1) != '}') {
                // Row separator or data cells; ignored outside of a table so
                // that stray "|" lines do not leak into the next table.
                if ($insideTable) {
                    $buffer .= $this->_renderCellLine($line);
                }
            } elseif (preg_match('/\{\|(.*?)$/', $line, $m)) {
                $buffer .= $this->_openTag('table', $m[1])."\n<tr>\n";
                $layer++;
            } elseif ($line == '|}') {
                if ($insideTable) {
                    $buffer .= "</tr>\n</table>\n";
                    if ($layer > 1) {
                        // A nested table lives inside a cell of its parent.
                        $buffer .= "</td>\n";
                    } else {
                        $this->text = str_replace(trim($linebuffer.$line), $buffer, $this->text);
                        $linebuffer = '';
                        $buffer = '';
                    }
                    $layer--;
                }
            } elseif (substr($line, 0, 1) == '!') {
                if ($insideTable) {
                    $buffer .= $this->_renderHeaderLine($line);
                }
            }

            // Evaluated after the depth update: the opening line is recorded,
            // the closing line of the outermost table is not.
            if ($layer > 0) {
                $linebuffer .= $line."\n";
            }
        }
    }

    /**
     * Renders one table line that starts with "|" (but is not "|}").
     *
     * @param string $line The source line.
     * @return string HTML for a row break, an opened cell or one or more cells.
     */
    private function _renderCellLine($line) {
        if (substr($line, 0, 2) == '|-') {
            // "|-" starts a new row; anything after the dashes is row attributes.
            return "</tr>\n".$this->_openTag('tr', ltrim(substr($line, 2), '-'))."\n";
        }

        if ($line == '|') {
            // A lone "|" opens a cell that is closed by a nested table's "|}".
            return "<td>\n";
        }

        if (strpos($line, '||') !== false) {
            // "|a||b||c": the cell texts sit at the odd indexes after splitting.
            $parts = explode('|', $line);
            $html = '';
            for ($i = 1; $i < count($parts); $i += 2) {
                $html .= '<td>'.trim($parts[$i])."</td>\n";
            }
            return $html;
        }

        // "|attributes|content" or "|content"; the limit keeps any further
        // "|" characters inside the content.
        $parts = explode('|', $line, 3);
        if (count($parts) > 2) {
            return $this->_openTag('td', $parts[1]).trim($parts[2])."</td>\n";
        }

        return '<td>'.trim($parts[1])."</td>\n";
    }

    /**
     * Renders one table line that starts with "!" (header cells).
     *
     * @param string $line The source line; several cells may be separated by "!!".
     * @return string HTML for the header cell(s).
     */
    private function _renderHeaderLine($line) {
        $html = '';
        // Only the leading marker is removed, so "!" inside the text survives.
        foreach (explode('!!', substr($line, 1)) as $cell) {
            $parts = explode('|', $cell, 2);
            if (count($parts) > 1) {
                $html .= $this->_openTag('th', $parts[0]).trim($parts[1])."</th>\n";
            } else {
                $html .= '<th>'.trim($parts[0])."</th>\n";
            }
        }

        return $html;
    }

    /**
     * Builds an opening tag, omitting the attribute part when it is empty.
     *
     * @param string $name       Element name.
     * @param string $attributes Raw attribute string; surrounding whitespace is ignored.
     * @return string The opening tag.
     */
    private function _openTag($name, $attributes) {
        $attributes = trim($attributes);
        if ($attributes === '') {
            return '<'.$name.'>';
        }

        return '<'.$name.' '.$attributes.'>';
    }
}

/*private function _references() {
$references = array();

preg_match_all("/<ref(.*?)>(.*?)<\/ref>/is",$this->text,$match);
for($i = 0; $i < count($match[0]); $i++) {
if(substr($match[1][$i],-1, strlen($match[1][$i])) != '/') {

}
}
}*/

// Demo driver: reads wiki markup from the file "table" (path relative to the
// current working directory), converts it and prints the resulting HTML.
$text = file_get_contents('table');
if ($text === false) {
    // Without this check a missing or unreadable file would be parsed as
    // empty input and the script would silently print nothing.
    exit("Could not read input file 'table'\n");
}

#$text = '--'."\n";

$o = new MediaWikiParser($text);
print $o->parse();
?>

Sie kann bisher fast noch gar nichts, außer Tabellen – und das ausgiebig … die auch in allen Formen, wie sie Wikipedia nur so zu bieten hat ;D

netcup.de Warum gibt es hier Werbung?
Creative Commons Lizenzvertrag
Alle Inhalte des Webstatt-Archivs stehen unter einer Creative Commons Namensnennung - Weitergabe unter gleichen Bedingungen 3.0 Unported Lizenz.

Impressum & Kontakt