WikipediaSyntaxParser v.0.1
Ein BBCode Parser
<?php
/**
 * Converts a small subset of MediaWiki markup to HTML.
 *
 * Handled constructs: the "{{Personendaten ...}}" template, wiki tables
 * (including nested ones), bold / italic quotes, headlines, horizontal
 * rules, definition lists and <poem> blocks.
 *
 * NOTE(review): text is copied into the generated HTML without escaping, so
 * the input must be trusted or sanitised by the caller.
 */
class MediaWikiParser {
    /** @var string Wikitext handed to the constructor; rewritten step by step while parse() runs. */
    private $text;

    /**
     * @param string $text Wikitext to convert.
     */
    public function __construct($text) {
        $this->text = $text;
    }

    /**
     * Runs every conversion step and returns the resulting HTML.
     *
     * The stored wikitext is restored afterwards, so calling parse() again
     * yields the same result instead of re-parsing its own output.
     *
     * @return string The converted text.
     */
    public function parse() {
        $source = $this->text;

        $this->_persondata();
        $this->_parseTable();
        $this->_bold();
        $this->_italic();
        $this->_headlines();
        $this->_lines();
        // <ref> handling is not implemented yet.
        $this->_definitionlist();
        $this->_poem();

        $html = $this->text;
        $this->text = $source;
        return $html;
    }

    /**
     * Converts '''bold''' markup. Five apostrophes (bold + italic) are
     * handled first so that the resulting tags nest correctly.
     */
    private function _bold() {
        $this->text = preg_replace("/'''''(.*?)'''''/", '<b><i>$1</i></b>', $this->text);
        $this->text = preg_replace("/'''(.*?)'''/", '<b>$1</b>', $this->text);
    }

    /**
     * Converts ''italic'' markup.
     */
    private function _italic() {
        $this->text = preg_replace("/''(.*?)''/", '<i>$1</i>', $this->text);
    }

    /**
     * Converts "== Title ==" lines to <h1>..<h6>.
     *
     * The level is the number of leading "=" signs (at most six) and the
     * line has to end with the same number of "=" signs. The "m" modifier
     * makes "^" match at every line start, not only at the start of the text.
     */
    private function _headlines() {
        $this->text = preg_replace_callback(
            '/^(={1,6})(.+?)\1[ \t]*(?=\r?$)/m',
            function ($match) {
                $level = strlen($match[1]);
                return '<h' . $level . '>' . trim($match[2]) . '</h' . $level . '>';
            },
            $this->text
        );
    }

    /**
     * Converts a line consisting of four or more dashes to <hr />.
     */
    private function _lines() {
        $this->text = preg_replace('/^-{4,}[ \t]*(?=\r?$)/m', '<hr />', $this->text);
    }

    /**
     * Converts definition lists to <dl> blocks.
     *
     * Supported forms (the ";" must be the first character of a line):
     *
     *   ;term              ;term : definition
     *   :definition 1      :another definition
     *   :definition 2
     *
     * The term ends at the first ":" or at the end of the line; every
     * directly following line that starts with ":" becomes one <dd>.
     */
    private function _definitionlist() {
        $this->text = preg_replace_callback(
            '/^;([^:\n]*)(?:\n|\z|(?=:))((?::[^\n]*(?:\n|\z))*)/m',
            function ($match) {
                $html = "<dl>\n<dt>" . trim($match[1]) . "</dt>\n";
                foreach (explode("\n", $match[2]) as $definition) {
                    // The trailing newline of the block leaves one empty piece behind.
                    if ($definition === '') {
                        continue;
                    }
                    // Drop the leading ":" and surrounding whitespace.
                    $html .= '<dd>' . trim((string) substr($definition, 1)) . "</dd>\n";
                }
                return $html . "</dl>\n";
            },
            $this->text
        );
    }

    /**
     * Wraps the content of <poem>...</poem> in a div/p pair.
     */
    private function _poem() {
        // Double quotes are required so that \n is a real line break; "\$1" is the PCRE backreference.
        $this->text = preg_replace(
            '/<poem>(.*?)<\/poem>/is',
            "<div class=\"poem\">\n<p>\$1</p>\n</div>",
            $this->text
        );
    }

    /**
     * Converts "{{Personendaten ...}}" templates to a two-column table.
     *
     * Every "|KEY=VALUE" line becomes one row; lines that do not have this
     * shape are skipped.
     */
    private function _persondata() {
        $this->text = preg_replace_callback(
            '/\{\{Personendaten\n(.*?)\n\}\}/is',
            function ($match) {
                $html = "<table>\n";
                foreach (explode("\n", $match[1]) as $line) {
                    if (!preg_match('/^\s*\|([^=]*)=(.*)$/', $line, $field)) {
                        continue;
                    }
                    $html .= "<tr>\n";
                    $html .= '<td>' . trim($field[1]) . "</td>\n";
                    $html .= '<td>' . trim($field[2]) . "</td>\n";
                    $html .= "</tr>\n";
                }
                return $html . "</table>\n";
            },
            $this->text
        );
    }

    /**
     * Converts wiki tables ("{|" ... "|}") to HTML tables.
     *
     * The text is rebuilt line by line: lines outside a table are copied
     * unchanged, lines inside a table are translated. Recognised table lines:
     *
     *   {| attrs      table start (a nested table is placed inside a cell)
     *   |- attrs      new row
     *   | a || b      data cells, each optionally "attrs | content"
     *   ! a !! b      header cells, each optionally "attrs | content"
     *   |}            table end
     *
     * Any other non-empty line inside a table is appended to the cell that
     * is currently open. A table that is never closed is left as wikitext.
     */
    private function _parseTable() {
        $output = array();        // finished lines and table HTML, joined with "\n" at the end
        $pendingSource = array(); // source lines of the table in progress, restored if it is never closed
        $html = '';               // HTML of the outermost table in progress
        $openCell = array();      // per nesting level: tag name of the cell awaiting its end tag, or null
        $layer = 0;               // current table nesting depth

        foreach (explode("\n", $this->text) as $sourceLine) {
            // trim() also removes the "\r" of CRLF line endings.
            $line = trim($sourceLine);
            $startsTable = preg_match('/^\{\|(.*)$/', $line, $start) === 1;

            if ($layer === 0 && !$startsTable) {
                $output[] = $sourceLine;
                continue;
            }
            $pendingSource[] = $sourceLine;

            if ($startsTable) {
                if ($layer > 0) {
                    // A nested table lives inside a cell of the enclosing table.
                    if ($openCell[$layer] === null) {
                        $html .= '<td>';
                        $openCell[$layer] = 'td';
                    }
                    $html .= "\n";
                }
                $layer++;
                $openCell[$layer] = null;
                $html .= $this->_openTag('table', $start[1]) . "\n<tr>\n";
            } elseif ($line === '|}') {
                $html .= $this->_closeCell($openCell[$layer]) . "</tr>\n</table>\n";
                unset($openCell[$layer]);
                $layer--;
                if ($layer === 0) {
                    $output[] = $html;
                    $html = '';
                    $pendingSource = array();
                }
            } elseif (strncmp($line, '|-', 2) === 0) {
                $html .= $this->_closeCell($openCell[$layer]);
                $openCell[$layer] = null;
                $rowAttributes = ltrim((string) substr($line, 2), '-');
                $html .= "</tr>\n" . $this->_openTag('tr', $rowAttributes) . "\n";
            } elseif ($line !== '' && ($line[0] === '|' || $line[0] === '!')) {
                $tag = $line[0] === '!' ? 'th' : 'td';
                $row = (string) substr($line, 1);
                if ($tag === 'th') {
                    // Header cells may be separated by "!!" as well as by "||".
                    $row = str_replace('!!', '||', $row);
                }
                foreach (explode('||', $row) as $cell) {
                    $html .= $this->_closeCell($openCell[$layer]) . $this->_openCell($tag, $cell);
                    $openCell[$layer] = $tag;
                }
            } elseif ($line !== '') {
                // Continuation of the current cell's content.
                if ($openCell[$layer] === null) {
                    $html .= '<td>' . $line;
                    $openCell[$layer] = 'td';
                } else {
                    $html .= "\n" . $line;
                }
            }
        }

        if ($layer > 0) {
            // The last table was never closed: keep its wikitext untouched.
            $output = array_merge($output, $pendingSource);
        }

        $this->text = implode("\n", $output);
    }

    /**
     * Builds the start tag and content of one table cell; the end tag is
     * emitted later by _closeCell() so that following lines can still be
     * added to the cell.
     *
     * @param string $tag  "td" or "th".
     * @param string $cell Cell source, either "content" or "attrs | content".
     * @return string
     */
    private function _openCell($tag, $cell) {
        $attributes = '';
        $content = $cell;

        $bar = strpos($cell, '|');
        if ($bar !== false) {
            $prefix = substr($cell, 0, $bar);
            // A "|" inside [[link|label]] or {{template|arg}} belongs to the content.
            if (strpos($prefix, '[[') === false && strpos($prefix, '{{') === false) {
                $attributes = $prefix;
                $content = (string) substr($cell, $bar + 1);
            }
        }

        return $this->_openTag($tag, $attributes) . trim($content);
    }

    /**
     * Returns the end tag for an open cell, or an empty string if none is open.
     *
     * @param string|null $tag "td", "th" or null.
     * @return string
     */
    private function _closeCell($tag) {
        return $tag === null ? '' : '</' . $tag . ">\n";
    }

    /**
     * Builds a start tag, adding the attribute string only when it is not empty.
     *
     * @param string $tag        Element name.
     * @param string $attributes Raw attribute text taken from the wikitext.
     * @return string
     */
    private function _openTag($tag, $attributes) {
        $attributes = trim($attributes);
        return $attributes === '' ? '<' . $tag . '>' : '<' . $tag . ' ' . $attributes . '>';
    }
}
/*private function _references() {
$references = array();
preg_match_all("/<ref(.*?)>(.*?)<\/ref>/is",$this->text,$match);
for($i = 0; $i < count($match[0]); $i++) {
if(substr($match[1][$i],-1, strlen($match[1][$i])) != '/' {
}
}
}*/
// Demo driver: reads wikitext from the file "table" in the current working
// directory and prints the HTML produced by the parser.
$text = file_get_contents('table');
if ($text === false) {
    // file_get_contents() returns false when the file cannot be read.
    exit("Unable to read the input file 'table'.\n");
}
#$text = '--'."\n";
$o = new MediaWikiParser($text);
print $o->parse();
?>
Sie kann bisher fast noch gar nichts, außer Tabellen – und das ausgiebig ... die auch in allen Formen, wie sie Wikipedia nur so zu bieten hat ;D