diff -r 31d226269d2f -r 4797a4a88533 includes/wikiengine/parse_mediawiki.php --- a/includes/wikiengine/parse_mediawiki.php Tue Mar 30 11:34:56 2010 -0400 +++ b/includes/wikiengine/parse_mediawiki.php Tue Mar 30 11:37:00 2010 -0400 @@ -188,7 +188,20 @@ // Find all opening and closing tags - $regex = ";(<(?:/(?:$blocklevel)|(?:$blocklevel)(?: [^>]*?)?)>);s"; + $regex = ";( + < + (?: + # closing tag + /(?:$blocklevel) + | + # opening or self-closing tag + (?:$blocklevel) + (?:[ ][^>]*?)? + /? + ) + > + ) + ;xs"; // oh. and we're using this tokens thing because for identical matches, the first match will // get wrapped X number of times instead of all matches getting wrapped once; replacing each @@ -204,11 +217,11 @@ // go through the text, extract tag names, and push them to a stack. foreach ( $text_split as $splitpart ) { - if ( preg_match(";^<(/)?($blocklevel)( |>);i", $splitpart, $match) ) + if ( preg_match(";^<(/)?($blocklevel)( ([^>]*?(/)?>)|(/)?>);i", $splitpart, $match) ) { $tagname = $match[2]; if ( $match[1] == '/' ) - { + { // closing tag if ( $tagname != ($top = array_pop($tag_stack)) ) { @@ -224,21 +237,32 @@ } else { - // push - array_push($tag_stack, $tagname); - if ( count($tag_stack) == 1 ) - $splitpart = '<_paragraph_bypass>' . $splitpart; + // is it a self-closed tag? + // FIXME: This parser is VERY STRICT right now as far as XHTML compliance.
<hr> will + // pretty much totally break it, because it will be treated as an opening tag. + // Yes, self closing tags work. + if ( (isset($match[5]) && $match[5] === '/') || (isset($match[6]) && $match[6] === '/') ) + { + // yes + if ( count($tag_stack) == 0 ) + $splitpart = "<_paragraph_bypass>$splitpart</_paragraph_bypass>"; + } + else + { + // opening tag - push + array_push($tag_stack, $tagname); + if ( count($tag_stack) == 1 ) + $splitpart = '<_paragraph_bypass>' . $splitpart; + } } } $text .= $splitpart; } - //echo '
<pre>' . htmlspecialchars(print_r($text, true)) . '</pre>
'; + // echo '
<pre>' . htmlspecialchars(print_r($text, true)) . '</pre>
'; } // All things that should be para-bypassed now are surrounded by _paragraph_bypass tags. - // die('
<pre>' . htmlspecialchars($text) . '</pre>
'); - RenderMan::tag_unstrip('_paragraph_bypass', $text, $_nw, true); // This is potentially a hack. It allows the parser to stick in <_paragraph_bypass> tags