diff -r fbfd6c852c89 -r d8c6b8d49604 includes/wikiengine/parse_mediawiki.php --- a/includes/wikiengine/parse_mediawiki.php Mon Feb 08 17:04:08 2010 -0500 +++ b/includes/wikiengine/parse_mediawiki.php Mon Feb 08 17:04:43 2010 -0500 @@ -22,7 +22,7 @@ 'mailtonotext' => '#\[mailto:([^ \]]+?)\]#', 'mailtowithtext' => '#\[mailto:([^ \]]+?) (.+?)\]#', 'hr' => '/^[-]{4,} *$/m', - 'code' => '/^(?:\r?\n)?(.+?)(?:\r?\n)?<\/code>$/mis' + 'code' => '/^(?:(?:\r?\n)?|
)(.+?)(?:<\/pre>|(?:\r?\n)?<\/code>)$/mis'
   );
   
   private $blockquote_rand_id;
@@ -186,35 +186,9 @@
     // Wrap all block level tags
     RenderMan::tag_strip('_paragraph_bypass', $text, $_nw);
     
-    // I'm not sure why I had to go through all these alternatives. Trying to bring it
-    // all down to one by ?'ing subpatterns was causing things to return empty and throwing
-    // errors in the parser. Eventually, around ~3:57AM I just settled on this motherf---er
-    // of a regular expression.
-    
-    // FIXME: This regexp triggers a known PHP stack size issue under win32 and possibly
-    // other platforms (). The workaround is going to
-    // involve writing our own parser that takes care of recursion without using the stack,
-    // which is going to be a bitch, and may not make it in until Caoineag RCs.
+    // Find all opening and closing tags
     
-    $regex = ";
-              <($blocklevel)
-              (?:
-                # self closing, no attributes
-                [ ]*/>
-              |
-                # self closing, attributes
-                [ ][^>]+? />
-              |
-                # with inner text, no attributes
-                >
-                (?: (?R) | .*? )*
-              |
-                # with inner text and attributes
-                [ ][^>]+?     # attributes
-                >
-                (?: (?R) | .*? )*
-              )
-                ;sx";
+    $regex = ";(<(?:/(?:$blocklevel)|(?:$blocklevel)(?: [^>]*?)?)>);s";
                 
     // oh. and we're using this tokens thing because for identical matches, the first match will
     // get wrapped X number of times instead of all matches getting wrapped once; replacing each
@@ -222,27 +196,46 @@
     
     $tokens = array();
     $rand_id = sha1(microtime() . mt_rand());
+    $tag_stack = array();
     
-    // Temporary hack to fix crashes under win32. Sometime I'll write a loop based
-    // parser for this whole section. Maybe. Perhaps the Apache folks will fix their
-    // Windows binaries first.
-    if ( PHP_OS == 'WIN32' || PHP_OS == 'WINNT' )
+    if ( $text_split = preg_split($regex, $text, -1, PREG_SPLIT_DELIM_CAPTURE) )
     {
-      $regex = str_replace("(?: (?R) | .*? )*", "(?: .*? )", $regex);
-    }
-    if ( preg_match_all($regex, $text, $matches) )
-    {
-      foreach ( $matches[0] as $i => $match )
+      $text = '';
+      // go through the text, extract tag names, and push them to a stack.
+      foreach ( $text_split as $splitpart )
       {
-        $text = str_replace_once($match, "{_pb_:$rand_id:$i}", $text);
-        $tokens[$i] = '<_paragraph_bypass>' . $match . '';
+        if ( preg_match(";^<(/)?($blocklevel)( |>);i", $splitpart, $match) )
+        {
+          $tagname = $match[2];
+          if ( $match[1] == '/' )
+          {
+            // closing tag
+            if ( $tagname != ($top = array_pop($tag_stack)) )
+            {
+              // invalid - push back
+              array_push($tag_stack, $top);
+            }
+            else
+            {
+              // valid - if stack's at zero, add a 
+              if ( count($tag_stack) == 0 )
+                $splitpart .= '';
+            }
+          }
+          else
+          {
+            // push
+            array_push($tag_stack, $tagname);
+            if ( count($tag_stack) == 1 )
+              $splitpart = '<_paragraph_bypass>' . $splitpart;
+          }
+        }
+        $text .= $splitpart;
       }
+      //echo '
' . htmlspecialchars(print_r($text, true)) . '
'; } - foreach ( $tokens as $i => $match ) - { - $text = str_replace_once("{_pb_:$rand_id:$i}", $match, $text); - } + // All things that should be para-bypassed now are surrounded by _paragraph_bypass tags. // die('
' . htmlspecialchars($text) . '
');