includes/wikiengine/Parse/Mediawiki/Url.php
changeset 1 fe660c52c48f
equal deleted inserted replaced
0:902822492a68 1:fe660c52c48f
       
     1 <?php
       
     2 
       
     3 /**
       
     4 * 
       
     5 * Parse for URLs in the source text.
       
     6 * 
       
     7 * @category Text
       
     8 * 
       
     9 * @package Text_Wiki
       
    10 * 
       
    11 * @author Paul M. Jones <pmjones@php.net>
       
    12 * 
       
    13 * @author Moritz Venn <moritz.venn@freaque.net>
       
    14 * 
       
    15 * @license LGPL
       
    16 * 
       
    17 * @version $Id: Url.php,v 1.1 2005/12/06 15:54:56 ritzmo Exp $
       
    18 * 
       
    19 */
       
    20 
       
    21 /**
       
    22 * 
       
    23 * Parse for URLs in the source text.
       
    24 * 
       
    25 * Various URL markings are supported: inline (the URL by itself),
       
    26 * undescribed (where the URL is enclosed in square brackets), and named
       
    27 * reference (where the URL is enclosed in square brackets and has a
       
    28 * name included inside the brackets).  E.g.:
       
    29 *
       
    30 * inline      -- http://example.com
       
    31 * undescribed -- [http://example.com]
       
    32 * described   -- [http://example.com Example Description]
       
    33 * described   -- [http://www.example.com|Example Description]
       
    34 *
       
    35 * When rendering a URL token, this will convert URLs pointing to a .gif,
       
    36 * .jpg, or .png image into an inline <img /> tag (for the 'xhtml'
       
    37 * format).
       
    38 *
       
    39 * Token options are:
       
    40 * 
       
    41 * 'type' => ['inline'|'footnote'|'descr'] the type of URL
       
    42 * 
       
    43 * 'href' => the URL link href portion
       
    44 * 
       
    45 * 'text' => the displayed text of the URL link
       
    46 * 
       
    47 * @category Text
       
    48 * 
       
    49 * @package Text_Wiki
       
    50 * 
       
    51 * @author Paul M. Jones <pmjones@php.net>
       
    52 * 
       
    53 * @author Moritz Venn <moritz.venn@freaque.net>
       
    54 * 
       
    55 */
       
    56 
       
    57 class Text_Wiki_Parse_Url extends Text_Wiki_Parse {
       
    58     
       
    59     
       
    60     /**
       
    61     * 
       
    62     * Keeps a running count of numbered-reference URLs.
       
    63     * 
       
    64     * @access public
       
    65     * 
       
    66     * @var int
       
    67     * 
       
    68     */
       
    69     
       
    70     var $footnoteCount = 0;
       
    71     
       
    72     
       
    73     /**
       
    74     * 
       
    75     * URL schemes recognized by this rule.
       
    76     * 
       
    77     * @access public
       
    78     * 
       
    79     * @var array
       
    80     * 
       
    81     */
       
    82     
       
    83     var $conf = array(
       
    84         'schemes' => array(
       
    85             'http://',
       
    86             'https://',
       
    87             'ftp://',
       
    88             'gopher://',
       
    89             'news://',
       
    90             'mailto:',
       
    91             'irc://'
       
    92         )
       
    93     );
       
    94     
       
    95     
       
    96     /**
       
    97     * 
       
    98     * Constructor.
       
    99     * 
       
   100     * We override the constructor so we can comment the regex nicely.
       
   101     * 
       
   102     * @access public
       
   103     * 
       
   104     */
       
   105     
       
   106     function Text_Wiki_Parse_Url(&$obj)
       
   107     {
       
   108         parent::Text_Wiki_Parse($obj);
       
   109         
       
   110         // convert the list of recognized schemes to a regex-safe string,
       
   111         // where the pattern delim is a slash
       
   112         $tmp = array();
       
   113         $list = $this->getConf('schemes', array());
       
   114         foreach ($list as $val) {
       
   115             $tmp[] = preg_quote($val, '/');
       
   116         }
       
   117         $schemes = implode('|', $tmp);
       
   118         
       
   119         // build the regex
       
   120         $this->regex =
       
   121             "($schemes)" . // allowed schemes
       
   122             "(" . // start pattern
       
   123             "[^ \\/\"\'{$this->wiki->delim}]*\\/" . // no spaces, backslashes, slashes, double-quotes, single quotes, or delimiters;
       
   124             ")*" . // end pattern
       
   125             "[^ \\t\\n\\/\"\'{$this->wiki->delim}]*" .
       
   126             "[A-Za-z0-9\\/?=&~_]";
       
   127             // fix for jEdit syntax highlighting bug: \"
       
   128     }
       
   129     
       
   130     
       
   131     /**
       
   132     * 
       
   133     * Find three different kinds of URLs in the source text.
       
   134     *
       
   135     * @access public
       
   136     * 
       
   137     */
       
   138     
       
   139     function parse()
       
   140     {
       
   141         // -------------------------------------------------------------
       
   142         // 
       
   143         // Described-reference (named) URLs.
       
   144         // 
       
   145 
       
   146         // the regular expression for this kind of URL
       
   147         $tmp_regex = '/\[(' . $this->regex . ')[ |]([^\]]+)\]/';
       
   148 
       
   149         // use a custom callback processing method to generate
       
   150         // the replacement text for matches.
       
   151         $this->wiki->source = preg_replace_callback(
       
   152             $tmp_regex,
       
   153             array(&$this, 'processDescr'),
       
   154             $this->wiki->source
       
   155         );
       
   156 
       
   157         
       
   158         // -------------------------------------------------------------
       
   159         // 
       
   160         // Unnamed-reference ('Ordinary'-style) URLs.
       
   161         // 
       
   162         
       
   163         // the regular expression for this kind of URL
       
   164         $tmp_regex = '/\[(' . $this->regex . ')\]/U';
       
   165         
       
   166         // use a custom callback processing method to generate
       
   167         // the replacement text for matches.
       
   168         $this->wiki->source = preg_replace_callback(
       
   169             $tmp_regex,
       
   170             //array(&$this, 'processFootnote'),
       
   171             array(&$this, 'processOrdinary'),
       
   172             $this->wiki->source
       
   173         );
       
   174         
       
   175         
       
   176         // -------------------------------------------------------------
       
   177         // 
       
   178         // Normal inline URLs.
       
   179         // 
       
   180         
       
   181         /*
       
   182         
       
   183         ## DISABLED FOR ENANO
       
   184         ## This messes up HTML links.
       
   185         
       
   186         // the regular expression for this kind of URL
       
   187         
       
   188         $tmp_regex = '/(^|[^A-Za-z])(' . $this->regex . ')(.*?)/';
       
   189         
       
   190         // use the standard callback for inline URLs
       
   191         $this->wiki->source = preg_replace_callback(
       
   192             $tmp_regex,
       
   193             array(&$this, 'process'),
       
   194             $this->wiki->source
       
   195         );
       
   196 
       
   197         //$tmp_regex = '/(^|[^A-Za-z])([a-zA-Z])(.*?)/';
       
   198         $tmp_regex = '/(^|\s)([a-zA-Z0-9\-]+\.[a-zA-Z0-9\-]+(\.[a-zA-Z0-9\-]+)+)($|\s)/';
       
   199         
       
   200         // use the standard callback for inline URLs
       
   201         $this->wiki->source = preg_replace_callback(
       
   202             $tmp_regex,
       
   203             array(&$this, 'processWithoutProtocol'),
       
   204             $this->wiki->source
       
   205         );
       
   206 
       
   207         $tmp_regex = '/(^|\s|'.$this->wiki->delim.')<([a-zA-Z0-9\-\.%_\+\!\*\'\(\)\,]+@[a-zA-Z0-9\-]+(\.[a-zA-Z0-9\-]+)+)>(\s|'.$this->wiki->delim.'|$)/';
       
   208         
       
   209         // use the standard callback for inline URLs
       
   210         $this->wiki->source = preg_replace_callback(
       
   211             $tmp_regex,
       
   212             array(&$this, 'processInlineEmail'),
       
   213             $this->wiki->source
       
   214         );
       
   215         */
       
   216     }
       
   217     
       
   218     
       
   219     /**
       
   220     * 
       
   221     * Process inline URLs.
       
   222     * 
       
   223     * @param array &$matches
       
   224     * 
       
   225     * @param array $matches An array of matches from the parse() method
       
   226     * as generated by preg_replace_callback.  $matches[0] is the full
       
   227     * matched string, $matches[1] is the first matched pattern,
       
   228     * $matches[2] is the second matched pattern, and so on.
       
   229     * 
       
   230     * @return string The processed text replacement.
       
   231     * 
       
   232     */ 
       
   233     
       
   234     function process(&$matches)
       
   235     {
       
   236         // set options
       
   237         $options = array(
       
   238             'type' => 'inline',
       
   239             'href' => $matches[2],
       
   240             'text' => $matches[2]
       
   241         );
       
   242         
       
   243         // tokenize
       
   244         return $matches[1] . $this->wiki->addToken($this->rule, $options) . $matches[5];
       
   245     }
       
   246 
       
   247     function processWithoutProtocol(&$matches)
       
   248     {
       
   249         // set options
       
   250         $options = array(
       
   251             'type' => 'inline',
       
   252             'href' => 'http://'.$matches[2],
       
   253             'text' => $matches[2]
       
   254         );
       
   255         
       
   256         // tokenize
       
   257         return $matches[1] . $this->wiki->addToken($this->rule, $options) . $matches[4];
       
   258     }
       
   259 
       
   260     function processInlineEmail(&$matches)
       
   261     {
       
   262         // set options
       
   263         $options = array(
       
   264             'type' => 'inline',
       
   265             'href' => 'mailto://'.$matches[2],
       
   266             'text' => $matches[2]
       
   267         );
       
   268         
       
   269         // tokenize
       
   270         return $matches[1] . $this->wiki->addToken($this->rule, $options) . $matches[4];
       
   271     }    
       
   272     
       
   273     /**
       
   274     * 
       
   275     * Process numbered (footnote) URLs.
       
   276     * 
       
   277     * Token options are:
       
   278     * @param array &$matches
       
   279     * 
       
   280     * @param array $matches An array of matches from the parse() method
       
   281     * as generated by preg_replace_callback.  $matches[0] is the full
       
   282     * matched string, $matches[1] is the first matched pattern,
       
   283     * $matches[2] is the second matched pattern, and so on.
       
   284     * 
       
   285     * @return string The processed text replacement.
       
   286     * 
       
   287     */ 
       
   288     
       
   289     function processFootnote(&$matches)
       
   290     {
       
   291         // keep a running count for footnotes 
       
   292         $this->footnoteCount++;
       
   293         
       
   294         // set options
       
   295         $options = array(
       
   296             'type' => 'footnote',
       
   297             'href' => $matches[1],
       
   298             'text' => $this->footnoteCount
       
   299         );
       
   300         
       
   301         // tokenize
       
   302         return $this->wiki->addToken($this->rule, $options);
       
   303     }
       
   304     
       
   305      function processOrdinary(&$matches)
       
   306     {
       
   307     	// keep a running count for footnotes 
       
   308         $this->footnoteCount++;
       
   309         
       
   310         // set options
       
   311         $options = array(
       
   312             'type' => 'descr',
       
   313             'href' => $matches[1],
       
   314             'text' => $matches[1]
       
   315         );
       
   316         
       
   317         // tokenize
       
   318         return $this->wiki->addToken($this->rule, $options);
       
   319     }
       
   320     
       
   321     
       
   322     /**
       
   323     * 
       
   324     * Process described-reference (named-reference) URLs.
       
   325     * 
       
   326     * Token options are:
       
   327     *     'type' => ['inline'|'footnote'|'descr'] the type of URL
       
   328     *     'href' => the URL link href portion
       
   329     *     'text' => the displayed text of the URL link
       
   330     * 
       
   331     * @param array &$matches
       
   332     * 
       
   333     * @param array $matches An array of matches from the parse() method
       
   334     * as generated by preg_replace_callback.  $matches[0] is the full
       
   335     * matched string, $matches[1] is the first matched pattern,
       
   336     * $matches[2] is the second matched pattern, and so on.
       
   337     * 
       
   338     * @return string The processed text replacement.
       
   339     * 
       
   340     */ 
       
   341     
       
   342     function processDescr(&$matches)
       
   343     {
       
   344         // set options
       
   345         $options = array(
       
   346             'type' => 'descr',
       
   347             'href' => $matches[1],
       
   348             'text' => $matches[4]
       
   349         );
       
   350 
       
   351         // tokenize
       
   352         return $this->wiki->addToken($this->rule, $options);
       
   353     }
       
   354 }
       
   355 ?>