<?php

/**
 *
 * Parse for URLs in the source text.
 *
 * @category Text
 *
 * @package Text_Wiki
 *
 * @author Paul M. Jones <pmjones@php.net>
 *
 * @author Moritz Venn <moritz.venn@freaque.net>
 *
 * @license LGPL
 *
 * @version $Id: Url.php,v 1.1 2005/12/06 15:54:56 ritzmo Exp $
 *
 */

/**
 *
 * Parse for URLs in the source text.
 *
 * Various URL markings are supported: inline (the URL by itself),
 * undescribed (where the URL is enclosed in square brackets), and
 * described, also called named reference (where the URL is enclosed in
 * square brackets and has a name included inside the brackets). E.g.:
 *
 * inline      -- http://example.com
 * undescribed -- [http://example.com]
 * described   -- [http://example.com Example Description]
 * described   -- [http://www.example.com|Example Description]
 *
 * When rendering a URL token, this will convert URLs pointing to a .gif,
 * .jpg, or .png image into an inline <img /> tag (for the 'xhtml'
 * format).
 *
 * Token options are:
 *
 * 'type' => ['inline'|'footnote'|'descr'] the type of URL
 *
 * 'href' => the URL link href portion
 *
 * 'text' => the displayed text of the URL link
 *
 * @category Text
 *
 * @package Text_Wiki
 *
 * @author Paul M. Jones <pmjones@php.net>
 *
 * @author Moritz Venn <moritz.venn@freaque.net>
 *
 */

class Text_Wiki_Parse_Url extends Text_Wiki_Parse {


    /**
     *
     * Running counter incremented by processFootnote() and
     * processOrdinary() each time they handle a bracketed URL.
     *
     * Only processFootnote() uses the value (as the link text).
     *
     * @access public
     *
     * @var int
     *
     */

    var $footnoteCount = 0;


    /**
     *
     * Default configuration: the URL schemes recognized by this rule.
     *
     * Each entry is the literal scheme prefix, including its separator
     * ('://' for most schemes, ':' alone for mailto).
     *
     * @access public
     *
     * @var array
     *
     */

    var $conf = array(
        'schemes' => array(
            'http://',
            'https://',
            'ftp://',
            'gopher://',
            'news://',
            'mailto:',
            'irc://'
        )
    );


    /**
     *
     * Constructor.
     *
     * We override the constructor so we can build (and comment) the URL
     * regex from the configured list of schemes.  The result is stored in
     * $this->regex WITHOUT pattern delimiters; parse() wraps it as needed.
     *
     * Capture groups contributed by $this->regex itself:
     *   1. the scheme that matched
     *   2. the last repeated "segment/" piece (unset/empty when the URL
     *      has no slash after the scheme)
     *
     * @access public
     *
     * @param object &$obj Passed straight through to the parent
     * constructor (presumably the calling Text_Wiki object -- confirm
     * against Text_Wiki_Parse).
     *
     */

    function __construct(&$obj)
    {
        parent::Text_Wiki_Parse($obj);

        // convert the list of recognized schemes to a regex-safe string,
        // where the pattern delim is a slash
        $quoted = array();
        foreach ($this->getConf('schemes', array()) as $scheme) {
            $quoted[] = preg_quote($scheme, '/');
        }
        $schemes = implode('|', $quoted);

        // build the regex
        $this->regex =
            "($schemes)" . // allowed schemes
            "(" . // start of the repeated "segment/" group
            "[^ \\/\"\'{$this->wiki->delim}]*\\/" . // a run with no spaces, slashes, double-quotes, single-quotes, or delimiters, then one slash
            ")*" . // end of the repeated group
            "[^ \\t\\n\\/\"\'{$this->wiki->delim}]*" . // final segment: additionally no tabs or newlines
            "[A-Za-z0-9\\/?=&~_]"; // the URL must end on one of these characters
        // fix for jEdit syntax highlighting bug: \"
    }


    /**
     *
     * PHP 4-style constructor, kept for backward compatibility.
     *
     * PHP 8 no longer treats a method named after its class as the
     * constructor, so the real work lives in __construct().  The call is
     * class-qualified on purpose: it must not dispatch to a subclass
     * override of __construct(), which could recurse back into here.
     *
     * @access public
     *
     * @param object &$obj Forwarded unchanged to __construct().
     *
     */

    function Text_Wiki_Parse_Url(&$obj)
    {
        Text_Wiki_Parse_Url::__construct($obj);
    }


    /**
     *
     * Find bracketed URLs in the source text and replace them with tokens.
     *
     * Two passes run, in this order, over $this->wiki->source:
     *
     *   1. described URLs    -- [http://example.com Text] or [url|Text]
     *   2. undescribed URLs  -- [http://example.com]
     *
     * Detection of bare inline URLs, protocol-less host names and
     * <user@host> e-mail addresses is disabled (see the commented-out
     * section at the end of the method).
     *
     * If preg_replace_callback() reports an error by returning NULL, the
     * source text is left untouched instead of being overwritten.
     *
     * @access public
     *
     * @return void
     *
     */

    function parse()
    {
        // -------------------------------------------------------------
        //
        // Described-reference (named) URLs.
        //

        // the URL, then a single space or pipe, then the description
        // (anything up to the closing bracket)
        $tmp_regex = '/\[(' . $this->regex . ')[ |]([^\]]+)\]/';

        // use a custom callback processing method to generate
        // the replacement text for matches.
        $replaced = preg_replace_callback(
            $tmp_regex,
            array(&$this, 'processDescr'),
            $this->wiki->source
        );

        // NULL means the regex engine failed; keep the text we had.
        if ($replaced !== null) {
            $this->wiki->source = $replaced;
        }


        // -------------------------------------------------------------
        //
        // Unnamed-reference ('Ordinary'-style) URLs.
        //

        // just the URL between brackets; /U makes the quantifiers ungreedy
        $tmp_regex = '/\[(' . $this->regex . ')\]/U';

        // use a custom callback processing method to generate
        // the replacement text for matches.
        $replaced = preg_replace_callback(
            $tmp_regex,
            //array(&$this, 'processFootnote'),
            array(&$this, 'processOrdinary'),
            $this->wiki->source
        );

        // NULL means the regex engine failed; keep the text we had.
        if ($replaced !== null) {
            $this->wiki->source = $replaced;
        }


        // -------------------------------------------------------------
        //
        // Normal inline URLs.
        //

        /*

        ## DISABLED FOR ENANO
        ## This messes up HTML links.

        // the regular expression for this kind of URL

        $tmp_regex = '/(^|[^A-Za-z])(' . $this->regex . ')(.*?)/';

        // use the standard callback for inline URLs
        $this->wiki->source = preg_replace_callback(
            $tmp_regex,
            array(&$this, 'process'),
            $this->wiki->source
        );

        //$tmp_regex = '/(^|[^A-Za-z])([a-zA-Z])(.*?)/';
        $tmp_regex = '/(^|\s)([a-zA-Z0-9\-]+\.[a-zA-Z0-9\-]+(\.[a-zA-Z0-9\-]+)+)($|\s)/';

        // use the standard callback for inline URLs
        $this->wiki->source = preg_replace_callback(
            $tmp_regex,
            array(&$this, 'processWithoutProtocol'),
            $this->wiki->source
        );

        $tmp_regex = '/(^|\s|'.$this->wiki->delim.')<([a-zA-Z0-9\-\.%_\+\!\*\'\(\)\,]+@[a-zA-Z0-9\-]+(\.[a-zA-Z0-9\-]+)+)>(\s|'.$this->wiki->delim.'|$)/';

        // use the standard callback for inline URLs
        $this->wiki->source = preg_replace_callback(
            $tmp_regex,
            array(&$this, 'processInlineEmail'),
            $this->wiki->source
        );
        */
    }


    /**
     *
     * Process inline URLs (the URL by itself, with its scheme).
     *
     * Currently not wired up: the only call site is in the disabled
     * section of parse().
     *
     * @access public
     *
     * @param array &$matches Matches as generated by
     * preg_replace_callback for the inline-URL pattern in parse():
     * $matches[1] is whatever preceded the URL, $matches[2] is the URL,
     * $matches[3] and $matches[4] are the groups inside $this->regex, and
     * $matches[5] is the pattern's trailing group.
     *
     * @return string The text before the URL, the URL token, and the
     * trailing group, concatenated.
     *
     */

    function process(&$matches)
    {
        // the URL is both the link target and the visible text
        $url = $matches[2];

        $options = array(
            'type' => 'inline',
            'href' => $url,
            'text' => $url
        );

        // tokenize, putting back what surrounded the URL
        $token = $this->wiki->addToken($this->rule, $options);
        return $matches[1] . $token . $matches[5];
    }


    /**
     *
     * Process inline host names written without a scheme
     * (e.g. www.example.com); 'http://' is prepended for the href.
     *
     * Currently not wired up: the only call site is in the disabled
     * section of parse().
     *
     * @access public
     *
     * @param array &$matches Matches as generated by
     * preg_replace_callback: $matches[1] is the leading whitespace (or
     * empty at start of text), $matches[2] is the host name, $matches[4]
     * is the trailing whitespace (or empty at end of text).
     *
     * @return string The leading text, the URL token, and the trailing
     * text, concatenated.
     *
     */

    function processWithoutProtocol(&$matches)
    {
        $host = $matches[2];

        $options = array(
            'type' => 'inline',
            'href' => 'http://' . $host,
            'text' => $host
        );

        // tokenize, putting back what surrounded the host name
        $token = $this->wiki->addToken($this->rule, $options);
        return $matches[1] . $token . $matches[4];
    }


    /**
     *
     * Process inline e-mail addresses written as <user@example.com>.
     *
     * Currently not wired up: the only call site is in the disabled
     * section of parse().
     *
     * @access public
     *
     * @param array &$matches Matches as generated by
     * preg_replace_callback: $matches[1] is what preceded the opening
     * angle bracket, $matches[2] is the address, $matches[4] is what
     * followed the closing angle bracket.
     *
     * @return string The leading text, the URL token, and the trailing
     * text, concatenated.
     *
     */

    function processInlineEmail(&$matches)
    {
        $address = $matches[2];

        // A mailto URI is "mailto:" followed directly by the address;
        // there is no "//" authority part (this also matches the
        // 'mailto:' entry in the default scheme list).
        $options = array(
            'type' => 'inline',
            'href' => 'mailto:' . $address,
            'text' => $address
        );

        // tokenize, putting back what surrounded the address
        $token = $this->wiki->addToken($this->rule, $options);
        return $matches[1] . $token . $matches[4];
    }


    /**
     *
     * Process numbered (footnote) URLs.
     *
     * The link text is the running footnote number rather than the URL.
     * Currently not wired up: parse() uses processOrdinary() for
     * undescribed URLs instead.
     *
     * Token options are:
     *     'type' => 'footnote'
     *     'href' => the URL link href portion
     *     'text' => the footnote number
     *
     * @access public
     *
     * @param array &$matches Matches as generated by
     * preg_replace_callback; $matches[1] is the URL between the brackets.
     *
     * @return string The processed text replacement.
     *
     */

    function processFootnote(&$matches)
    {
        // keep a running count for footnotes
        $this->footnoteCount++;

        $options = array(
            'type' => 'footnote',
            'href' => $matches[1],
            'text' => $this->footnoteCount
        );

        // tokenize
        return $this->wiki->addToken($this->rule, $options);
    }


    /**
     *
     * Process undescribed URLs, e.g. [http://example.com].
     *
     * The URL itself is used as the link text.
     *
     * Token options are:
     *     'type' => 'descr'
     *     'href' => the URL link href portion
     *     'text' => the same URL
     *
     * @access public
     *
     * @param array &$matches Matches as generated by
     * preg_replace_callback; $matches[1] is the URL between the brackets.
     *
     * @return string The processed text replacement.
     *
     */

    function processOrdinary(&$matches)
    {
        // The counter is not used for the token, but it is a public
        // property and has always been advanced here, so keep doing so.
        $this->footnoteCount++;

        $url = $matches[1];

        $options = array(
            'type' => 'descr',
            'href' => $url,
            'text' => $url
        );

        // tokenize
        return $this->wiki->addToken($this->rule, $options);
    }


    /**
     *
     * Process described-reference (named-reference) URLs.
     *
     * Token options are:
     *     'type' => 'descr'
     *     'href' => the URL link href portion
     *     'text' => the displayed text of the URL link
     *
     * @access public
     *
     * @param array &$matches Matches as generated by
     * preg_replace_callback for the described-URL pattern in parse():
     * $matches[1] is the URL, $matches[2] and $matches[3] are the groups
     * inside $this->regex, and $matches[4] is the description.
     *
     * @return string The processed text replacement.
     *
     */

    function processDescr(&$matches)
    {
        $options = array(
            'type' => 'descr',
            'href' => $matches[1],
            'text' => $matches[4]
        );

        // tokenize
        return $this->wiki->addToken($this->rule, $options);
    }
}
?>