Modified the Text_Wiki parser to fully support UTF-8 strings and made several other UTF-8 fixes; international characters now seem to work reasonably well
--- a/includes/clientside/static/ajax.js Sat Jul 28 18:11:14 2007 -0400
+++ b/includes/clientside/static/ajax.js Sun Jul 29 17:40:36 2007 -0400
@@ -164,7 +164,7 @@
// IE <6 pseudo-compatibility
if ( KILL_SWITCH )
return true;
- goBusy('Saving page...');
+ //goBusy('Saving page...');
var text = ajaxEscape($('ajaxEditArea').getContent());
if(document.mdgAjaxEditor.minor.checked) minor='&minor';
else minor='';
--- a/includes/comment.php Sat Jul 28 18:11:14 2007 -0400
+++ b/includes/comment.php Sun Jul 29 17:40:36 2007 -0400
@@ -82,6 +82,7 @@
global $db, $session, $paths, $template, $plugins; // Common objects
$parser = new Services_JSON(SERVICES_JSON_LOOSE_TYPE);
$data = $parser->decode($json);
+ $data = decode_unicode_array($data);
if ( !isset($data['mode']) )
{
return $parser->encode(Array('mode'=>'error','error'=>'No mode defined!'));
--- a/includes/functions.php Sat Jul 28 18:11:14 2007 -0400
+++ b/includes/functions.php Sun Jul 29 17:40:36 2007 -0400
@@ -1418,6 +1418,10 @@
$_GET = strip_nul_chars($_GET);
$_COOKIE = strip_nul_chars($_COOKIE);
$_REQUEST = strip_nul_chars($_REQUEST);
+ $_POST = decode_unicode_array($_POST);
+ $_GET = decode_unicode_array($_GET);
+ $_COOKIE = decode_unicode_array($_COOKIE);
+ $_REQUEST = decode_unicode_array($_REQUEST);
}
/**
@@ -2578,6 +2582,80 @@
return $haystack;
}
+/**
+ * Decodes JavaScript escape()-style %uXXXX sequences into UTF-8; other bytes are kept
+ * as-is. Based on an example from http://us2.php.net/urldecode.
+ * Sequences lacking four hex digits, and %u0000 (NUL), are left undecoded.
+ * @param string The urlencoded string
+ * @return string
+ */
+
+function decode_unicode_url($str)
+{
+  $hexdigits = '0123456789abcdefABCDEF';
+  $res = '';
+  $i = 0;
+  $max = strlen($str) - 6; // a full %uXXXX escape cannot start after this offset
+  while ( $i <= $max )
+  {
+    // hexdec() skips non-hex characters, so check all four digits before trusting it
+    $is_escape = ( $str[$i] == '%' && $str[$i + 1] == 'u' && strspn($str, $hexdigits, $i + 2, 4) == 4 );
+    $value = ( $is_escape ) ? hexdec(substr($str, $i + 2, 4)) : 0;
+    if ( $value == 0 )
+    {
+      // Ordinary byte, malformed escape, or NUL: pass one byte through literally
+      $res .= $str[$i++];
+      continue;
+    }
+    $i += 6;
+    if ( $value >= 0xd800 && $value <= 0xdbff && preg_match('/^%u(d[c-f][0-9a-f]{2})/i', substr($str, $i, 6), $low) )
+    {
+      // UTF-16 surrogate pair: merge both halves into one code point above U+FFFF
+      $value = 0x10000 + (($value - 0xd800) << 10) + (hexdec($low[1]) - 0xdc00);
+      $i += 6;
+    }
+    if ( $value < 0x80 ) // 1 byte: 0xxxxxxx
+    {
+      $res .= chr($value);
+    }
+    else if ( $value < 0x800 ) // 2 bytes: 110xxxxx 10xxxxxx
+    {
+      $res .= chr(0xc0 | ($value >> 6)) . chr(0x80 | ($value & 0x3f));
+    }
+    else if ( $value < 0x10000 ) // 3 bytes: 1110xxxx 10xxxxxx 10xxxxxx (lone surrogates land here too)
+    {
+      $res .= chr(0xe0 | ($value >> 12)) . chr(0x80 | (($value >> 6) & 0x3f)) . chr(0x80 | ($value & 0x3f));
+    }
+    else // 4 bytes: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
+    {
+      $res .= chr(0xf0 | ($value >> 18)) . chr(0x80 | (($value >> 12) & 0x3f)) . chr(0x80 | (($value >> 6) & 0x3f)) . chr(0x80 | ($value & 0x3f));
+    }
+  }
+  return $res . substr($str, $i);
+}
+
+/**
+ * Recursively decodes %uXXXX escapes in every string value of an array. Keys and
+ * values that are neither strings nor arrays (integers, booleans, null) are untouched.
+ * @param array Can be multi-depth; anything that is not an array is returned as-is
+ * @return array
+ */
+
+function decode_unicode_array($array)
+{
+  // Non-array values can arrive through the recursion below; foreach would warn on them
+  if ( !is_array($array) )
+  {
+    return $array;
+  }
+  foreach ( $array as $i => $val )
+  {
+    // Strings are decoded directly; everything else goes through the guard above
+    $array[$i] = ( is_string($val) ) ? decode_unicode_url($val) : decode_unicode_array($val);
+  }
+  return $array;
+}
+
//die('<pre>Original: 01010101010100101010100101010101011010'."\nProcessed: ".uncompress_bitfield(compress_bitfield('01010101010100101010100101010101011010')).'</pre>');
?>
--- a/includes/pageutils.php Sat Jul 28 18:11:14 2007 -0400
+++ b/includes/pageutils.php Sun Jul 29 17:40:36 2007 -0400
@@ -838,9 +838,10 @@
$_ob .= '<p>There are currently no comments on this '.strtolower($namespace).'';
if($namespace != 'Article') $_ob .= ' page';
$_ob .= '.</p>';
- } else $_ob .= '<p>There '.$s.' on this article.</p>';
+ } else $_ob .= '<p>There '.$s.' on this article.';
if($session->get_permissions('mod_comments') && $num_unapp > 0) $_ob .= ' <span style="color: #D84308">'.$num_unapp.' of those are unapproved.</span>';
elseif(!$session->get_permissions('mod_comments') && $num_unapp > 0) { $u = ($num_unapp == 1) ? "is $num_unapp comment" : "are $num_unapp comments"; $_ob .= ' However, there ' . $u . ' awating approval.'; }
+ $_ob .= '</p>';
$list = 'list = { ';
// _die(htmlspecialchars($ttext));
$i = -1;
@@ -895,10 +896,10 @@
if($session->get_permissions('edit_comments'))
{
// Edit link
- $strings['EDIT_LINK'] = '<a href="'.makeUrlNS($namespace, $page_id, 'do=comments&sub=editcomment&id='.$row['comment_id']).'" onclick="editComment(\''.$i.'\'); return false;" id="editbtn_'.$i.'">edit</a>';
+ $strings['EDIT_LINK'] = '<a href="'.makeUrlNS($namespace, $page_id, 'do=comments&sub=editcomment&id='.$row['comment_id']).'" id="editbtn_'.$i.'">edit</a>';
// Delete link
- $strings['DELETE_LINK'] = '<a href="'.makeUrlNS($namespace, $page_id, 'do=comments&sub=deletecomment&id='.$row['comment_id']).'" onclick="ajaxDeleteComment(\''.$i.'\'); return false;">delete</a>';
+ $strings['DELETE_LINK'] = '<a href="'.makeUrlNS($namespace, $page_id, 'do=comments&sub=deletecomment&id='.$row['comment_id']).'">delete</a>';
}
else
{
@@ -917,12 +918,12 @@
// Mod links
$applink = '';
- $applink .= '<a href="'.makeUrlNS($namespace, $page_id, 'do=comments&sub=admin&action=approve&id='.$row['comment_id']).'" onclick="ajaxCommentAdmin(\'approve\', \''.$i.'\'); return false;" id="mdgApproveLink'.$i.'">';
+ $applink .= '<a href="'.makeUrlNS($namespace, $page_id, 'do=comments&sub=admin&action=approve&id='.$row['comment_id']).'" id="mdgApproveLink'.$i.'">';
if($row['approved']) $applink .= 'Unapprove';
else $applink .= 'Approve';
$applink .= '</a>';
$strings['MOD_APPROVE_LINK'] = $applink; unset($applink);
- $strings['MOD_DELETE_LINK'] = '<a href="'.makeUrlNS($namespace, $page_id, 'do=comments&sub=admin&action=delete&id='.$row['comment_id']).'" onclick="ajaxCommentAdmin(\'delete\', \''.$i.'\'); return false;">Delete</a>';
+ $strings['MOD_DELETE_LINK'] = '<a href="'.makeUrlNS($namespace, $page_id, 'do=comments&sub=admin&action=delete&id='.$row['comment_id']).'">Delete</a>';
// Signature
$strings['SIGNATURE'] = '';
--- a/includes/wikiengine/Render/Xhtml.php Sat Jul 28 18:11:14 2007 -0400
+++ b/includes/wikiengine/Render/Xhtml.php Sun Jul 29 17:40:36 2007 -0400
@@ -61,11 +61,12 @@
// have to check null and false because HTML_ENTITIES is a zero
if ($type === HTML_ENTITIES) {
+ /*
// keep a copy of the translated version of the delimiter
// so we can convert it back.
$new_delim = htmlentities($this->wiki->delim, $quotes, $charset);
-
+
// convert the entities. we silence the call here so that
// errors about charsets don't pop up, per counsel from
// Jan at Horde. (http://pear.php.net/bugs/bug.php?id=4474)
@@ -84,6 +85,7 @@
$text = str_replace(
$new_delim, $this->wiki->delim, $text
);
+ */
} elseif ($type === HTML_SPECIALCHARS) {
--- a/includes/wikiformat.php Sat Jul 28 18:11:14 2007 -0400
+++ b/includes/wikiformat.php Sun Jul 29 17:40:36 2007 -0400
@@ -379,7 +379,7 @@
if ($this->isError($result)) {
return $result;
}
-
+
if (is_object($this->formatObj[$format])) {
$output .= $this->formatObj[$format]->pre();
}
@@ -387,7 +387,7 @@
foreach (array_keys($this->_countRulesTokens) as $rule) {
$this->loadRenderObj($format, $rule);
}
-
+
$k = strlen($this->source);
for ($i = 0; $i < $k; $i++) {
--- a/schema.sql Sat Jul 28 18:11:14 2007 -0400
+++ b/schema.sql Sun Jul 29 17:40:36 2007 -0400
@@ -14,7 +14,7 @@
page_id varchar(64),
namespace varchar(64),
category_id varchar(64)
-) CHARACTER SET `utf8` COLLATE `utf8_bin`;
+) CHARACTER SET `utf8`;
CREATE TABLE {{TABLE_PREFIX}}comments(
comment_id int(12) NOT NULL auto_increment,
@@ -27,12 +27,12 @@
user_id mediumint(8) NOT NULL DEFAULT -1,
time int(12) NOT NULL DEFAULT 0,
PRIMARY KEY ( comment_id )
-) CHARACTER SET `utf8` COLLATE `utf8_bin`;
+) CHARACTER SET `utf8`;
CREATE TABLE {{TABLE_PREFIX}}config(
config_name varchar(63),
config_value text
-) CHARACTER SET `utf8` COLLATE `utf8_bin`;
+) CHARACTER SET `utf8`;
CREATE TABLE {{TABLE_PREFIX}}logs(
log_type varchar(16),
@@ -46,14 +46,14 @@
author varchar(63),
edit_summary text,
minor_edit tinyint(1)
-) CHARACTER SET `utf8` COLLATE `utf8_bin`;
+) CHARACTER SET `utf8`;
CREATE TABLE {{TABLE_PREFIX}}page_text(
page_id varchar(63),
namespace varchar(16) NOT NULL default 'Article',
page_text text,
char_tag varchar(63)
-) CHARACTER SET `utf8` COLLATE `utf8_bin`;
+) CHARACTER SET `utf8`;
CREATE TABLE {{TABLE_PREFIX}}pages(
page_order int(8),
@@ -68,7 +68,7 @@
delvotes int(10) NOT NULL default 0,
password varchar(40) NOT NULL DEFAULT '',
delvote_ips text NOT NULL
-) CHARACTER SET `utf8` COLLATE `utf8_bin`;
+) CHARACTER SET `utf8`;
CREATE TABLE {{TABLE_PREFIX}}session_keys(
session_key varchar(32),
@@ -77,7 +77,7 @@
auth_level tinyint(1) NOT NULL default '0',
source_ip varchar(10) default '0x7f000001',
time bigint(15) default '0'
-) CHARACTER SET `utf8` COLLATE `utf8_bin`;
+) CHARACTER SET `utf8`;
CREATE TABLE {{TABLE_PREFIX}}themes(
theme_id varchar(63),
@@ -85,7 +85,7 @@
theme_order smallint(5) NOT NULL default '1',
default_style varchar(63) NOT NULL DEFAULT '',
enabled tinyint(1) NOT NULL default '1'
-) CHARACTER SET `utf8` COLLATE `utf8_bin`;
+) CHARACTER SET `utf8`;
CREATE TABLE {{TABLE_PREFIX}}users(
user_id mediumint(8) NOT NULL auto_increment,
@@ -105,7 +105,7 @@
temp_password_time int(12) NOT NULL DEFAULT 0,
user_coppa tinyint(1) NOT NULL DEFAULT 0,
PRIMARY KEY (user_id)
-) CHARACTER SET `utf8` COLLATE `utf8_bin`;
+) CHARACTER SET `utf8`;
CREATE TABLE {{TABLE_PREFIX}}users_extra(
user_id mediumint(8) NOT NULL,
@@ -119,7 +119,7 @@
user_hobbies text,
email_public tinyint(1) NOT NULL DEFAULT 0,
PRIMARY KEY ( user_id )
-) CHARACTER SET `utf8` COLLATE `utf8_bin`;
+) CHARACTER SET `utf8`;
CREATE TABLE {{TABLE_PREFIX}}banlist(
ban_id mediumint(8) NOT NULL auto_increment,
@@ -128,7 +128,7 @@
is_regex tinyint(1) DEFAULT 0,
reason text,
PRIMARY KEY ( ban_id )
-) CHARACTER SET `utf8` COLLATE `utf8_bin`;
+) CHARACTER SET `utf8`;
CREATE TABLE {{TABLE_PREFIX}}files(
file_id int(12) NOT NULL auto_increment,
@@ -140,7 +140,7 @@
file_extension varchar(8) default NULL,
file_key varchar(32) NOT NULL,
PRIMARY KEY (file_id)
-) CHARACTER SET `utf8` COLLATE `utf8_bin`;
+) CHARACTER SET `utf8`;
CREATE TABLE {{TABLE_PREFIX}}buddies(
buddy_id int(15) NOT NULL auto_increment,
@@ -148,7 +148,7 @@
buddy_user_id mediumint(8),
is_friend tinyint(1) NOT NULL default '1',
PRIMARY KEY (buddy_id)
-) CHARACTER SET `utf8` COLLATE `utf8_bin`;
+) CHARACTER SET `utf8`;
CREATE TABLE {{TABLE_PREFIX}}privmsgs(
message_id int(15) NOT NULL auto_increment,
@@ -160,7 +160,7 @@
folder_name varchar(63),
message_read tinyint(1) NOT NULL DEFAULT 0,
PRIMARY KEY (message_id)
-) CHARACTER SET `utf8` COLLATE `utf8_bin`;
+) CHARACTER SET `utf8`;
CREATE TABLE {{TABLE_PREFIX}}sidebar(
item_id smallint(3) NOT NULL auto_increment,
@@ -171,7 +171,7 @@
block_type tinyint(1) NOT NULL DEFAULT 0,
block_content text,
PRIMARY KEY ( item_id )
-) CHARACTER SET `utf8` COLLATE `utf8_bin`;
+) CHARACTER SET `utf8`;
CREATE TABLE {{TABLE_PREFIX}}hits(
hit_id bigint(20) NOT NULL auto_increment,
@@ -180,13 +180,13 @@
page_id varchar(63),
namespace varchar(63),
PRIMARY KEY ( hit_id )
-) CHARACTER SET `utf8` COLLATE `utf8_bin`;
+) CHARACTER SET `utf8`;
CREATE TABLE {{TABLE_PREFIX}}search_index(
word varbinary(64) NOT NULL,
page_names text,
PRIMARY KEY ( word )
-) CHARACTER SET `utf8` COLLATE `utf8_bin`;
+) CHARACTER SET `utf8`;
CREATE TABLE {{TABLE_PREFIX}}groups(
group_id mediumint(5) UNSIGNED NOT NULL auto_increment,
@@ -194,7 +194,7 @@
group_type tinyint(1) NOT NULL DEFAULT 1,
PRIMARY KEY ( group_id ),
system_group tinyint(1) NOT NULL DEFAULT 0
-) CHARACTER SET `utf8` COLLATE `utf8_bin`;
+) CHARACTER SET `utf8`;
CREATE TABLE {{TABLE_PREFIX}}group_members(
member_id int(12) UNSIGNED NOT NULL auto_increment,
@@ -203,7 +203,7 @@
is_mod tinyint(1) NOT NULL DEFAULT 0,
pending tinyint(1) NOT NULL DEFAULT 0,
PRIMARY KEY ( member_id )
-) CHARACTER SET `utf8` COLLATE `utf8_bin`;
+) CHARACTER SET `utf8`;
CREATE TABLE {{TABLE_PREFIX}}acl(
rule_id int(12) UNSIGNED NOT NULL auto_increment,
@@ -213,7 +213,7 @@
namespace varchar(24),
rules text,
PRIMARY KEY ( rule_id )
-) CHARACTER SET `utf8` COLLATE `utf8_bin`;
+) CHARACTER SET `utf8`;
CREATE TABLE {{TABLE_PREFIX}}search_cache(
search_id int(15) NOT NULL auto_increment,
@@ -221,7 +221,7 @@
query text,
results longblob,
PRIMARY KEY ( search_id )
-) CHARACTER SET `utf8` COLLATE `utf8_bin`;
+) CHARACTER SET `utf8`;
-- Added in 1.0.1
@@ -231,7 +231,7 @@
pg_name varchar(255) NOT NULL DEFAULT '',
pg_target varchar(255) DEFAULT NULL,
PRIMARY KEY ( pg_id )
-) CHARACTER SET `utf8` COLLATE `utf8_bin`;
+) CHARACTER SET `utf8`;
-- Added in 1.0.1
@@ -241,7 +241,7 @@
page_id varchar(63) NOT NULL,
namespace varchar(63) NOT NULL DEFAULT 'Article',
PRIMARY KEY ( pg_member_id )
-) CHARACTER SET `utf8` COLLATE `utf8_bin`;
+) CHARACTER SET `utf8`;
-- Added in 1.0.1
@@ -252,7 +252,7 @@
namespace varchar(255) NOT NULL,
user mediumint(8) NOT NULL DEFAULT 1,
PRIMARY KEY ( tag_id )
-) CHARACTER SET `utf8` COLLATE `utf8_bin`;
+) CHARACTER SET `utf8`;
INSERT INTO {{TABLE_PREFIX}}config(config_name, config_value) VALUES
('site_name', '{{SITE_NAME}}'),