author | Dan |
Wed, 09 Jul 2008 22:49:06 -0400 | |
changeset 622 | 734af34cd7ad |
parent 606 | daf8c556ada7 |
child 685 | 17ebe24cdf85 |
permissions | -rw-r--r-- |
1 | 1 |
<?php |
2 |
||
3 |
/* |
|
4 |
* Enano - an open-source CMS capable of wiki functions, Drupal-like sidebar blocks, and everything in between |
|
536 | 5 |
* Version 1.1.4 (Caoineag alpha 4) |
6 |
* Copyright (C) 2006-2008 Dan Fuhry |
|
1 | 7 |
* search.php - algorithm used to search pages |
8 |
* |
|
9 |
* This program is Free Software; you can redistribute and/or modify it under the terms of the GNU General Public License |
|
10 |
* as published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version. |
|
11 |
* |
|
12 |
* This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied |
|
13 |
* warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for details. |
|
14 |
*/ |
|
15 |
||
16 |
/** |
|
272
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
17 |
* In Enano versions prior to 1.0.2, this class provided a search function that was keyword-based and allowed boolean searches. It was |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
18 |
* cut from Coblynau and replaced with perform_search(), later in this file, because of speed issues. Now mostly deprecated. The only |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
19 |
* thing remaining is the buildIndex function, which is still used by the path manager and the new search framework. |
292
b3cfaf0a505c
Fixed highlighting in search results; changed search algorithm to give more score for terms found in page title; hopefully (hackishly) fixed login_key_cache getting too long
Dan
parents:
272
diff
changeset
|
20 |
* |
1 | 21 |
* @package Enano |
22 |
* @subpackage Page management frontend |
|
272
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
23 |
* @license GNU General Public License <http://enanocms.org/Special:GNU_General_Public_License> |
1 | 24 |
*/ |
25 |
||
26 |
class Searcher |
|
27 |
{ |
|
292
b3cfaf0a505c
Fixed highlighting in search results; changed search algorithm to give more score for terms found in page title; hopefully (hackishly) fixed login_key_cache getting too long
Dan
parents:
272
diff
changeset
|
28 |
|
1 | 29 |
var $results; |
30 |
var $index; |
|
31 |
var $warnings; |
|
32 |
var $match_case = false; |
|
292
b3cfaf0a505c
Fixed highlighting in search results; changed search algorithm to give more score for terms found in page title; hopefully (hackishly) fixed login_key_cache getting too long
Dan
parents:
272
diff
changeset
|
33 |
|
1 | 34 |
function buildIndex($texts) |
35 |
{ |
|
36 |
$this->index = Array(); |
|
272
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
37 |
$stopwords = get_stopwords(); |
292
b3cfaf0a505c
Fixed highlighting in search results; changed search algorithm to give more score for terms found in page title; hopefully (hackishly) fixed login_key_cache getting too long
Dan
parents:
272
diff
changeset
|
38 |
|
1 | 39 |
foreach($texts as $i => $l) |
40 |
{ |
|
41 |
$seed = md5(microtime(true) . mt_rand()); |
|
42 |
$texts[$i] = str_replace("'", 'xxxApoS'.$seed.'xxx', $texts[$i]); |
|
43 |
$texts[$i] = preg_replace('#([\W_]+)#i', ' ', $texts[$i]); |
|
44 |
$texts[$i] = preg_replace('#([ ]+?)#', ' ', $texts[$i]); |
|
45 |
$texts[$i] = preg_replace('#([\']*){2,}#s', '', $texts[$i]); |
|
46 |
$texts[$i] = str_replace('xxxApoS'.$seed.'xxx', "'", $texts[$i]); |
|
47 |
$l = $texts[$i]; |
|
48 |
$words = Array(); |
|
49 |
$good_chars = 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789\' '; |
|
50 |
$good_chars = enano_str_split($good_chars, 1); |
|
51 |
$letters = enano_str_split($l, 1); |
|
52 |
foreach($letters as $x => $t) |
|
53 |
{ |
|
54 |
if(!in_array($t, $good_chars)) |
|
55 |
unset($letters[$x]); |
|
56 |
} |
|
57 |
$letters = implode('', $letters); |
|
58 |
$words = explode(' ', $letters); |
|
59 |
foreach($words as $c => $w) |
|
60 |
{ |
|
371
dc6026376919
Improved compatibility with PostgreSQL and fixed a number of installer bugs; fixed missing "meta" category declaration in language files
Dan
parents:
340
diff
changeset
|
61 |
if(strlen($w) < 2 || in_array($w, $stopwords) || strlen($w) > 63 || preg_match('/[\']{2,}/', $w)) |
1 | 62 |
unset($words[$c]); |
63 |
else |
|
64 |
$words[$c] = $w; |
|
65 |
} |
|
66 |
$words = array_values($words); |
|
67 |
foreach($words as $c => $w) |
|
68 |
{ |
|
69 |
if(isset($this->index[$w])) |
|
70 |
{ |
|
71 |
if(!in_array($i, $this->index[$w])) |
|
72 |
$this->index[$w][] = $i; |
|
73 |
} |
|
74 |
else |
|
75 |
{ |
|
76 |
$this->index[$w] = Array(); |
|
77 |
$this->index[$w][] = $i; |
|
78 |
} |
|
79 |
} |
|
80 |
} |
|
81 |
foreach($this->index as $k => $v) |
|
82 |
{ |
|
83 |
$this->index[$k] = implode(',', $this->index[$k]); |
|
84 |
} |
|
85 |
} |
|
272
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
86 |
} |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
87 |
|
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
88 |
/** |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
89 |
* Searches the site for the specified string and returns an array with each value being an array filled with the following: |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
90 |
* page_id: string, self-explanatory |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
91 |
* namespace: string, self-explanatory |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
92 |
* page_length: integer, the length of the full page in bytes |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
93 |
* page_text: string, the contents of the page (trimmed to ~150 bytes if necessary) |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
94 |
* score: numerical relevance score, 1-100, rounded to 2 digits and calculated based on which terms were present and which were not |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
95 |
* @param string Search query |
499
6b7fdd898ba3
Fixed some bugs with PostgreSQL and added a word_lcase column to the search_index table because collation is not working under MySQL. TODO: Trigger search index rebuild on upgrade to 1.1.4.
Dan
parents:
461
diff
changeset
|
96 |
* @param string|reference Will be filled with any warnings encountered whilst parsing the query |
272
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
97 |
* @param bool Case sensitivity - defaults to false |
334
c72b545f1304
More localization work. Resolved major issue with JSON parser not parsing files over ~50KB. Switched JSON parser to the one from the Zend Framework (BSD licensed). Forced to split enano.json into five different files.
Dan
parents:
322
diff
changeset
|
98 |
* @param array|reference Will be filled with the parsed list of words. |
272
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
99 |
* @return array |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
100 |
*/ |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
101 |
|
334
c72b545f1304
More localization work. Resolved major issue with JSON parser not parsing files over ~50KB. Switched JSON parser to the one from the Zend Framework (BSD licensed). Forced to split enano.json into five different files.
Dan
parents:
322
diff
changeset
|
102 |
function perform_search($query, &$warnings, $case_sensitive = false, &$word_list) |
272
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
103 |
{ |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
104 |
global $db, $session, $paths, $template, $plugins; // Common objects |
335
67bd3121a12e
Replaced TinyMCE 2.x with 3.0 beta 3. Supports everything but IE. Also rewrote the editor interface completely from the ground up.
Dan
parents:
334
diff
changeset
|
105 |
global $lang; |
67bd3121a12e
Replaced TinyMCE 2.x with 3.0 beta 3. Supports everything but IE. Also rewrote the editor interface completely from the ground up.
Dan
parents:
334
diff
changeset
|
106 |
|
272
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
107 |
$warnings = array(); |
292
b3cfaf0a505c
Fixed highlighting in search results; changed search algorithm to give more score for terms found in page title; hopefully (hackishly) fixed login_key_cache getting too long
Dan
parents:
272
diff
changeset
|
108 |
|
272
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
109 |
$query = parse_search_query($query, $warnings); |
292
b3cfaf0a505c
Fixed highlighting in search results; changed search algorithm to give more score for terms found in page title; hopefully (hackishly) fixed login_key_cache getting too long
Dan
parents:
272
diff
changeset
|
110 |
|
272
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
111 |
// Segregate search terms containing spaces |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
112 |
$query_phrase = array( |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
113 |
'any' => array(), |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
114 |
'req' => array() |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
115 |
); |
292
b3cfaf0a505c
Fixed highlighting in search results; changed search algorithm to give more score for terms found in page title; hopefully (hackishly) fixed login_key_cache getting too long
Dan
parents:
272
diff
changeset
|
116 |
|
272
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
117 |
foreach ( $query['any'] as $i => $_ ) |
1 | 118 |
{ |
272
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
119 |
$term =& $query['any'][$i]; |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
120 |
$term = trim($term); |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
121 |
// the indexer only indexes words a-z with apostrophes |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
122 |
if ( preg_match('/[^A-Za-z\']/', $term) ) |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
123 |
{ |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
124 |
$query_phrase['any'][] = $term; |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
125 |
unset($term, $query['any'][$i]); |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
126 |
} |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
127 |
} |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
128 |
unset($term); |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
129 |
$query['any'] = array_values($query['any']); |
292
b3cfaf0a505c
Fixed highlighting in search results; changed search algorithm to give more score for terms found in page title; hopefully (hackishly) fixed login_key_cache getting too long
Dan
parents:
272
diff
changeset
|
130 |
|
272
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
131 |
foreach ( $query['req'] as $i => $_ ) |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
132 |
{ |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
133 |
$term =& $query['req'][$i]; |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
134 |
$term = trim($term); |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
135 |
if ( preg_match('/[^A-Za-z\']/', $term) ) |
1 | 136 |
{ |
272
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
137 |
$query_phrase['req'][] = $term; |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
138 |
unset($term, $query['req'][$i]); |
1 | 139 |
} |
272
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
140 |
} |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
141 |
unset($term); |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
142 |
$query['req'] = array_values($query['req']); |
292
b3cfaf0a505c
Fixed highlighting in search results; changed search algorithm to give more score for terms found in page title; hopefully (hackishly) fixed login_key_cache getting too long
Dan
parents:
272
diff
changeset
|
143 |
|
272
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
144 |
$results = array(); |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
145 |
$scores = array(); |
292
b3cfaf0a505c
Fixed highlighting in search results; changed search algorithm to give more score for terms found in page title; hopefully (hackishly) fixed login_key_cache getting too long
Dan
parents:
272
diff
changeset
|
146 |
$ns_list = '(' . implode('|', array_keys($paths->nslist)) . ')'; |
b3cfaf0a505c
Fixed highlighting in search results; changed search algorithm to give more score for terms found in page title; hopefully (hackishly) fixed login_key_cache getting too long
Dan
parents:
272
diff
changeset
|
147 |
|
272
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
148 |
// FIXME: Update to use FULLTEXT algo when available. |
292
b3cfaf0a505c
Fixed highlighting in search results; changed search algorithm to give more score for terms found in page title; hopefully (hackishly) fixed login_key_cache getting too long
Dan
parents:
272
diff
changeset
|
149 |
|
272
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
150 |
// Build an SQL query to load from the index table |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
151 |
if ( count($query['any']) < 1 && count($query['req']) < 1 && count($query_phrase['any']) < 1 && count($query_phrase['req']) < 1 ) |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
152 |
{ |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
153 |
// This is both because of technical restrictions and devastation that would occur on shared servers/large sites. |
391
85f91037cd4f
Localization is FINISHED, DAMN IT HELLAH YEAH! OVER WITH! Man, it feels to get that off my chest. Release is in under 48 hours, folks. And we're ready for it.
Dan
parents:
387
diff
changeset
|
154 |
$warnings[] = $lang->get('search_err_query_no_positive'); |
272
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
155 |
return array(); |
1 | 156 |
} |
292
b3cfaf0a505c
Fixed highlighting in search results; changed search algorithm to give more score for terms found in page title; hopefully (hackishly) fixed login_key_cache getting too long
Dan
parents:
272
diff
changeset
|
157 |
|
272
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
158 |
// |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
159 |
// STAGE 1 |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
160 |
// Get all possible result pages from the search index. Tally which pages have the most words, and later sort them by boolean relevance |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
161 |
// |
292
b3cfaf0a505c
Fixed highlighting in search results; changed search algorithm to give more score for terms found in page title; hopefully (hackishly) fixed login_key_cache getting too long
Dan
parents:
272
diff
changeset
|
162 |
|
272
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
163 |
// Skip this if no indexable words are included |
292
b3cfaf0a505c
Fixed highlighting in search results; changed search algorithm to give more score for terms found in page title; hopefully (hackishly) fixed login_key_cache getting too long
Dan
parents:
272
diff
changeset
|
164 |
|
272
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
165 |
if ( count($query['any']) > 0 || count($query['req']) > 0 ) |
1 | 166 |
{ |
272
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
167 |
$where_any = array(); |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
168 |
foreach ( $query['any'] as $term ) |
1 | 169 |
{ |
272
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
170 |
$term = escape_string_like($term); |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
171 |
if ( !$case_sensitive ) |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
172 |
$term = strtolower($term); |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
173 |
$where_any[] = $term; |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
174 |
} |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
175 |
foreach ( $query['req'] as $term ) |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
176 |
{ |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
177 |
$term = escape_string_like($term); |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
178 |
if ( !$case_sensitive ) |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
179 |
$term = strtolower($term); |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
180 |
$where_any[] = $term; |
1 | 181 |
} |
292
b3cfaf0a505c
Fixed highlighting in search results; changed search algorithm to give more score for terms found in page title; hopefully (hackishly) fixed login_key_cache getting too long
Dan
parents:
272
diff
changeset
|
182 |
|
499
6b7fdd898ba3
Fixed some bugs with PostgreSQL and added a word_lcase column to the search_index table because collation is not working under MySQL. TODO: Trigger search index rebuild on upgrade to 1.1.4.
Dan
parents:
461
diff
changeset
|
183 |
$col_word = ( $case_sensitive ) ? 'word' : 'word_lcase'; |
272
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
184 |
$where_any = ( count($where_any) > 0 ) ? '( ' . $col_word . ' = \'' . implode('\' OR ' . $col_word . ' = \'', $where_any) . '\' )' : ''; |
292
b3cfaf0a505c
Fixed highlighting in search results; changed search algorithm to give more score for terms found in page title; hopefully (hackishly) fixed login_key_cache getting too long
Dan
parents:
272
diff
changeset
|
185 |
|
272
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
186 |
// generate query |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
187 |
// using a GROUP BY here ensures that the same word with a different case isn't counted as 2 words - it's all melted back |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
188 |
// into one later in the processing stages |
292
b3cfaf0a505c
Fixed highlighting in search results; changed search algorithm to give more score for terms found in page title; hopefully (hackishly) fixed login_key_cache getting too long
Dan
parents:
272
diff
changeset
|
189 |
// $group_by = ( $case_sensitive ) ? '' : ' GROUP BY lcase(word);'; |
b3cfaf0a505c
Fixed highlighting in search results; changed search algorithm to give more score for terms found in page title; hopefully (hackishly) fixed login_key_cache getting too long
Dan
parents:
272
diff
changeset
|
190 |
$sql = "SELECT word, page_names FROM " . table_prefix . "search_index WHERE {$where_any}"; |
272
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
191 |
if ( !($q = $db->sql_unbuffered_query($sql)) ) |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
192 |
$db->_die('Error is in perform_search(), includes/search.php, query 1'); |
292
b3cfaf0a505c
Fixed highlighting in search results; changed search algorithm to give more score for terms found in page title; hopefully (hackishly) fixed login_key_cache getting too long
Dan
parents:
272
diff
changeset
|
193 |
|
272
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
194 |
$word_tracking = array(); |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
195 |
if ( $row = $db->fetchrow() ) |
1 | 196 |
{ |
272
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
197 |
do |
1 | 198 |
{ |
272
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
199 |
// get page list |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
200 |
$pages =& $row['page_names']; |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
201 |
if ( strpos($pages, ',') ) |
1 | 202 |
{ |
272
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
203 |
// the term occurs in more than one page |
292
b3cfaf0a505c
Fixed highlighting in search results; changed search algorithm to give more score for terms found in page title; hopefully (hackishly) fixed login_key_cache getting too long
Dan
parents:
272
diff
changeset
|
204 |
|
272
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
205 |
// Find page IDs that contain commas |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
206 |
// This should never happen because commas are escaped by sanitize_page_id(). Nevertheless for compatibility with older |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
207 |
// databases, and to alleviate the concerns of hackers, we'll accommodate for page IDs with commas here by checking for |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
208 |
// IDs that don't match the pattern for stringified page ID + namespace. If it doesn't match, that means it's a continuation |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
209 |
// of the previous ID and should be concatenated to the previous entry. |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
210 |
$matches = explode(',', $pages); |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
211 |
$prev = false; |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
212 |
foreach ( $matches as $i => $_ ) |
1 | 213 |
{ |
272
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
214 |
$match =& $matches[$i]; |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
215 |
if ( !preg_match("/^ns=$ns_list;pid=(.+)$/", $match) && $prev ) |
1 | 216 |
{ |
272
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
217 |
$matches[$prev] .= ',' . $match; |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
218 |
unset($match, $matches[$i]); |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
219 |
continue; |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
220 |
} |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
221 |
$prev = $i; |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
222 |
} |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
223 |
unset($match); |
292
b3cfaf0a505c
Fixed highlighting in search results; changed search algorithm to give more score for terms found in page title; hopefully (hackishly) fixed login_key_cache getting too long
Dan
parents:
272
diff
changeset
|
224 |
|
272
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
225 |
// Iterate through each of the results, assigning scores based on how many times the page has shown up. |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
226 |
// This works because this phase of the search is strongly word-based not page-based. If a page shows up |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
227 |
// multiple times while fetching the result rows from the search_index table, it simply means that page |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
228 |
// contains more than one of the terms the user searched for. |
292
b3cfaf0a505c
Fixed highlighting in search results; changed search algorithm to give more score for terms found in page title; hopefully (hackishly) fixed login_key_cache getting too long
Dan
parents:
272
diff
changeset
|
229 |
|
272
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
230 |
foreach ( $matches as $match ) |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
231 |
{ |
292
b3cfaf0a505c
Fixed highlighting in search results; changed search algorithm to give more score for terms found in page title; hopefully (hackishly) fixed login_key_cache getting too long
Dan
parents:
272
diff
changeset
|
232 |
$word_cs = (( $case_sensitive ) ? $row['word'] : strtolower($row['word'])); |
b3cfaf0a505c
Fixed highlighting in search results; changed search algorithm to give more score for terms found in page title; hopefully (hackishly) fixed login_key_cache getting too long
Dan
parents:
272
diff
changeset
|
233 |
if ( isset($word_tracking[$match]) && in_array($word_cs, $word_tracking[$match]) ) |
272
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
234 |
{ |
292
b3cfaf0a505c
Fixed highlighting in search results; changed search algorithm to give more score for terms found in page title; hopefully (hackishly) fixed login_key_cache getting too long
Dan
parents:
272
diff
changeset
|
235 |
continue; |
b3cfaf0a505c
Fixed highlighting in search results; changed search algorithm to give more score for terms found in page title; hopefully (hackishly) fixed login_key_cache getting too long
Dan
parents:
272
diff
changeset
|
236 |
} |
b3cfaf0a505c
Fixed highlighting in search results; changed search algorithm to give more score for terms found in page title; hopefully (hackishly) fixed login_key_cache getting too long
Dan
parents:
272
diff
changeset
|
237 |
if ( isset($word_tracking[$match]) ) |
b3cfaf0a505c
Fixed highlighting in search results; changed search algorithm to give more score for terms found in page title; hopefully (hackishly) fixed login_key_cache getting too long
Dan
parents:
272
diff
changeset
|
238 |
{ |
b3cfaf0a505c
Fixed highlighting in search results; changed search algorithm to give more score for terms found in page title; hopefully (hackishly) fixed login_key_cache getting too long
Dan
parents:
272
diff
changeset
|
239 |
if ( isset($word_tracking[$match]) ) |
b3cfaf0a505c
Fixed highlighting in search results; changed search algorithm to give more score for terms found in page title; hopefully (hackishly) fixed login_key_cache getting too long
Dan
parents:
272
diff
changeset
|
240 |
{ |
b3cfaf0a505c
Fixed highlighting in search results; changed search algorithm to give more score for terms found in page title; hopefully (hackishly) fixed login_key_cache getting too long
Dan
parents:
272
diff
changeset
|
241 |
$word_tracking[$match][] = ($word_cs); |
b3cfaf0a505c
Fixed highlighting in search results; changed search algorithm to give more score for terms found in page title; hopefully (hackishly) fixed login_key_cache getting too long
Dan
parents:
272
diff
changeset
|
242 |
} |
1 | 243 |
} |
244 |
else |
|
245 |
{ |
|
292
b3cfaf0a505c
Fixed highlighting in search results; changed search algorithm to give more score for terms found in page title; hopefully (hackishly) fixed login_key_cache getting too long
Dan
parents:
272
diff
changeset
|
246 |
$word_tracking[$match] = array($word_cs); |
1 | 247 |
} |
292
b3cfaf0a505c
Fixed highlighting in search results; changed search algorithm to give more score for terms found in page title; hopefully (hackishly) fixed login_key_cache getting too long
Dan
parents:
272
diff
changeset
|
248 |
$inc = 1; |
b3cfaf0a505c
Fixed highlighting in search results; changed search algorithm to give more score for terms found in page title; hopefully (hackishly) fixed login_key_cache getting too long
Dan
parents:
272
diff
changeset
|
249 |
|
b3cfaf0a505c
Fixed highlighting in search results; changed search algorithm to give more score for terms found in page title; hopefully (hackishly) fixed login_key_cache getting too long
Dan
parents:
272
diff
changeset
|
250 |
// Is this search term present in the page's title? If so, give extra points |
b3cfaf0a505c
Fixed highlighting in search results; changed search algorithm to give more score for terms found in page title; hopefully (hackishly) fixed login_key_cache getting too long
Dan
parents:
272
diff
changeset
|
251 |
preg_match("/^ns=$ns_list;pid=(.+)$/", $match, $piecesparts); |
b3cfaf0a505c
Fixed highlighting in search results; changed search algorithm to give more score for terms found in page title; hopefully (hackishly) fixed login_key_cache getting too long
Dan
parents:
272
diff
changeset
|
252 |
$pathskey = $paths->nslist[ $piecesparts[1] ] . sanitize_page_id($piecesparts[2]); |
b3cfaf0a505c
Fixed highlighting in search results; changed search algorithm to give more score for terms found in page title; hopefully (hackishly) fixed login_key_cache getting too long
Dan
parents:
272
diff
changeset
|
253 |
if ( isset($paths->pages[$pathskey]) ) |
1 | 254 |
{ |
292
b3cfaf0a505c
Fixed highlighting in search results; changed search algorithm to give more score for terms found in page title; hopefully (hackishly) fixed login_key_cache getting too long
Dan
parents:
272
diff
changeset
|
255 |
$test_func = ( $case_sensitive ) ? 'strstr' : 'stristr'; |
b3cfaf0a505c
Fixed highlighting in search results; changed search algorithm to give more score for terms found in page title; hopefully (hackishly) fixed login_key_cache getting too long
Dan
parents:
272
diff
changeset
|
256 |
if ( $test_func($paths->pages[$pathskey]['name'], $row['word']) || $test_func($paths->pages[$pathskey]['urlname_nons'], $row['word']) ) |
b3cfaf0a505c
Fixed highlighting in search results; changed search algorithm to give more score for terms found in page title; hopefully (hackishly) fixed login_key_cache getting too long
Dan
parents:
272
diff
changeset
|
257 |
{ |
b3cfaf0a505c
Fixed highlighting in search results; changed search algorithm to give more score for terms found in page title; hopefully (hackishly) fixed login_key_cache getting too long
Dan
parents:
272
diff
changeset
|
258 |
$inc = 1.5; |
b3cfaf0a505c
Fixed highlighting in search results; changed search algorithm to give more score for terms found in page title; hopefully (hackishly) fixed login_key_cache getting too long
Dan
parents:
272
diff
changeset
|
259 |
} |
b3cfaf0a505c
Fixed highlighting in search results; changed search algorithm to give more score for terms found in page title; hopefully (hackishly) fixed login_key_cache getting too long
Dan
parents:
272
diff
changeset
|
260 |
} |
b3cfaf0a505c
Fixed highlighting in search results; changed search algorithm to give more score for terms found in page title; hopefully (hackishly) fixed login_key_cache getting too long
Dan
parents:
272
diff
changeset
|
261 |
if ( isset($scores[$match]) ) |
b3cfaf0a505c
Fixed highlighting in search results; changed search algorithm to give more score for terms found in page title; hopefully (hackishly) fixed login_key_cache getting too long
Dan
parents:
272
diff
changeset
|
262 |
{ |
b3cfaf0a505c
Fixed highlighting in search results; changed search algorithm to give more score for terms found in page title; hopefully (hackishly) fixed login_key_cache getting too long
Dan
parents:
272
diff
changeset
|
263 |
$scores[$match] = $scores[$match] + $inc; |
272
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
264 |
} |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
265 |
else |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
266 |
{ |
292
b3cfaf0a505c
Fixed highlighting in search results; changed search algorithm to give more score for terms found in page title; hopefully (hackishly) fixed login_key_cache getting too long
Dan
parents:
272
diff
changeset
|
267 |
$scores[$match] = $inc; |
1 | 268 |
} |
269 |
} |
|
270 |
} |
|
272
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
271 |
else |
1 | 272 |
{ |
272
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
273 |
// the term only occurs in one page |
292
b3cfaf0a505c
Fixed highlighting in search results; changed search algorithm to give more score for terms found in page title; hopefully (hackishly) fixed login_key_cache getting too long
Dan
parents:
272
diff
changeset
|
274 |
$word_cs = (( $case_sensitive ) ? $row['word'] : strtolower($row['word'])); |
b3cfaf0a505c
Fixed highlighting in search results; changed search algorithm to give more score for terms found in page title; hopefully (hackishly) fixed login_key_cache getting too long
Dan
parents:
272
diff
changeset
|
275 |
if ( isset($word_tracking[$pages]) && in_array($word_cs, $word_tracking[$pages]) ) |
1 | 276 |
{ |
292
b3cfaf0a505c
Fixed highlighting in search results; changed search algorithm to give more score for terms found in page title; hopefully (hackishly) fixed login_key_cache getting too long
Dan
parents:
272
diff
changeset
|
277 |
continue; |
b3cfaf0a505c
Fixed highlighting in search results; changed search algorithm to give more score for terms found in page title; hopefully (hackishly) fixed login_key_cache getting too long
Dan
parents:
272
diff
changeset
|
278 |
} |
b3cfaf0a505c
Fixed highlighting in search results; changed search algorithm to give more score for terms found in page title; hopefully (hackishly) fixed login_key_cache getting too long
Dan
parents:
272
diff
changeset
|
279 |
if ( isset($word_tracking[$pages]) ) |
b3cfaf0a505c
Fixed highlighting in search results; changed search algorithm to give more score for terms found in page title; hopefully (hackishly) fixed login_key_cache getting too long
Dan
parents:
272
diff
changeset
|
280 |
{ |
b3cfaf0a505c
Fixed highlighting in search results; changed search algorithm to give more score for terms found in page title; hopefully (hackishly) fixed login_key_cache getting too long
Dan
parents:
272
diff
changeset
|
281 |
if ( isset($word_tracking[$pages]) ) |
b3cfaf0a505c
Fixed highlighting in search results; changed search algorithm to give more score for terms found in page title; hopefully (hackishly) fixed login_key_cache getting too long
Dan
parents:
272
diff
changeset
|
282 |
{ |
b3cfaf0a505c
Fixed highlighting in search results; changed search algorithm to give more score for terms found in page title; hopefully (hackishly) fixed login_key_cache getting too long
Dan
parents:
272
diff
changeset
|
283 |
$word_tracking[$pages][] = ($word_cs); |
b3cfaf0a505c
Fixed highlighting in search results; changed search algorithm to give more score for terms found in page title; hopefully (hackishly) fixed login_key_cache getting too long
Dan
parents:
272
diff
changeset
|
284 |
} |
1 | 285 |
} |
272
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
286 |
else |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
287 |
{ |
292
b3cfaf0a505c
Fixed highlighting in search results; changed search algorithm to give more score for terms found in page title; hopefully (hackishly) fixed login_key_cache getting too long
Dan
parents:
272
diff
changeset
|
288 |
$word_tracking[$pages] = array($word_cs); |
272
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
289 |
} |
292
b3cfaf0a505c
Fixed highlighting in search results; changed search algorithm to give more score for terms found in page title; hopefully (hackishly) fixed login_key_cache getting too long
Dan
parents:
272
diff
changeset
|
290 |
$inc = 1; |
b3cfaf0a505c
Fixed highlighting in search results; changed search algorithm to give more score for terms found in page title; hopefully (hackishly) fixed login_key_cache getting too long
Dan
parents:
272
diff
changeset
|
291 |
|
b3cfaf0a505c
Fixed highlighting in search results; changed search algorithm to give more score for terms found in page title; hopefully (hackishly) fixed login_key_cache getting too long
Dan
parents:
272
diff
changeset
|
292 |
// Is this search term present in the page's title? If so, give extra points |
b3cfaf0a505c
Fixed highlighting in search results; changed search algorithm to give more score for terms found in page title; hopefully (hackishly) fixed login_key_cache getting too long
Dan
parents:
272
diff
changeset
|
293 |
preg_match("/^ns=$ns_list;pid=(.+)$/", $pages, $piecesparts); |
b3cfaf0a505c
Fixed highlighting in search results; changed search algorithm to give more score for terms found in page title; hopefully (hackishly) fixed login_key_cache getting too long
Dan
parents:
272
diff
changeset
|
294 |
$pathskey = $paths->nslist[ $piecesparts[1] ] . sanitize_page_id($piecesparts[2]); |
b3cfaf0a505c
Fixed highlighting in search results; changed search algorithm to give more score for terms found in page title; hopefully (hackishly) fixed login_key_cache getting too long
Dan
parents:
272
diff
changeset
|
295 |
if ( isset($paths->pages[$pathskey]) ) |
272
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
296 |
{ |
292
b3cfaf0a505c
Fixed highlighting in search results; changed search algorithm to give more score for terms found in page title; hopefully (hackishly) fixed login_key_cache getting too long
Dan
parents:
272
diff
changeset
|
297 |
$test_func = ( $case_sensitive ) ? 'strstr' : 'stristr'; |
b3cfaf0a505c
Fixed highlighting in search results; changed search algorithm to give more score for terms found in page title; hopefully (hackishly) fixed login_key_cache getting too long
Dan
parents:
272
diff
changeset
|
298 |
if ( $test_func($paths->pages[$pathskey]['name'], $row['word']) || $test_func($paths->pages[$pathskey]['urlname_nons'], $row['word']) ) |
b3cfaf0a505c
Fixed highlighting in search results; changed search algorithm to give more score for terms found in page title; hopefully (hackishly) fixed login_key_cache getting too long
Dan
parents:
272
diff
changeset
|
299 |
{ |
b3cfaf0a505c
Fixed highlighting in search results; changed search algorithm to give more score for terms found in page title; hopefully (hackishly) fixed login_key_cache getting too long
Dan
parents:
272
diff
changeset
|
300 |
$inc = 1.5; |
b3cfaf0a505c
Fixed highlighting in search results; changed search algorithm to give more score for terms found in page title; hopefully (hackishly) fixed login_key_cache getting too long
Dan
parents:
272
diff
changeset
|
301 |
} |
b3cfaf0a505c
Fixed highlighting in search results; changed search algorithm to give more score for terms found in page title; hopefully (hackishly) fixed login_key_cache getting too long
Dan
parents:
272
diff
changeset
|
302 |
} |
b3cfaf0a505c
Fixed highlighting in search results; changed search algorithm to give more score for terms found in page title; hopefully (hackishly) fixed login_key_cache getting too long
Dan
parents:
272
diff
changeset
|
303 |
if ( isset($scores[$pages]) ) |
b3cfaf0a505c
Fixed highlighting in search results; changed search algorithm to give more score for terms found in page title; hopefully (hackishly) fixed login_key_cache getting too long
Dan
parents:
272
diff
changeset
|
304 |
{ |
b3cfaf0a505c
Fixed highlighting in search results; changed search algorithm to give more score for terms found in page title; hopefully (hackishly) fixed login_key_cache getting too long
Dan
parents:
272
diff
changeset
|
305 |
$scores[$pages] = $scores[$pages] + $inc; |
272
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
306 |
} |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
307 |
else |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
308 |
{ |
292
b3cfaf0a505c
Fixed highlighting in search results; changed search algorithm to give more score for terms found in page title; hopefully (hackishly) fixed login_key_cache getting too long
Dan
parents:
272
diff
changeset
|
309 |
$scores[$pages] = $inc; |
272
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
310 |
} |
1 | 311 |
} |
312 |
} |
|
272
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
313 |
while ( $row = $db->fetchrow() ); |
1 | 314 |
} |
272
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
315 |
$db->free_result(); |
292
b3cfaf0a505c
Fixed highlighting in search results; changed search algorithm to give more score for terms found in page title; hopefully (hackishly) fixed login_key_cache getting too long
Dan
parents:
272
diff
changeset
|
316 |
|
272
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
317 |
// |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
318 |
// STAGE 2: FIRST ELIMINATION ROUND |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
319 |
// Iterate through the list of required terms. If a given page is not found to have the required term, eliminate it |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
320 |
// |
292
b3cfaf0a505c
Fixed highlighting in search results; changed search algorithm to give more score for terms found in page title; hopefully (hackishly) fixed login_key_cache getting too long
Dan
parents:
272
diff
changeset
|
321 |
|
272
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
322 |
foreach ( $query['req'] as $term ) |
1 | 323 |
{ |
272
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
324 |
foreach ( $word_tracking as $i => $page ) |
1 | 325 |
{ |
272
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
326 |
if ( !in_array($term, $page) ) |
1 | 327 |
{ |
272
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
328 |
unset($word_tracking[$i], $scores[$i]); |
1 | 329 |
} |
330 |
} |
|
331 |
} |
|
332 |
} |
|
292
b3cfaf0a505c
Fixed highlighting in search results; changed search algorithm to give more score for terms found in page title; hopefully (hackishly) fixed login_key_cache getting too long
Dan
parents:
272
diff
changeset
|
333 |
|
272
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
334 |
// |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
335 |
// STAGE 3: PHRASE SEARCHING |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
336 |
// Use LIKE to find pages with specified phrases. We can do a super-picky single query without another elimination round because |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
337 |
// at this stage we can search the full page_text column instead of relying on a word list. |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
338 |
// |
292
b3cfaf0a505c
Fixed highlighting in search results; changed search algorithm to give more score for terms found in page title; hopefully (hackishly) fixed login_key_cache getting too long
Dan
parents:
272
diff
changeset
|
339 |
|
272
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
340 |
// We can skip this stage if none of these special terms apply |
292
b3cfaf0a505c
Fixed highlighting in search results; changed search algorithm to give more score for terms found in page title; hopefully (hackishly) fixed login_key_cache getting too long
Dan
parents:
272
diff
changeset
|
341 |
|
320
112debff64bd
SURPRISE! Preliminary PostgreSQL support added. The required schema file is not present in this commit and will be included at a later date. No installer support is implemented. Also in this commit: several fixes including <!-- SYSMSG ... --> was broken in template compiler; set fixed width on included images to prevent the thumbnail box from getting huge; added a much more friendly interface to AJAX responses that are invalid JSON
Dan
parents:
292
diff
changeset
|
342 |
$text_col = ( $case_sensitive ) ? 'page_text' : ENANO_SQLFUNC_LOWERCASE . '(page_text)'; |
112debff64bd
SURPRISE! Preliminary PostgreSQL support added. The required schema file is not present in this commit and will be included at a later date. No installer support is implemented. Also in this commit: several fixes including <!-- SYSMSG ... --> was broken in template compiler; set fixed width on included images to prevent the thumbnail box from getting huge; added a much more friendly interface to AJAX responses that are invalid JSON
Dan
parents:
292
diff
changeset
|
343 |
$name_col = ( $case_sensitive ) ? 'name' : ENANO_SQLFUNC_LOWERCASE . '(name)'; |
112debff64bd
SURPRISE! Preliminary PostgreSQL support added. The required schema file is not present in this commit and will be included at a later date. No installer support is implemented. Also in this commit: several fixes including <!-- SYSMSG ... --> was broken in template compiler; set fixed width on included images to prevent the thumbnail box from getting huge; added a much more friendly interface to AJAX responses that are invalid JSON
Dan
parents:
292
diff
changeset
|
344 |
$text_col_join = ( $case_sensitive ) ? 't.page_text' : ENANO_SQLFUNC_LOWERCASE . '(t.page_text)'; |
112debff64bd
SURPRISE! Preliminary PostgreSQL support added. The required schema file is not present in this commit and will be included at a later date. No installer support is implemented. Also in this commit: several fixes including <!-- SYSMSG ... --> was broken in template compiler; set fixed width on included images to prevent the thumbnail box from getting huge; added a much more friendly interface to AJAX responses that are invalid JSON
Dan
parents:
292
diff
changeset
|
345 |
$name_col_join = ( $case_sensitive ) ? 'p.name' : ENANO_SQLFUNC_LOWERCASE . '(p.name)'; |
112debff64bd
SURPRISE! Preliminary PostgreSQL support added. The required schema file is not present in this commit and will be included at a later date. No installer support is implemented. Also in this commit: several fixes including <!-- SYSMSG ... --> was broken in template compiler; set fixed width on included images to prevent the thumbnail box from getting huge; added a much more friendly interface to AJAX responses that are invalid JSON
Dan
parents:
292
diff
changeset
|
346 |
|
112debff64bd
SURPRISE! Preliminary PostgreSQL support added. The required schema file is not present in this commit and will be included at a later date. No installer support is implemented. Also in this commit: several fixes including <!-- SYSMSG ... --> was broken in template compiler; set fixed width on included images to prevent the thumbnail box from getting huge; added a much more friendly interface to AJAX responses that are invalid JSON
Dan
parents:
292
diff
changeset
|
347 |
$concat_column = ( ENANO_DBLAYER == 'MYSQL' ) ? |
112debff64bd
SURPRISE! Preliminary PostgreSQL support added. The required schema file is not present in this commit and will be included at a later date. No installer support is implemented. Also in this commit: several fixes including <!-- SYSMSG ... --> was broken in template compiler; set fixed width on included images to prevent the thumbnail box from getting huge; added a much more friendly interface to AJAX responses that are invalid JSON
Dan
parents:
292
diff
changeset
|
348 |
'CONCAT(\'ns=\',t.namespace,\';pid=\',t.page_id)' : |
112debff64bd
SURPRISE! Preliminary PostgreSQL support added. The required schema file is not present in this commit and will be included at a later date. No installer support is implemented. Also in this commit: several fixes including <!-- SYSMSG ... --> was broken in template compiler; set fixed width on included images to prevent the thumbnail box from getting huge; added a much more friendly interface to AJAX responses that are invalid JSON
Dan
parents:
292
diff
changeset
|
349 |
"'ns=' || t.namespace || ';pid=' || t.page_id"; |
292
b3cfaf0a505c
Fixed highlighting in search results; changed search algorithm to give more score for terms found in page title; hopefully (hackishly) fixed login_key_cache getting too long
Dan
parents:
272
diff
changeset
|
350 |
|
272
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
351 |
if ( count($query_phrase['any']) > 0 || count($query_phrase['req']) > 0 ) |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
352 |
{ |
292
b3cfaf0a505c
Fixed highlighting in search results; changed search algorithm to give more score for terms found in page title; hopefully (hackishly) fixed login_key_cache getting too long
Dan
parents:
272
diff
changeset
|
353 |
|
272
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
354 |
$where_any = array(); |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
355 |
foreach ( $query_phrase['any'] as $term ) |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
356 |
{ |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
357 |
$term = escape_string_like($term); |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
358 |
if ( !$case_sensitive ) |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
359 |
$term = strtolower($term); |
292
b3cfaf0a505c
Fixed highlighting in search results; changed search algorithm to give more score for terms found in page title; hopefully (hackishly) fixed login_key_cache getting too long
Dan
parents:
272
diff
changeset
|
360 |
$where_any[] = "( $text_col LIKE '%$term%' OR $name_col LIKE '%$term%' )"; |
272
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
361 |
} |
292
b3cfaf0a505c
Fixed highlighting in search results; changed search algorithm to give more score for terms found in page title; hopefully (hackishly) fixed login_key_cache getting too long
Dan
parents:
272
diff
changeset
|
362 |
|
b3cfaf0a505c
Fixed highlighting in search results; changed search algorithm to give more score for terms found in page title; hopefully (hackishly) fixed login_key_cache getting too long
Dan
parents:
272
diff
changeset
|
363 |
$where_any = ( count($where_any) > 0 ) ? implode(" OR\n ", $where_any) : ''; |
b3cfaf0a505c
Fixed highlighting in search results; changed search algorithm to give more score for terms found in page title; hopefully (hackishly) fixed login_key_cache getting too long
Dan
parents:
272
diff
changeset
|
364 |
|
b3cfaf0a505c
Fixed highlighting in search results; changed search algorithm to give more score for terms found in page title; hopefully (hackishly) fixed login_key_cache getting too long
Dan
parents:
272
diff
changeset
|
365 |
// Also do required terms, but use AND to ensure that all required terms are included |
272
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
366 |
$where_req = array(); |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
367 |
foreach ( $query_phrase['req'] as $term ) |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
368 |
{ |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
369 |
$term = escape_string_like($term); |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
370 |
if ( !$case_sensitive ) |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
371 |
$term = strtolower($term); |
292
b3cfaf0a505c
Fixed highlighting in search results; changed search algorithm to give more score for terms found in page title; hopefully (hackishly) fixed login_key_cache getting too long
Dan
parents:
272
diff
changeset
|
372 |
$where_req[] = "( $text_col LIKE '%$term%' OR $name_col LIKE '%$term%' )"; |
272
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
373 |
} |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
374 |
$and_clause = ( $where_any != '' ) ? 'AND ' : ''; |
292
b3cfaf0a505c
Fixed highlighting in search results; changed search algorithm to give more score for terms found in page title; hopefully (hackishly) fixed login_key_cache getting too long
Dan
parents:
272
diff
changeset
|
375 |
$where_req = ( count($where_req) > 0 ) ? "{$and_clause}" . implode(" AND\n ", $where_req) : ''; |
b3cfaf0a505c
Fixed highlighting in search results; changed search algorithm to give more score for terms found in page title; hopefully (hackishly) fixed login_key_cache getting too long
Dan
parents:
272
diff
changeset
|
376 |
|
320
112debff64bd
SURPRISE! Preliminary PostgreSQL support added. The required schema file is not present in this commit and will be included at a later date. No installer support is implemented. Also in this commit: several fixes including <!-- SYSMSG ... --> was broken in template compiler; set fixed width on included images to prevent the thumbnail box from getting huge; added a much more friendly interface to AJAX responses that are invalid JSON
Dan
parents:
292
diff
changeset
|
377 |
$sql = 'SELECT ' . $concat_column . ' AS id, p.name FROM ' . table_prefix . "page_text AS t\n" |
292
b3cfaf0a505c
Fixed highlighting in search results; changed search algorithm to give more score for terms found in page title; hopefully (hackishly) fixed login_key_cache getting too long
Dan
parents:
272
diff
changeset
|
378 |
. " LEFT JOIN " . table_prefix . "pages AS p\n" |
b3cfaf0a505c
Fixed highlighting in search results; changed search algorithm to give more score for terms found in page title; hopefully (hackishly) fixed login_key_cache getting too long
Dan
parents:
272
diff
changeset
|
379 |
. " ON ( p.urlname = t.page_id AND p.namespace = t.namespace )\n" |
567
7f196509bf78
Actually made the "prevent search indexing" switch work. Needs to be merged with stable.
Dan
parents:
536
diff
changeset
|
380 |
. " WHERE p.visible = 1 AND (\n $where_any\n $where_req\n );"; |
272
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
381 |
if ( !($q = $db->sql_unbuffered_query($sql)) ) |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
382 |
$db->_die('Error is in perform_search(), includes/search.php, query 2. Parsed query dump follows:<pre>(indexable) ' . htmlspecialchars(print_r($query, true)) . '(non-indexable) ' . htmlspecialchars(print_r($query_phrase, true)) . '</pre>'); |
292
b3cfaf0a505c
Fixed highlighting in search results; changed search algorithm to give more score for terms found in page title; hopefully (hackishly) fixed login_key_cache getting too long
Dan
parents:
272
diff
changeset
|
383 |
|
272
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
384 |
if ( $row = $db->fetchrow() ) |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
385 |
{ |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
386 |
do |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
387 |
{ |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
388 |
$id =& $row['id']; |
292
b3cfaf0a505c
Fixed highlighting in search results; changed search algorithm to give more score for terms found in page title; hopefully (hackishly) fixed login_key_cache getting too long
Dan
parents:
272
diff
changeset
|
389 |
$inc = 1; |
b3cfaf0a505c
Fixed highlighting in search results; changed search algorithm to give more score for terms found in page title; hopefully (hackishly) fixed login_key_cache getting too long
Dan
parents:
272
diff
changeset
|
390 |
|
b3cfaf0a505c
Fixed highlighting in search results; changed search algorithm to give more score for terms found in page title; hopefully (hackishly) fixed login_key_cache getting too long
Dan
parents:
272
diff
changeset
|
391 |
// Is this search term present in the page's title? If so, give extra points |
b3cfaf0a505c
Fixed highlighting in search results; changed search algorithm to give more score for terms found in page title; hopefully (hackishly) fixed login_key_cache getting too long
Dan
parents:
272
diff
changeset
|
392 |
preg_match("/^ns=$ns_list;pid=(.+)$/", $id, $piecesparts); |
b3cfaf0a505c
Fixed highlighting in search results; changed search algorithm to give more score for terms found in page title; hopefully (hackishly) fixed login_key_cache getting too long
Dan
parents:
272
diff
changeset
|
393 |
$pathskey = $paths->nslist[ $piecesparts[1] ] . sanitize_page_id($piecesparts[2]); |
b3cfaf0a505c
Fixed highlighting in search results; changed search algorithm to give more score for terms found in page title; hopefully (hackishly) fixed login_key_cache getting too long
Dan
parents:
272
diff
changeset
|
394 |
if ( isset($paths->pages[$pathskey]) ) |
b3cfaf0a505c
Fixed highlighting in search results; changed search algorithm to give more score for terms found in page title; hopefully (hackishly) fixed login_key_cache getting too long
Dan
parents:
272
diff
changeset
|
395 |
{ |
b3cfaf0a505c
Fixed highlighting in search results; changed search algorithm to give more score for terms found in page title; hopefully (hackishly) fixed login_key_cache getting too long
Dan
parents:
272
diff
changeset
|
396 |
$test_func = ( $case_sensitive ) ? 'strstr' : 'stristr'; |
b3cfaf0a505c
Fixed highlighting in search results; changed search algorithm to give more score for terms found in page title; hopefully (hackishly) fixed login_key_cache getting too long
Dan
parents:
272
diff
changeset
|
397 |
foreach ( array_merge($query_phrase['any'], $query_phrase['req']) as $term ) |
b3cfaf0a505c
Fixed highlighting in search results; changed search algorithm to give more score for terms found in page title; hopefully (hackishly) fixed login_key_cache getting too long
Dan
parents:
272
diff
changeset
|
398 |
{ |
b3cfaf0a505c
Fixed highlighting in search results; changed search algorithm to give more score for terms found in page title; hopefully (hackishly) fixed login_key_cache getting too long
Dan
parents:
272
diff
changeset
|
399 |
if ( $test_func($paths->pages[$pathskey]['name'], $term) || $test_func($paths->pages[$pathskey]['urlname_nons'], $term) ) |
b3cfaf0a505c
Fixed highlighting in search results; changed search algorithm to give more score for terms found in page title; hopefully (hackishly) fixed login_key_cache getting too long
Dan
parents:
272
diff
changeset
|
400 |
{ |
b3cfaf0a505c
Fixed highlighting in search results; changed search algorithm to give more score for terms found in page title; hopefully (hackishly) fixed login_key_cache getting too long
Dan
parents:
272
diff
changeset
|
401 |
$inc = 1.5; |
b3cfaf0a505c
Fixed highlighting in search results; changed search algorithm to give more score for terms found in page title; hopefully (hackishly) fixed login_key_cache getting too long
Dan
parents:
272
diff
changeset
|
402 |
break; |
b3cfaf0a505c
Fixed highlighting in search results; changed search algorithm to give more score for terms found in page title; hopefully (hackishly) fixed login_key_cache getting too long
Dan
parents:
272
diff
changeset
|
403 |
} |
b3cfaf0a505c
Fixed highlighting in search results; changed search algorithm to give more score for terms found in page title; hopefully (hackishly) fixed login_key_cache getting too long
Dan
parents:
272
diff
changeset
|
404 |
} |
b3cfaf0a505c
Fixed highlighting in search results; changed search algorithm to give more score for terms found in page title; hopefully (hackishly) fixed login_key_cache getting too long
Dan
parents:
272
diff
changeset
|
405 |
} |
272
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
406 |
if ( isset($scores[$id]) ) |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
407 |
{ |
292
b3cfaf0a505c
Fixed highlighting in search results; changed search algorithm to give more score for terms found in page title; hopefully (hackishly) fixed login_key_cache getting too long
Dan
parents:
272
diff
changeset
|
408 |
$scores[$id] = $scores[$id] + $inc; |
272
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
409 |
} |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
410 |
else |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
411 |
{ |
292
b3cfaf0a505c
Fixed highlighting in search results; changed search algorithm to give more score for terms found in page title; hopefully (hackishly) fixed login_key_cache getting too long
Dan
parents:
272
diff
changeset
|
412 |
$scores[$id] = $inc; |
272
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
413 |
} |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
414 |
} |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
415 |
while ( $row = $db->fetchrow() ); |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
416 |
} |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
417 |
$db->free_result(); |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
418 |
} |
292
b3cfaf0a505c
Fixed highlighting in search results; changed search algorithm to give more score for terms found in page title; hopefully (hackishly) fixed login_key_cache getting too long
Dan
parents:
272
diff
changeset
|
419 |
|
272
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
420 |
// |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
421 |
// STAGE 4 - SELECT PAGE TEXT AND ELIMINATE NOTS |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
422 |
// At this point, we have a complete list of all the possible pages. Now we want to obtain the page text, and within the same query |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
423 |
// eliminate any terms that shouldn't be in there. |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
424 |
// |
292
b3cfaf0a505c
Fixed highlighting in search results; changed search algorithm to give more score for terms found in page title; hopefully (hackishly) fixed login_key_cache getting too long
Dan
parents:
272
diff
changeset
|
425 |
|
272
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
426 |
// Generate master word list for the highlighter |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
427 |
$word_list = array_values(array_merge($query['any'], $query['req'], $query_phrase['any'], $query_phrase['req'])); |
292
b3cfaf0a505c
Fixed highlighting in search results; changed search algorithm to give more score for terms found in page title; hopefully (hackishly) fixed login_key_cache getting too long
Dan
parents:
272
diff
changeset
|
428 |
|
272
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
429 |
$text_where = array(); |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
430 |
foreach ( $scores as $page_id => $_ ) |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
431 |
{ |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
432 |
$text_where[] = $db->escape($page_id); |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
433 |
} |
320
112debff64bd
SURPRISE! Preliminary PostgreSQL support added. The required schema file is not present in this commit and will be included at a later date. No installer support is implemented. Also in this commit: several fixes including <!-- SYSMSG ... --> was broken in template compiler; set fixed width on included images to prevent the thumbnail box from getting huge; added a much more friendly interface to AJAX responses that are invalid JSON
Dan
parents:
292
diff
changeset
|
434 |
$text_where = '( ' . $concat_column . ' = \'' . implode('\' OR ' . $concat_column . ' = \'', $text_where) . '\' )'; |
292
b3cfaf0a505c
Fixed highlighting in search results; changed search algorithm to give more score for terms found in page title; hopefully (hackishly) fixed login_key_cache getting too long
Dan
parents:
272
diff
changeset
|
435 |
|
272
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
436 |
if ( count($query['not']) > 0 ) |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
437 |
$text_where .= ' AND'; |
292
b3cfaf0a505c
Fixed highlighting in search results; changed search algorithm to give more score for terms found in page title; hopefully (hackishly) fixed login_key_cache getting too long
Dan
parents:
272
diff
changeset
|
438 |
|
272
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
439 |
$where_not = array(); |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
440 |
foreach ( $query['not'] as $term ) |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
441 |
{ |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
442 |
$term = escape_string_like($term); |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
443 |
if ( !$case_sensitive ) |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
444 |
$term = strtolower($term); |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
445 |
$where_not[] = $term; |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
446 |
} |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
447 |
$where_not = ( count($where_not) > 0 ) ? "$text_col NOT LIKE '%" . implode("%' AND $text_col NOT LIKE '%", $where_not) . "%'" : ''; |
292
b3cfaf0a505c
Fixed highlighting in search results; changed search algorithm to give more score for terms found in page title; hopefully (hackishly) fixed login_key_cache getting too long
Dan
parents:
272
diff
changeset
|
448 |
|
320
112debff64bd
SURPRISE! Preliminary PostgreSQL support added. The required schema file is not present in this commit and will be included at a later date. No installer support is implemented. Also in this commit: several fixes including <!-- SYSMSG ... --> was broken in template compiler; set fixed width on included images to prevent the thumbnail box from getting huge; added a much more friendly interface to AJAX responses that are invalid JSON
Dan
parents:
292
diff
changeset
|
449 |
$sql = 'SELECT ' . $concat_column . ' AS id, t.page_id, t.namespace, CHAR_LENGTH(t.page_text) AS page_length, t.page_text, p.name AS page_name FROM ' . table_prefix . "page_text AS t |
272
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
450 |
LEFT JOIN " . table_prefix . "pages AS p |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
451 |
ON ( p.urlname = t.page_id AND p.namespace = t.namespace ) |
567
7f196509bf78
Actually made the "prevent search indexing" switch work. Needs to be merged with stable.
Dan
parents:
536
diff
changeset
|
452 |
WHERE p.visible = 1 AND ( $text_where $where_not );"; |
272
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
453 |
if ( !($q = $db->sql_unbuffered_query($sql)) ) |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
454 |
$db->_die('Error is in perform_search(), includes/search.php, query 3'); |
292
b3cfaf0a505c
Fixed highlighting in search results; changed search algorithm to give more score for terms found in page title; hopefully (hackishly) fixed login_key_cache getting too long
Dan
parents:
272
diff
changeset
|
455 |
|
272
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
456 |
$page_data = array(); |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
457 |
if ( $row = $db->fetchrow() ) |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
458 |
{ |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
459 |
do |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
460 |
{ |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
461 |
$row['page_text'] = htmlspecialchars($row['page_text']); |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
462 |
$row['page_name'] = htmlspecialchars($row['page_name']); |
292
b3cfaf0a505c
Fixed highlighting in search results; changed search algorithm to give more score for terms found in page title; hopefully (hackishly) fixed login_key_cache getting too long
Dan
parents:
272
diff
changeset
|
463 |
|
272
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
464 |
// Highlight results (this is wonderfully automated) |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
465 |
$row['page_text'] = highlight_and_clip_search_result($row['page_text'], $word_list, $case_sensitive); |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
466 |
if ( strlen($row['page_text']) > 250 && !preg_match('/^\.\.\.(.+)\.\.\.$/', $row['page_text']) ) |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
467 |
{ |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
468 |
$row['page_text'] = substr($row['page_text'], 0, 150) . '...'; |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
469 |
} |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
470 |
$row['page_name'] = highlight_search_result($row['page_name'], $word_list, $case_sensitive); |
292
b3cfaf0a505c
Fixed highlighting in search results; changed search algorithm to give more score for terms found in page title; hopefully (hackishly) fixed login_key_cache getting too long
Dan
parents:
272
diff
changeset
|
471 |
|
272
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
472 |
$page_data[$row['id']] = $row; |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
473 |
} |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
474 |
while ( $row = $db->fetchrow() ); |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
475 |
} |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
476 |
$db->free_result(); |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
477 |
|
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
478 |
// |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
479 |
// STAGE 5 - SPECIAL PAGE TITLE SEARCH |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
480 |
// Iterate through $paths->pages and check the titles for search terms. Score accordingly. |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
481 |
// |
292
b3cfaf0a505c
Fixed highlighting in search results; changed search algorithm to give more score for terms found in page title; hopefully (hackishly) fixed login_key_cache getting too long
Dan
parents:
272
diff
changeset
|
482 |
|
b3cfaf0a505c
Fixed highlighting in search results; changed search algorithm to give more score for terms found in page title; hopefully (hackishly) fixed login_key_cache getting too long
Dan
parents:
272
diff
changeset
|
483 |
foreach ( $paths->pages as $id => $page ) |
272
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
484 |
{ |
567
7f196509bf78
Actually made the "prevent search indexing" switch work. Needs to be merged with stable.
Dan
parents:
536
diff
changeset
|
485 |
if ( $page['namespace'] != 'Special' || $page['visible'] == 0 ) |
272
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
486 |
continue; |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
487 |
$idstring = 'ns=' . $page['namespace'] . ';pid=' . $page['urlname_nons']; |
292
b3cfaf0a505c
Fixed highlighting in search results; changed search algorithm to give more score for terms found in page title; hopefully (hackishly) fixed login_key_cache getting too long
Dan
parents:
272
diff
changeset
|
488 |
$any = array_values(array_unique(array_merge($query['any'], $query_phrase['any']))); |
272
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
489 |
foreach ( $any as $term ) |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
490 |
{ |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
491 |
if ( $case_sensitive ) |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
492 |
{ |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
493 |
if ( strstr($page['name'], $term) || strstr($page['urlname_nons'], $term) ) |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
494 |
{ |
292
b3cfaf0a505c
Fixed highlighting in search results; changed search algorithm to give more score for terms found in page title; hopefully (hackishly) fixed login_key_cache getting too long
Dan
parents:
272
diff
changeset
|
495 |
( isset($scores[$idstring]) ) ? $scores[$idstring] = $scores[$idstring] + 1.5 : $scores[$idstring] = 1.5; |
272
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
496 |
} |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
497 |
} |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
498 |
else |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
499 |
{ |
292
b3cfaf0a505c
Fixed highlighting in search results; changed search algorithm to give more score for terms found in page title; hopefully (hackishly) fixed login_key_cache getting too long
Dan
parents:
272
diff
changeset
|
500 |
if ( stristr($page['name'], $term) || stristr($page['urlname_nons'], $term) ) |
272
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
501 |
{ |
292
b3cfaf0a505c
Fixed highlighting in search results; changed search algorithm to give more score for terms found in page title; hopefully (hackishly) fixed login_key_cache getting too long
Dan
parents:
272
diff
changeset
|
502 |
( isset($scores[$idstring]) ) ? $scores[$idstring] = $scores[$idstring] + 1.5 : $scores[$idstring] = 1.5; |
272
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
503 |
} |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
504 |
} |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
505 |
} |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
506 |
if ( isset($scores[$idstring]) ) |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
507 |
{ |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
508 |
$page_data[$idstring] = array( |
334
c72b545f1304
More localization work. Resolved major issue with JSON parser not parsing files over ~50KB. Switched JSON parser to the one from the Zend Framework (BSD licensed). Forced to split enano.json into five different files.
Dan
parents:
322
diff
changeset
|
509 |
'page_name' => highlight_search_result($page['name'], $word_list, $case_sensitive), |
272
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
510 |
'page_text' => '', |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
511 |
'page_id' => $page['urlname_nons'], |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
512 |
'namespace' => $page['namespace'], |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
513 |
'score' => $scores[$idstring], |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
514 |
'page_length' => 1, |
335
67bd3121a12e
Replaced TinyMCE 2.x with 3.0 beta 3. Supports everything but IE. Also rewrote the editor interface completely from the ground up.
Dan
parents:
334
diff
changeset
|
515 |
'page_note' => '[' . $lang->get('search_result_tag_special') . ']' |
272
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
516 |
); |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
517 |
} |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
518 |
} |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
519 |
|
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
520 |
// |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
521 |
// STAGE 6 - SECOND ELIMINATION ROUND |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
522 |
// Iterate through the list of required terms. If a given page is not found to have the required term, eliminate it |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
523 |
// |
292
b3cfaf0a505c
Fixed highlighting in search results; changed search algorithm to give more score for terms found in page title; hopefully (hackishly) fixed login_key_cache getting too long
Dan
parents:
272
diff
changeset
|
524 |
|
272
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
525 |
$required = array_merge($query['req'], $query_phrase['req']); |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
526 |
foreach ( $required as $term ) |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
527 |
{ |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
528 |
foreach ( $page_data as $id => $page ) |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
529 |
{ |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
530 |
if ( ( $page['namespace'] == 'Special' || ( $page['namespace'] != 'Special' && !strstr($page['page_text'], $term) ) ) && !strstr($page['page_id'], $term) && !strstr($page['page_name'], $term) ) |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
531 |
{ |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
532 |
unset($page_data[$id]); |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
533 |
} |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
534 |
} |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
535 |
} |
292
b3cfaf0a505c
Fixed highlighting in search results; changed search algorithm to give more score for terms found in page title; hopefully (hackishly) fixed login_key_cache getting too long
Dan
parents:
272
diff
changeset
|
536 |
|
272
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
537 |
// At this point, all of our normal results are in. However, we can also allow plugins to hook into the system and score their own |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
538 |
// pages and add text, etc. as necessary. |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
539 |
// Plugins are COMPLETELY responsible for using the search terms and handling Boolean logic properly |
292
b3cfaf0a505c
Fixed highlighting in search results; changed search algorithm to give more score for terms found in page title; hopefully (hackishly) fixed login_key_cache getting too long
Dan
parents:
272
diff
changeset
|
540 |
|
272
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
541 |
$code = $plugins->setHook('search_global_inner'); |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
542 |
foreach ( $code as $cmd ) |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
543 |
{ |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
544 |
eval($cmd); |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
545 |
} |
292
b3cfaf0a505c
Fixed highlighting in search results; changed search algorithm to give more score for terms found in page title; hopefully (hackishly) fixed login_key_cache getting too long
Dan
parents:
272
diff
changeset
|
546 |
|
272
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
547 |
// a marvelous debugging aid :-) |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
548 |
// die('<pre>' . htmlspecialchars(print_r($page_data, true)) . '</pre>'); |
292
b3cfaf0a505c
Fixed highlighting in search results; changed search algorithm to give more score for terms found in page title; hopefully (hackishly) fixed login_key_cache getting too long
Dan
parents:
272
diff
changeset
|
549 |
|
272
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
550 |
// |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
551 |
// STAGE 7 - HIGHLIGHT, TRIM, AND SCORE RESULTS |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
552 |
// We now have the complete results of the search. We need to trim text down to show only portions of the page containing search |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
553 |
// terms, highlight any search terms within the page, and sort the final results array in descending order of score. |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
554 |
// |
292
b3cfaf0a505c
Fixed highlighting in search results; changed search algorithm to give more score for terms found in page title; hopefully (hackishly) fixed login_key_cache getting too long
Dan
parents:
272
diff
changeset
|
555 |
|
272
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
556 |
// Sort scores array |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
557 |
arsort($scores); |
292
b3cfaf0a505c
Fixed highlighting in search results; changed search algorithm to give more score for terms found in page title; hopefully (hackishly) fixed login_key_cache getting too long
Dan
parents:
272
diff
changeset
|
558 |
|
272
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
559 |
// Divisor for calculating relevance scores |
320
112debff64bd
SURPRISE! Preliminary PostgreSQL support added. The required schema file is not present in this commit and will be included at a later date. No installer support is implemented. Also in this commit: several fixes including <!-- SYSMSG ... --> was broken in template compiler; set fixed width on included images to prevent the thumbnail box from getting huge; added a much more friendly interface to AJAX responses that are invalid JSON
Dan
parents:
292
diff
changeset
|
560 |
$divisor = ( count($query['any']) + count($query_phrase['any']) + count($query['req']) + count($query['not']) ) * 1.5; |
461 | 561 |
|
272
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
562 |
foreach ( $scores as $page_id => $score ) |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
563 |
{ |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
564 |
if ( !isset($page_data[$page_id]) ) |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
565 |
// It's possible that $scores contains a score for a page that was later eliminated because it contained a disallowed term |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
566 |
continue; |
292
b3cfaf0a505c
Fixed highlighting in search results; changed search algorithm to give more score for terms found in page title; hopefully (hackishly) fixed login_key_cache getting too long
Dan
parents:
272
diff
changeset
|
567 |
|
272
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
568 |
// Make a copy of the datum, then delete the original (it frees up a LOT of RAM) |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
569 |
$datum = $page_data[$page_id]; |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
570 |
unset($page_data[$page_id]); |
292
b3cfaf0a505c
Fixed highlighting in search results; changed search algorithm to give more score for terms found in page title; hopefully (hackishly) fixed login_key_cache getting too long
Dan
parents:
272
diff
changeset
|
571 |
|
272
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
572 |
// This is an internal value used for sorting - it's no longer needed. |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
573 |
unset($datum['id']); |
292
b3cfaf0a505c
Fixed highlighting in search results; changed search algorithm to give more score for terms found in page title; hopefully (hackishly) fixed login_key_cache getting too long
Dan
parents:
272
diff
changeset
|
574 |
|
272
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
575 |
// Calculate score |
292
b3cfaf0a505c
Fixed highlighting in search results; changed search algorithm to give more score for terms found in page title; hopefully (hackishly) fixed login_key_cache getting too long
Dan
parents:
272
diff
changeset
|
576 |
// if ( $score > $divisor ) |
b3cfaf0a505c
Fixed highlighting in search results; changed search algorithm to give more score for terms found in page title; hopefully (hackishly) fixed login_key_cache getting too long
Dan
parents:
272
diff
changeset
|
577 |
// $score = $divisor; |
272
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
578 |
$datum['score'] = round($score / $divisor, 2) * 100; |
334
c72b545f1304
More localization work. Resolved major issue with JSON parser not parsing files over ~50KB. Switched JSON parser to the one from the Zend Framework (BSD licensed). Forced to split enano.json into five different files.
Dan
parents:
322
diff
changeset
|
579 |
|
c72b545f1304
More localization work. Resolved major issue with JSON parser not parsing files over ~50KB. Switched JSON parser to the one from the Zend Framework (BSD licensed). Forced to split enano.json into five different files.
Dan
parents:
322
diff
changeset
|
580 |
// Highlight the URL |
c72b545f1304
More localization work. Resolved major issue with JSON parser not parsing files over ~50KB. Switched JSON parser to the one from the Zend Framework (BSD licensed). Forced to split enano.json into five different files.
Dan
parents:
322
diff
changeset
|
581 |
$datum['url_highlight'] = makeUrlComplete($datum['namespace'], $datum['page_id']); |
c72b545f1304
More localization work. Resolved major issue with JSON parser not parsing files over ~50KB. Switched JSON parser to the one from the Zend Framework (BSD licensed). Forced to split enano.json into five different files.
Dan
parents:
322
diff
changeset
|
582 |
$datum['url_highlight'] = preg_replace('/\?.+$/', '', $datum['url_highlight']); |
c72b545f1304
More localization work. Resolved major issue with JSON parser not parsing files over ~50KB. Switched JSON parser to the one from the Zend Framework (BSD licensed). Forced to split enano.json into five different files.
Dan
parents:
322
diff
changeset
|
583 |
$datum['url_highlight'] = highlight_search_result($datum['url_highlight'], $word_list, $case_sensitive); |
292
b3cfaf0a505c
Fixed highlighting in search results; changed search algorithm to give more score for terms found in page title; hopefully (hackishly) fixed login_key_cache getting too long
Dan
parents:
272
diff
changeset
|
584 |
|
272
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
585 |
// Store it in our until-now-unused results array |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
586 |
$results[] = $datum; |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
587 |
} |
292
b3cfaf0a505c
Fixed highlighting in search results; changed search algorithm to give more score for terms found in page title; hopefully (hackishly) fixed login_key_cache getting too long
Dan
parents:
272
diff
changeset
|
588 |
|
272
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
589 |
// Our work here is done. :-D |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
590 |
return $results; |
1 | 591 |
} |
592 |
||
593 |
/** |
|
272
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
594 |
* Parses a search query into an associative array. The resultant array will be filled with the following values, each an array: |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
595 |
* any: Search terms that can optionally be present |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
596 |
* req: Search terms that must be present |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
597 |
* not: Search terms that should not be present |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
598 |
* @param string Search query |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
599 |
* @param array Will be filled with parser warnings, such as query too short, words too short, etc. |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
600 |
* @return array |
1 | 601 |
*/ |
602 |
||
272
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
603 |
function parse_search_query($query, &$warnings) |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
604 |
{ |
334
c72b545f1304
More localization work. Resolved major issue with JSON parser not parsing files over ~50KB. Switched JSON parser to the one from the Zend Framework (BSD licensed). Forced to split enano.json into five different files.
Dan
parents:
322
diff
changeset
|
605 |
global $lang; |
c72b545f1304
More localization work. Resolved major issue with JSON parser not parsing files over ~50KB. Switched JSON parser to the one from the Zend Framework (BSD licensed). Forced to split enano.json into five different files.
Dan
parents:
322
diff
changeset
|
606 |
|
272
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
607 |
$stopwords = get_stopwords(); |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
608 |
$ret = array( |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
609 |
'any' => array(), |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
610 |
'req' => array(), |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
611 |
'not' => array() |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
612 |
); |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
613 |
$warnings = array(); |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
614 |
$terms = array(); |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
615 |
$in_quote = false; |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
616 |
$start_term = 0; |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
617 |
$just_finished = false; |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
618 |
for ( $i = 0; $i < strlen($query); $i++ ) |
1 | 619 |
{ |
272
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
620 |
$chr = $query{$i}; |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
621 |
$prev = ( $i > 0 ) ? $query{ $i - 1 } : ''; |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
622 |
$next = ( ( $i + 1 ) < strlen($query) ) ? $query{ $i + 1 } : ''; |
292
b3cfaf0a505c
Fixed highlighting in search results; changed search algorithm to give more score for terms found in page title; hopefully (hackishly) fixed login_key_cache getting too long
Dan
parents:
272
diff
changeset
|
623 |
|
272
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
624 |
if ( ( $chr == ' ' && !$in_quote ) || ( $i + 1 == strlen ( $query ) ) ) |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
625 |
{ |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
626 |
$len = ( $next == '' ) ? $i + 1 : $i - $start_term; |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
627 |
$word = substr ( $query, $start_term, $len ); |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
628 |
$terms[] = $word; |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
629 |
$start_term = $i + 1; |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
630 |
} |
292
b3cfaf0a505c
Fixed highlighting in search results; changed search algorithm to give more score for terms found in page title; hopefully (hackishly) fixed login_key_cache getting too long
Dan
parents:
272
diff
changeset
|
631 |
|
272
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
632 |
elseif ( $chr == '"' && $in_quote && $prev != '\\' ) |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
633 |
{ |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
634 |
$word = substr ( $query, $start_term, $i - $start_term + 1 ); |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
635 |
$start_pos = ( $next == ' ' ) ? $i + 2 : $i + 1; |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
636 |
$in_quote = false; |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
637 |
} |
292
b3cfaf0a505c
Fixed highlighting in search results; changed search algorithm to give more score for terms found in page title; hopefully (hackishly) fixed login_key_cache getting too long
Dan
parents:
272
diff
changeset
|
638 |
|
272
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
639 |
elseif ( $chr == '"' && !$in_quote ) |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
640 |
{ |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
641 |
$in_quote = true; |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
642 |
$start_pos = $i; |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
643 |
} |
292
b3cfaf0a505c
Fixed highlighting in search results; changed search algorithm to give more score for terms found in page title; hopefully (hackishly) fixed login_key_cache getting too long
Dan
parents:
272
diff
changeset
|
644 |
|
1 | 645 |
} |
292
b3cfaf0a505c
Fixed highlighting in search results; changed search algorithm to give more score for terms found in page title; hopefully (hackishly) fixed login_key_cache getting too long
Dan
parents:
272
diff
changeset
|
646 |
|
272
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
647 |
$ticker = 0; |
292
b3cfaf0a505c
Fixed highlighting in search results; changed search algorithm to give more score for terms found in page title; hopefully (hackishly) fixed login_key_cache getting too long
Dan
parents:
272
diff
changeset
|
648 |
|
272
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
649 |
foreach ( $terms as $element => $__unused ) |
1 | 650 |
{ |
272
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
651 |
$atom =& $terms[$element]; |
292
b3cfaf0a505c
Fixed highlighting in search results; changed search algorithm to give more score for terms found in page title; hopefully (hackishly) fixed login_key_cache getting too long
Dan
parents:
272
diff
changeset
|
652 |
|
272
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
653 |
$ticker++; |
292
b3cfaf0a505c
Fixed highlighting in search results; changed search algorithm to give more score for terms found in page title; hopefully (hackishly) fixed login_key_cache getting too long
Dan
parents:
272
diff
changeset
|
654 |
|
272
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
655 |
if ( $ticker == 20 ) |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
656 |
{ |
334
c72b545f1304
More localization work. Resolved major issue with JSON parser not parsing files over ~50KB. Switched JSON parser to the one from the Zend Framework (BSD licensed). Forced to split enano.json into five different files.
Dan
parents:
322
diff
changeset
|
657 |
$warnings[] = $lang->get('search_err_query_too_many_terms'); |
272
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
658 |
break; |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
659 |
} |
292
b3cfaf0a505c
Fixed highlighting in search results; changed search algorithm to give more score for terms found in page title; hopefully (hackishly) fixed login_key_cache getting too long
Dan
parents:
272
diff
changeset
|
660 |
|
272
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
661 |
if ( substr ( $atom, 0, 2 ) == '+"' && substr ( $atom, ( strlen ( $atom ) - 1 ), 1 ) == '"' ) |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
662 |
{ |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
663 |
$word = substr ( $atom, 2, ( strlen( $atom ) - 3 ) ); |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
664 |
if ( strlen ( $word ) < 2 || in_array($word, $stopwords) ) |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
665 |
{ |
334
c72b545f1304
More localization work. Resolved major issue with JSON parser not parsing files over ~50KB. Switched JSON parser to the one from the Zend Framework (BSD licensed). Forced to split enano.json into five different files.
Dan
parents:
322
diff
changeset
|
666 |
$warnings[] = $lang->get('search_err_query_has_stopwords'); |
272
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
667 |
$ticker--; |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
668 |
continue; |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
669 |
} |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
670 |
if(in_array($word, $ret['req'])) |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
671 |
{ |
334
c72b545f1304
More localization work. Resolved major issue with JSON parser not parsing files over ~50KB. Switched JSON parser to the one from the Zend Framework (BSD licensed). Forced to split enano.json into five different files.
Dan
parents:
322
diff
changeset
|
672 |
$warnings[] = $lang->get('search_err_query_dup_terms'); |
272
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
673 |
$ticker--; |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
674 |
continue; |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
675 |
} |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
676 |
$ret['req'][] = $word; |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
677 |
} |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
678 |
elseif ( substr ( $atom, 0, 2 ) == '-"' && substr ( $atom, ( strlen ( $atom ) - 1 ), 1 ) == '"' ) |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
679 |
{ |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
680 |
$word = substr ( $atom, 2, ( strlen( $atom ) - 3 ) ); |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
681 |
if ( strlen ( $word ) < 4 ) |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
682 |
{ |
334
c72b545f1304
More localization work. Resolved major issue with JSON parser not parsing files over ~50KB. Switched JSON parser to the one from the Zend Framework (BSD licensed). Forced to split enano.json into five different files.
Dan
parents:
322
diff
changeset
|
683 |
$warnings[] = $lang->get('search_err_query_term_too_short'); |
272
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
684 |
$ticker--; |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
685 |
continue; |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
686 |
} |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
687 |
if(in_array($word, $ret['not'])) |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
688 |
{ |
334
c72b545f1304
More localization work. Resolved major issue with JSON parser not parsing files over ~50KB. Switched JSON parser to the one from the Zend Framework (BSD licensed). Forced to split enano.json into five different files.
Dan
parents:
322
diff
changeset
|
689 |
$warnings[] = $lang->get('search_err_query_dup_terms'); |
272
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
690 |
$ticker--; |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
691 |
continue; |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
692 |
} |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
693 |
$ret['not'][] = $word; |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
694 |
} |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
695 |
elseif ( substr ( $atom, 0, 1 ) == '+' ) |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
696 |
{ |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
697 |
$word = substr ( $atom, 1 ); |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
698 |
if ( strlen ( $word ) < 2 || in_array($word, $stopwords) ) |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
699 |
{ |
334
c72b545f1304
More localization work. Resolved major issue with JSON parser not parsing files over ~50KB. Switched JSON parser to the one from the Zend Framework (BSD licensed). Forced to split enano.json into five different files.
Dan
parents:
322
diff
changeset
|
700 |
$warnings[] = $lang->get('search_err_query_has_stopwords'); |
272
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
701 |
$ticker--; |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
702 |
continue; |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
703 |
} |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
704 |
if(in_array($word, $ret['req'])) |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
705 |
{ |
334
c72b545f1304
More localization work. Resolved major issue with JSON parser not parsing files over ~50KB. Switched JSON parser to the one from the Zend Framework (BSD licensed). Forced to split enano.json into five different files.
Dan
parents:
322
diff
changeset
|
706 |
$warnings[] = $lang->get('search_err_query_dup_terms'); |
272
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
707 |
$ticker--; |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
708 |
continue; |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
709 |
} |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
710 |
$ret['req'][] = $word; |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
711 |
} |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
712 |
elseif ( substr ( $atom, 0, 1 ) == '-' ) |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
713 |
{ |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
714 |
$word = substr ( $atom, 1 ); |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
715 |
if ( strlen ( $word ) < 2 || in_array($word, $stopwords) ) |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
716 |
{ |
334
c72b545f1304
More localization work. Resolved major issue with JSON parser not parsing files over ~50KB. Switched JSON parser to the one from the Zend Framework (BSD licensed). Forced to split enano.json into five different files.
Dan
parents:
322
diff
changeset
|
717 |
$warnings[] = $lang->get('search_err_query_has_stopwords'); |
272
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
718 |
$ticker--; |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
719 |
continue; |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
720 |
} |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
721 |
if(in_array($word, $ret['not'])) |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
722 |
{ |
334
c72b545f1304
More localization work. Resolved major issue with JSON parser not parsing files over ~50KB. Switched JSON parser to the one from the Zend Framework (BSD licensed). Forced to split enano.json into five different files.
Dan
parents:
322
diff
changeset
|
723 |
$warnings[] = $lang->get('search_err_query_dup_terms'); |
272
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
724 |
$ticker--; |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
725 |
continue; |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
726 |
} |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
727 |
$ret['not'][] = $word; |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
728 |
} |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
729 |
elseif ( substr ( $atom, 0, 1 ) == '"' && substr ( $atom, ( strlen($atom) - 1 ), 1 ) == '"' ) |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
730 |
{ |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
731 |
$word = substr ( $atom, 1, ( strlen ( $atom ) - 2 ) ); |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
732 |
if ( strlen ( $word ) < 2 || in_array($word, $stopwords) ) |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
733 |
{ |
334
c72b545f1304
More localization work. Resolved major issue with JSON parser not parsing files over ~50KB. Switched JSON parser to the one from the Zend Framework (BSD licensed). Forced to split enano.json into five different files.
Dan
parents:
322
diff
changeset
|
734 |
$warnings[] = $lang->get('search_err_query_has_stopwords'); |
272
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
735 |
$ticker--; |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
736 |
continue; |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
737 |
} |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
738 |
if(in_array($word, $ret['any'])) |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
739 |
{ |
334
c72b545f1304
More localization work. Resolved major issue with JSON parser not parsing files over ~50KB. Switched JSON parser to the one from the Zend Framework (BSD licensed). Forced to split enano.json into five different files.
Dan
parents:
322
diff
changeset
|
740 |
$warnings[] = $lang->get('search_err_query_dup_terms'); |
272
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
741 |
$ticker--; |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
742 |
continue; |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
743 |
} |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
744 |
$ret['any'][] = $word; |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
745 |
} |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
746 |
else |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
747 |
{ |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
748 |
$word = $atom; |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
749 |
if ( strlen ( $word ) < 2 || in_array($word, $stopwords) ) |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
750 |
{ |
334
c72b545f1304
More localization work. Resolved major issue with JSON parser not parsing files over ~50KB. Switched JSON parser to the one from the Zend Framework (BSD licensed). Forced to split enano.json into five different files.
Dan
parents:
322
diff
changeset
|
751 |
$warnings[] = $lang->get('search_err_query_has_stopwords'); |
272
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
752 |
$ticker--; |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
753 |
continue; |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
754 |
} |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
755 |
if(in_array($word, $ret['any'])) |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
756 |
{ |
334
c72b545f1304
More localization work. Resolved major issue with JSON parser not parsing files over ~50KB. Switched JSON parser to the one from the Zend Framework (BSD licensed). Forced to split enano.json into five different files.
Dan
parents:
322
diff
changeset
|
757 |
$warnings[] = $lang->get('search_err_query_dup_terms'); |
272
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
758 |
$ticker--; |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
759 |
continue; |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
760 |
} |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
761 |
$ret['any'][] = $word; |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
762 |
} |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
763 |
} |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
764 |
return $ret; |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
765 |
} |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
766 |
|
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
767 |
/** |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
768 |
* Escapes a string for use in a LIKE clause. |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
769 |
* @param string |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
770 |
* @return string |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
771 |
*/ |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
772 |
|
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
773 |
function escape_string_like($string) |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
774 |
{ |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
775 |
global $db, $session, $paths, $template, $plugins; // Common objects |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
776 |
$string = $db->escape($string); |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
777 |
$string = str_replace(array('%', '_'), array('\%', '\_'), $string); |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
778 |
return $string; |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
779 |
} |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
780 |
|
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
781 |
/** |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
782 |
* Wraps <highlight></highlight> tags around all words in both the specified array. Does not perform any clipping. |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
783 |
* @param string Text to process |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
784 |
* @param array Word list |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
785 |
* @param bool If true, searches case-sensitively when highlighting words |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
786 |
* @return string |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
787 |
*/ |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
788 |
|
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
789 |
function highlight_search_result($pt, $words, $case_sensitive = false) |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
790 |
{ |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
791 |
$words2 = array(); |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
792 |
for ( $i = 0; $i < sizeof($words); $i++) |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
793 |
{ |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
794 |
if(!empty($words[$i])) |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
795 |
$words2[] = preg_quote($words[$i]); |
1 | 796 |
} |
292
b3cfaf0a505c
Fixed highlighting in search results; changed search algorithm to give more score for terms found in page title; hopefully (hackishly) fixed login_key_cache getting too long
Dan
parents:
272
diff
changeset
|
797 |
|
272
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
798 |
$flag = ( $case_sensitive ) ? '' : 'i'; |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
799 |
$regex = '/(' . implode('|', $words2) . ')/' . $flag; |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
800 |
$pt = preg_replace($regex, '<highlight>\\1</highlight>', $pt); |
292
b3cfaf0a505c
Fixed highlighting in search results; changed search algorithm to give more score for terms found in page title; hopefully (hackishly) fixed login_key_cache getting too long
Dan
parents:
272
diff
changeset
|
801 |
|
272
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
802 |
return $pt; |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
803 |
} |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
804 |
|
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
805 |
/** |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
806 |
* Wraps <highlight></highlight> tags around all words in both the specified array and the specified text and clips the text to |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
807 |
* an appropriate length. |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
808 |
* @param string Text to process |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
809 |
* @param array Word list |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
810 |
* @param bool If true, searches case-sensitively when highlighting words |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
811 |
* @return string |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
812 |
*/ |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
813 |
|
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
814 |
function highlight_and_clip_search_result($pt, $words, $case_sensitive = false) |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
815 |
{ |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
816 |
$cut_off = false; |
292
b3cfaf0a505c
Fixed highlighting in search results; changed search algorithm to give more score for terms found in page title; hopefully (hackishly) fixed login_key_cache getting too long
Dan
parents:
272
diff
changeset
|
817 |
|
272
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
818 |
$space_chars = Array("\t", "\n", "\r", " "); |
292
b3cfaf0a505c
Fixed highlighting in search results; changed search algorithm to give more score for terms found in page title; hopefully (hackishly) fixed login_key_cache getting too long
Dan
parents:
272
diff
changeset
|
819 |
|
272
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
820 |
$pt = highlight_search_result($pt, $words, $case_sensitive); |
292
b3cfaf0a505c
Fixed highlighting in search results; changed search algorithm to give more score for terms found in page title; hopefully (hackishly) fixed login_key_cache getting too long
Dan
parents:
272
diff
changeset
|
821 |
|
272
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
822 |
foreach ( $words as $word ) |
1 | 823 |
{ |
272
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
824 |
// Boldface searched words |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
825 |
$ptlen = strlen($pt); |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
826 |
for ( $i = 0; $i < $ptlen; $i++ ) |
1 | 827 |
{ |
272
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
828 |
$len = strlen($word); |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
829 |
if ( strtolower(substr($pt, $i, $len)) == strtolower($word) ) |
1 | 830 |
{ |
272
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
831 |
$chunk1 = substr($pt, 0, $i); |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
832 |
$chunk2 = substr($pt, $i, $len); |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
833 |
$chunk3 = substr($pt, ( $i + $len )); |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
834 |
$pt = $chunk1 . $chunk2 . $chunk3; |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
835 |
$ptlen = strlen($pt); |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
836 |
// Cut off text to 150 chars or so |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
837 |
if ( !$cut_off ) |
1 | 838 |
{ |
272
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
839 |
$cut_off = true; |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
840 |
if ( $i - 75 > 0 ) |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
841 |
{ |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
842 |
// Navigate backwards until a space character is found |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
843 |
$chunk = substr($pt, 0, ( $i - 75 )); |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
844 |
$final_chunk = $chunk; |
320
112debff64bd
SURPRISE! Preliminary PostgreSQL support added. The required schema file is not present in this commit and will be included at a later date. No installer support is implemented. Also in this commit: several fixes including <!-- SYSMSG ... --> was broken in template compiler; set fixed width on included images to prevent the thumbnail box from getting huge; added a much more friendly interface to AJAX responses that are invalid JSON
Dan
parents:
292
diff
changeset
|
845 |
for ( $j = strlen($chunk) - 1; $j > 0; $j = $j - 1 ) |
272
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
846 |
{ |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
847 |
if ( in_array($chunk{$j}, $space_chars) ) |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
848 |
{ |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
849 |
$final_chunk = substr($chunk, $j + 1); |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
850 |
break; |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
851 |
} |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
852 |
} |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
853 |
$mid_chunk = substr($pt, ( $i - 75 ), 75); |
292
b3cfaf0a505c
Fixed highlighting in search results; changed search algorithm to give more score for terms found in page title; hopefully (hackishly) fixed login_key_cache getting too long
Dan
parents:
272
diff
changeset
|
854 |
|
272
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
855 |
$clipped = '...' . $final_chunk . $mid_chunk . $chunk2; |
292
b3cfaf0a505c
Fixed highlighting in search results; changed search algorithm to give more score for terms found in page title; hopefully (hackishly) fixed login_key_cache getting too long
Dan
parents:
272
diff
changeset
|
856 |
|
272
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
857 |
$chunk = substr($pt, ( $i + strlen($chunk2) + 75 )); |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
858 |
$final_chunk = $chunk; |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
859 |
for ( $j = 0; $j < strlen($chunk); $j++ ) |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
860 |
{ |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
861 |
if ( in_array($chunk{$j}, $space_chars) ) |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
862 |
{ |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
863 |
$final_chunk = substr($chunk, 0, $j); |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
864 |
break; |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
865 |
} |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
866 |
} |
292
b3cfaf0a505c
Fixed highlighting in search results; changed search algorithm to give more score for terms found in page title; hopefully (hackishly) fixed login_key_cache getting too long
Dan
parents:
272
diff
changeset
|
867 |
|
272
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
868 |
$end_chunk = substr($pt, ( $i + strlen($chunk2) ), 75 ); |
292
b3cfaf0a505c
Fixed highlighting in search results; changed search algorithm to give more score for terms found in page title; hopefully (hackishly) fixed login_key_cache getting too long
Dan
parents:
272
diff
changeset
|
869 |
|
272
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
870 |
$clipped .= $end_chunk . $final_chunk . '...'; |
292
b3cfaf0a505c
Fixed highlighting in search results; changed search algorithm to give more score for terms found in page title; hopefully (hackishly) fixed login_key_cache getting too long
Dan
parents:
272
diff
changeset
|
871 |
|
272
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
872 |
$pt = $clipped; |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
873 |
} |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
874 |
else if ( strlen($pt) > 200 ) |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
875 |
{ |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
876 |
$mid_chunk = substr($pt, ( $i - 75 ), 75); |
292
b3cfaf0a505c
Fixed highlighting in search results; changed search algorithm to give more score for terms found in page title; hopefully (hackishly) fixed login_key_cache getting too long
Dan
parents:
272
diff
changeset
|
877 |
|
272
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
878 |
$clipped = $chunk1 . $chunk2; |
292
b3cfaf0a505c
Fixed highlighting in search results; changed search algorithm to give more score for terms found in page title; hopefully (hackishly) fixed login_key_cache getting too long
Dan
parents:
272
diff
changeset
|
879 |
|
272
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
880 |
$chunk = substr($pt, ( $i + strlen($chunk2) + 75 )); |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
881 |
$final_chunk = $chunk; |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
882 |
for ( $j = 0; $j < strlen($chunk); $j++ ) |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
883 |
{ |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
884 |
if ( in_array($chunk{$j}, $space_chars) ) |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
885 |
{ |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
886 |
$final_chunk = substr($chunk, 0, $j); |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
887 |
break; |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
888 |
} |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
889 |
} |
292
b3cfaf0a505c
Fixed highlighting in search results; changed search algorithm to give more score for terms found in page title; hopefully (hackishly) fixed login_key_cache getting too long
Dan
parents:
272
diff
changeset
|
890 |
|
272
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
891 |
$end_chunk = substr($pt, ( $i + strlen($chunk2) ), 75 ); |
292
b3cfaf0a505c
Fixed highlighting in search results; changed search algorithm to give more score for terms found in page title; hopefully (hackishly) fixed login_key_cache getting too long
Dan
parents:
272
diff
changeset
|
892 |
|
272
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
893 |
$clipped .= $end_chunk . $final_chunk . '...'; |
292
b3cfaf0a505c
Fixed highlighting in search results; changed search algorithm to give more score for terms found in page title; hopefully (hackishly) fixed login_key_cache getting too long
Dan
parents:
272
diff
changeset
|
894 |
|
272
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
895 |
$pt = $clipped; |
292
b3cfaf0a505c
Fixed highlighting in search results; changed search algorithm to give more score for terms found in page title; hopefully (hackishly) fixed login_key_cache getting too long
Dan
parents:
272
diff
changeset
|
896 |
|
272
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
897 |
} |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
898 |
break 2; |
1 | 899 |
} |
900 |
} |
|
901 |
} |
|
272
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
902 |
$cut_off = false; |
1 | 903 |
} |
272
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
904 |
return $pt; |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
905 |
} |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
906 |
|
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
907 |
/** |
461 | 908 |
* Returns a list of words that shouldn't under most circumstances be indexed for searching. |
272
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
909 |
* @return array |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
910 |
*/ |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
911 |
|
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
912 |
function get_stopwords() |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
913 |
{ |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
914 |
static $stopwords; |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
915 |
if ( is_array($stopwords) ) |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
916 |
return $stopwords; |
292
b3cfaf0a505c
Fixed highlighting in search results; changed search algorithm to give more score for terms found in page title; hopefully (hackishly) fixed login_key_cache getting too long
Dan
parents:
272
diff
changeset
|
917 |
|
461 | 918 |
$stopwords = array('I', 'a', 'about', 'an', 'are', 'as', 'at', 'be', 'by', 'com', 'de', 'en', 'for', 'from', 'how', 'in', 'is', 'it', |
919 |
'la', 'of', 'on', 'or', 'that', 'the', 'this', 'to', 'was', 'what', 'when', 'where', 'who', 'will', 'with', 'and', |
|
920 |
'the'); |
|
921 |
||
272
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
922 |
return $stopwords; |
1 | 923 |
} |
924 |
||
925 |
?> |