A fix in the JS dumper and further compression by omitting redundant braces.

This commit is contained in:
Georg Brandl
2008-09-24 11:51:02 +00:00
parent 3d3bcb00df
commit 0830a04bbf
3 changed files with 44 additions and 26 deletions

View File

@@ -32,6 +32,7 @@ that the their then there these they this to
was will with
""".split())
class _JavaScriptIndex(object):
"""
The search index as javascript file that calls a function
@@ -44,8 +45,7 @@ class _JavaScriptIndex(object):
SUFFIX = ')'
def dumps(self, data):
return self.PREFIX + json.dumps(data, separators=(',', ':')) \
+ self.SUFFIX
return self.PREFIX + json.dumps(data) + self.SUFFIX
def loads(self, s):
data = s[len(self.PREFIX):-len(self.SUFFIX)]
@@ -119,8 +119,12 @@ class IndexBuilder(object):
raise ValueError('old format')
index2fn = frozen['filenames']
self._titles = dict(zip(index2fn, frozen['titles']))
self._mapping = dict((k, set(index2fn[i] for i in v))
for (k, v) in frozen['terms'].iteritems())
self._mapping = {}
for k, v in frozen['terms'].iteritems():
if isinstance(v, int):
self._mapping[k] = set([index2fn[v]])
else:
self._mapping[k] = set(index2fn[i] for i in v)
# no need to load keywords/desctypes
def dump(self, stream, format):
@@ -149,6 +153,16 @@ class IndexBuilder(object):
pdict[name] = (fn2index[doc], i)
return rv
def get_terms(self, fn2index):
    """Return the term mapping in its compact, serializable form.

    *fn2index* maps each filename to its integer position in the
    frozen filename list.  Every term in ``self._mapping`` is mapped
    to the indices of the files it occurs in; a term found in exactly
    one file is stored as a bare integer instead of a one-element
    list.
    """
    terms = {}
    for term, docnames in self._mapping.iteritems():
        indices = [fn2index[name] for name in docnames]
        if len(indices) == 1:
            # a single hit is written without the surrounding list
            indices = indices[0]
        terms[term] = indices
    return terms
def freeze(self):
"""Create a usable data structure for serializing."""
filenames = self._titles.keys()
@@ -157,8 +171,7 @@ class IndexBuilder(object):
return dict(
filenames=filenames,
titles=titles,
terms=dict((k, [fn2index[fn] for fn in v])
for (k, v) in self._mapping.iteritems()),
terms=self.get_terms(fn2index),
descrefs=self.get_descrefs(fn2index),
modules=self.get_modules(fn2index),
desctypes=dict((v, k) for (k, v) in self._desctypes.items()),

View File

@@ -287,12 +287,12 @@ var Search = {
},
query : function(query) {
// stem the searchwords and add them to the
// stem the searchterms and add them to the
// correct list
var stemmer = new PorterStemmer();
var searchwords = [];
var searchterms = [];
var excluded = [];
var hlwords = [];
var hlterms = [];
var tmp = query.split(/\s+/);
var object = (tmp.length == 1) ? tmp[0].toLowerCase() : null;
for (var i = 0; i < tmp.length; i++) {
@@ -304,23 +304,23 @@ var Search = {
word = word.substr(1);
}
else {
var toAppend = searchwords;
hlwords.push(tmp[i].toLowerCase());
var toAppend = searchterms;
hlterms.push(tmp[i].toLowerCase());
}
// only add if not already in the list
if (!$.contains(toAppend, word))
toAppend.push(word);
};
var highlightstring = '?highlight=' + $.urlencode(hlwords.join(" "));
var highlightstring = '?highlight=' + $.urlencode(hlterms.join(" "));
console.debug('SEARCH: searching for:');
console.info('required: ', searchwords);
console.info('required: ', searchterms);
console.info('excluded: ', excluded);
// prepare search
var filenames = this._index.filenames;
var titles = this._index.titles;
var words = this._index.terms;
var terms = this._index.terms;
var descrefs = this._index.descrefs;
var modules = this._index.modules;
var desctypes = this._index.desctypes;
@@ -343,7 +343,7 @@ var Search = {
for (var name in descrefs[prefix]) {
if (name.toLowerCase().indexOf(object) > -1) {
match = descrefs[prefix][name];
fullname = prefix + '.' + name;
fullname = (prefix ? prefix + '.' : '') + name;
descr = desctypes[match[1]] + _(', in ') + titles[match[0]];
objectResults.push([filenames[match[0]], fullname, '#'+fullname, descr]);
}
@@ -357,12 +357,15 @@ var Search = {
});
// perform the search on the required words
for (var i = 0; i < searchwords.length; i++) {
var word = searchwords[i];
// perform the search on the required terms
for (var i = 0; i < searchterms.length; i++) {
var word = searchterms[i];
// no match but word was a required one
if ((files = words[word]) == null)
if ((files = terms[word]) == null)
break;
if (files.length == undefined) {
files = [files];
}
// create the mapping
for (var j = 0; j < files.length; j++) {
var file = files[j];
@@ -373,18 +376,19 @@ var Search = {
}
}
// now check if the files don't contain excluded words
// now check if the files don't contain excluded terms
for (var file in fileMap) {
var valid = true;
// check if all requirements are matched
if (fileMap[file].length != searchwords.length)
if (fileMap[file].length != searchterms.length)
continue;
// ensure that none of the excluded words is in the
// ensure that none of the excluded terms is in the
// search result.
for (var i = 0; i < excluded.length; i++) {
if ($.contains(words[excluded[i]] || [], file)) {
if (terms[excluded[i]] == file ||
$.contains(terms[excluded[i]] || [], file)) {
valid = false;
break;
}
@@ -398,7 +402,7 @@ var Search = {
// delete unused variables in order to not waste
// memory until list is retrieved completely
delete filenames, titles, words;
delete filenames, titles, terms;
// now sort the regular results descending by title
regularResults.sort(function(a, b) {
@@ -429,7 +433,7 @@ var Search = {
});
} else {
$.get('_sources/' + item[0] + '.txt', function(data) {
listItem.append($.makeSearchSummary(data, searchwords, hlwords));
listItem.append($.makeSearchSummary(data, searchterms, hlterms));
Search.output.append(listItem);
listItem.slideDown(5, function() {
displayNextItem();

View File

@@ -20,6 +20,7 @@ import re
_str_re = re.compile(r'"(\\\\|\\"|[^"])*"')
_int_re = re.compile(r'\d+')
_name_re = re.compile(r'[a-zA-Z]\w*')
_nameonly_re = re.compile(r'[a-zA-Z]\w*$')
# escape \, ", control characters and everything outside ASCII
ESCAPE_ASCII = re.compile(r'([\\"]|[^\ -~])')
@@ -78,7 +79,7 @@ def dumps(obj, key=False):
if key:
if not isinstance(obj, basestring):
obj = str(obj)
if _name_re.match(obj) and obj not in reswords:
if _nameonly_re.match(obj) and obj not in reswords:
return obj # return it as a bare word
else:
return encode_string(obj)