mirror of
https://github.com/sphinx-doc/sphinx.git
synced 2025-02-25 18:55:22 -06:00
Merge pull request #2063 from TimKam/patch-1
Bugfix: Dutch stop words were incorrect (Danish)
This commit is contained in:
commit
470bac3d1c
1
AUTHORS
1
AUTHORS
@ -28,6 +28,7 @@ Other contributors, listed alphabetically, are:
|
|||||||
* Horst Gutmann -- internationalization support
|
* Horst Gutmann -- internationalization support
|
||||||
* Martin Hans -- autodoc improvements
|
* Martin Hans -- autodoc improvements
|
||||||
* Doug Hellmann -- graphviz improvements
|
* Doug Hellmann -- graphviz improvements
|
||||||
|
* Timotheus Kampik -- stop words language fix
|
||||||
* Takeshi Komiya -- numref feature
|
* Takeshi Komiya -- numref feature
|
||||||
* Dave Kuhlman -- original LaTeX writer
|
* Dave Kuhlman -- original LaTeX writer
|
||||||
* Blaise Laflamme -- pyramid theme
|
* Blaise Laflamme -- pyramid theme
|
||||||
|
1
CHANGES
1
CHANGES
@ -69,6 +69,7 @@ Bugs fixed
|
|||||||
* C++, add missing support for virtual base classes (thanks to Rapptz).
|
* C++, add missing support for virtual base classes (thanks to Rapptz).
|
||||||
* C++, add support for final classes.
|
* C++, add support for final classes.
|
||||||
* C++, fix parsing of types prefixed with 'enum'.
|
* C++, fix parsing of types prefixed with 'enum'.
|
||||||
|
* #2023: Dutch search support uses Danish stop words
|
||||||
|
|
||||||
Documentation
|
Documentation
|
||||||
-------------
|
-------------
|
||||||
|
@ -3,7 +3,7 @@
|
|||||||
sphinx.search.nl
|
sphinx.search.nl
|
||||||
~~~~~~~~~~~~~~~~
|
~~~~~~~~~~~~~~~~
|
||||||
|
|
||||||
Danish search language: includes the JS porter stemmer.
|
Dutch search language: includes the JS porter stemmer.
|
||||||
|
|
||||||
:copyright: Copyright 2007-2013 by the Sphinx team, see AUTHORS.
|
:copyright: Copyright 2007-2013 by the Sphinx team, see AUTHORS.
|
||||||
:license: BSD, see LICENSE for details.
|
:license: BSD, see LICENSE for details.
|
||||||
@ -13,102 +13,109 @@ from sphinx.search import SearchLanguage, parse_stop_word
|
|||||||
|
|
||||||
import snowballstemmer
|
import snowballstemmer
|
||||||
|
|
||||||
danish_stopwords = parse_stop_word(u'''
|
dutch_stopwords = parse_stop_word(u'''
|
||||||
| source: http://snowball.tartarus.org/algorithms/danish/stop.txt
|
| source: http://snowball.tartarus.org/algorithms/dutch/stop.txt
|
||||||
og | and
|
de | the
|
||||||
i | in
|
en | and
|
||||||
jeg | I
|
van | of, from
|
||||||
det | that (dem. pronoun)/it (pers. pronoun)
|
ik | I, the ego
|
||||||
at | that (in front of a sentence)/to (with infinitive)
|
te | (1) chez, at etc, (2) to, (3) too
|
||||||
en | a/an
|
dat | that, which
|
||||||
den | it (pers. pronoun)/that (dem. pronoun)
|
die | that, those, who, which
|
||||||
til | to/at/for/until/against/by/of/into, more
|
in | in, inside
|
||||||
er | present tense of "to be"
|
een | a, an, one
|
||||||
som | who, as
|
hij | he
|
||||||
på | on/upon/in/on/at/to/after/of/with/for, on
|
het | the, it
|
||||||
de | they
|
niet | not, nothing, naught
|
||||||
med | with/by/in, along
|
zijn | (1) to be, being, (2) his, one's, its
|
||||||
han | he
|
is | is
|
||||||
af | of/by/from/off/for/in/with/on, off
|
was | (1) was, past tense of all persons sing. of 'zijn' (to be) (2) wax, (3) the washing, (4) rise of river
|
||||||
for | at/for/to/from/by/of/ago, in front/before, because
|
op | on, upon, at, in, up, used up
|
||||||
ikke | not
|
aan | on, upon, to (as dative)
|
||||||
der | who/which, there/those
|
met | with, by
|
||||||
var | past tense of "to be"
|
als | like, such as, when
|
||||||
mig | me/myself
|
voor | (1) before, in front of, (2) furrow
|
||||||
sig | oneself/himself/herself/itself/themselves
|
had | had, past tense all persons sing. of 'hebben' (have)
|
||||||
men | but
|
er | there
|
||||||
et | a/an/one, one (number), someone/somebody/one
|
maar | but, only
|
||||||
har | present tense of "to have"
|
om | round, about, for etc
|
||||||
om | round/about/for/in/a, about/around/down, if
|
hem | him
|
||||||
vi | we
|
dan | then
|
||||||
min | my
|
zou | should/would, past tense all persons sing. of 'zullen'
|
||||||
havde | past tense of "to have"
|
of | or, whether, if
|
||||||
ham | him
|
wat | what, something, anything
|
||||||
hun | she
|
mijn | possessive and noun 'mine'
|
||||||
nu | now
|
men | people, 'one'
|
||||||
over | over/above/across/by/beyond/past/on/about, over/past
|
dit | this
|
||||||
da | then, when/as/since
|
zo | so, thus, in this way
|
||||||
fra | from/off/since, off, since
|
door | through, by
|
||||||
du | you
|
over | over, across
|
||||||
ud | out
|
ze | she, her, they, them
|
||||||
sin | his/her/its/one's
|
zich | oneself
|
||||||
dem | them
|
bij | (1) a bee, (2) by, near, at
|
||||||
os | us/ourselves
|
ook | also, too
|
||||||
op | up
|
tot | till, until
|
||||||
man | you/one
|
je | you
|
||||||
hans | his
|
mij | me
|
||||||
hvor | where
|
uit | out of, from
|
||||||
eller | or
|
der | Old Dutch form of 'van der' still found in surnames
|
||||||
hvad | what
|
daar | (1) there, (2) because
|
||||||
skal | must/shall etc.
|
haar | (1) her, their, them, (2) hair
|
||||||
selv | myself/youself/herself/ourselves etc., even
|
naar | (1) unpleasant, unwell etc, (2) towards, (3) as
|
||||||
her | here
|
heb | present first person sing. of 'to have'
|
||||||
alle | all/everyone/everybody etc.
|
hoe | how, why
|
||||||
vil | will (verb)
|
heeft | present third person sing. of 'to have'
|
||||||
blev | past tense of "to stay/to remain/to get/to become"
|
hebben | 'to have' and various parts thereof
|
||||||
kunne | could
|
deze | this
|
||||||
ind | in
|
u | you
|
||||||
når | when
|
want | (1) for, (2) mitten, (3) rigging
|
||||||
være | present tense of "to be"
|
nog | yet, still
|
||||||
dog | however/yet/after all
|
zal | 'shall', first and third person sing. of verb 'zullen' (will)
|
||||||
noget | something
|
me | me
|
||||||
ville | would
|
zij | she, they
|
||||||
jo | you know/you see (adv), yes
|
nu | now
|
||||||
deres | their/theirs
|
ge | 'thou', still used in Belgium and south Netherlands
|
||||||
efter | after/behind/according to/for/by/from, later/afterwards
|
geen | none
|
||||||
ned | down
|
omdat | because
|
||||||
skulle | should
|
iets | something, somewhat
|
||||||
denne | this
|
worden | to become, grow, get
|
||||||
end | than
|
toch | yet, still
|
||||||
dette | this
|
al | all, every, each
|
||||||
mit | my/mine
|
waren | (1) 'were', (2) to wander, (3) wares
|
||||||
også | also
|
veel | much, many
|
||||||
under | under/beneath/below/during, below/underneath
|
meer | (1) more, (2) lake
|
||||||
have | have
|
doen | to do, to make
|
||||||
dig | you
|
toen | then, when
|
||||||
anden | other
|
moet | noun 'spot/mote' and present form of 'to must'
|
||||||
hende | her
|
ben | (1) am, (2) 'are' in interrogative second person singular of 'to be'
|
||||||
mine | my
|
zonder | without
|
||||||
alt | everything
|
kan | noun 'can' and present form of 'to be able'
|
||||||
meget | much/very, plenty of
|
hun | their, them
|
||||||
sit | his, her, its, one's
|
dus | so, consequently
|
||||||
sine | his, her, its, one's
|
alles | all, everything, anything
|
||||||
vor | our
|
onder | under, beneath
|
||||||
mod | against
|
ja | yes, of course
|
||||||
disse | these
|
eens | once, one day
|
||||||
hvis | if
|
hier | here
|
||||||
din | your/yours
|
wie | who
|
||||||
nogle | some
|
werd | imperfect third person sing. of 'become'
|
||||||
hos | by/at
|
altijd | always
|
||||||
blive | be/become
|
doch | yet, but etc
|
||||||
mange | many
|
wordt | present third person sing. of 'become'
|
||||||
ad | by/through
|
wezen | (1) to be, (2) 'been' as in 'been fishing', (3) orphans
|
||||||
bliver | present tense of "to be/to become"
|
kunnen | to be able
|
||||||
hendes | her/hers
|
ons | us/our
|
||||||
været | be
|
zelf | self
|
||||||
thi | for (conj)
|
tegen | against, towards, at
|
||||||
jer | you
|
na | after, near
|
||||||
sådan | such, like this/like that
|
reeds | already
|
||||||
|
wil | (1) present tense of 'want', (2) 'will', noun, (3) fender
|
||||||
|
kon | could; past tense of 'to be able'
|
||||||
|
niets | nothing
|
||||||
|
uw | your
|
||||||
|
iemand | somebody
|
||||||
|
geweest | been; past participle of 'be'
|
||||||
|
andere | other
|
||||||
''')
|
''')
|
||||||
|
|
||||||
js_stemmer = u"""
|
js_stemmer = u"""
|
||||||
@ -122,7 +129,7 @@ class SearchDutch(SearchLanguage):
|
|||||||
language_name = 'Dutch'
|
language_name = 'Dutch'
|
||||||
js_stemmer_rawcode = 'dutch-stemmer.js'
|
js_stemmer_rawcode = 'dutch-stemmer.js'
|
||||||
js_stemmer_code = js_stemmer
|
js_stemmer_code = js_stemmer
|
||||||
stopwords = danish_stopwords
|
stopwords = dutch_stopwords
|
||||||
|
|
||||||
def init(self, options):
|
def init(self, options):
|
||||||
self.stemmer = snowballstemmer.stemmer('dutch')
|
self.stemmer = snowballstemmer.stemmer('dutch')
|
||||||
|
Loading…
Reference in New Issue
Block a user