2 changed files with 99 additions and 0 deletions
@ -0,0 +1,50 @@ |
|||
# This file contains a list of stemmers to include in the distribution. |
|||
# The format is a set of space separated lines - on each line: |
|||
# First item is name of stemmer. |
|||
# Second item is comma separated list of character sets. |
|||
# Third item is comma separated list of names to refer to the stemmer by. |
|||
# |
|||
# Lines starting with a #, or blank lines, are ignored. |
|||
|
|||
# List all the main algorithms for each language, in UTF-8, and also with |
|||
# the most commonly used encoding. |
|||
|
|||
danish UTF_8,ISO_8859_1 danish,da,dan |
|||
dutch UTF_8,ISO_8859_1 dutch,nl,dut,nld |
|||
english UTF_8,ISO_8859_1 english,en,eng |
|||
finnish UTF_8,ISO_8859_1 finnish,fi,fin |
|||
french UTF_8,ISO_8859_1 french,fr,fre,fra |
|||
german UTF_8,ISO_8859_1 german,de,ger,deu |
|||
hungarian UTF_8,ISO_8859_2 hungarian,hu,hun |
|||
italian UTF_8,ISO_8859_1 italian,it,ita |
|||
norwegian UTF_8,ISO_8859_1 norwegian,no,nor |
|||
portuguese UTF_8,ISO_8859_1 portuguese,pt,por |
|||
romanian UTF_8,ISO_8859_2 romanian,ro,rum,ron |
|||
russian UTF_8,KOI8_R russian,ru,rus |
|||
spanish UTF_8,ISO_8859_1 spanish,es,esl,spa |
|||
swedish UTF_8,ISO_8859_1 swedish,sv,swe |
|||
turkish UTF_8 turkish,tr,tur |
|||
|
|||
# Also include the traditional porter algorithm for english. |
|||
# The porter algorithm is included in the libstemmer distribution to assist |
|||
# with backwards compatibility, but for new systems the english algorithm |
|||
# should be used in preference. |
|||
porter UTF_8,ISO_8859_1 porter |
|||
|
|||
# Some other stemmers in the snowball project are not included in the standard |
|||
# distribution. To compile a libstemmer with them in, add them to this list, |
|||
# and regenerate the distribution. (You will need a full source checkout for |
|||
# this.) They are included in the snowball website as curiosities, but are not |
|||
# intended for general use, and use of them is is not fully supported. These |
|||
# algorithms are: |
|||
# |
|||
# german2 - This is a slight modification of the german stemmer. |
|||
#german2 UTF_8,ISO_8859_1 german2 |
|||
# |
|||
# kraaij_pohlmann - This is a different dutch stemmer. |
|||
#kraaij_pohlmann UTF_8,ISO_8859_1 kraaij_pohlmann |
|||
# |
|||
# lovins - This is an english stemmer, but fairly outdated, and |
|||
# only really applicable to a restricted type of input text |
|||
# (keywords in academic publications). |
|||
#lovins UTF_8,ISO_8859_1 lovins |
|||
@ -0,0 +1,49 @@ |
|||
# This file contains a list of stemmers to include in the distribution. |
|||
# The format is a set of space separated lines - on each line: |
|||
# First item is name of stemmer. |
|||
# Second item is comma separated list of character sets. |
|||
# Third item is comma separated list of names to refer to the stemmer by. |
|||
# |
|||
# Lines starting with a #, or blank lines, are ignored. |
|||
|
|||
# List all the main algorithms for each language, in UTF-8. |
|||
|
|||
danish UTF_8 danish,da,dan |
|||
dutch UTF_8 dutch,nl,dut,nld |
|||
english UTF_8 english,en,eng |
|||
finnish UTF_8 finnish,fi,fin |
|||
french UTF_8 french,fr,fre,fra |
|||
german UTF_8 german,de,ger,deu |
|||
hungarian UTF_8 hungarian,hu,hun |
|||
italian UTF_8 italian,it,ita |
|||
norwegian UTF_8 norwegian,no,nor |
|||
portuguese UTF_8 portuguese,pt,por |
|||
romanian UTF_8 romanian,ro,rum,ron |
|||
russian UTF_8 russian,ru,rus |
|||
spanish UTF_8 spanish,es,esl,spa |
|||
swedish UTF_8 swedish,sv,swe |
|||
turkish UTF_8 turkish,tr,tur |
|||
|
|||
# Also include the traditional porter algorithm for english. |
|||
# The porter algorithm is included in the libstemmer distribution to assist |
|||
# with backwards compatibility, but for new systems the english algorithm |
|||
# should be used in preference. |
|||
porter UTF_8 porter |
|||
|
|||
# Some other stemmers in the snowball project are not included in the standard |
|||
# distribution. To compile a libstemmer with them in, add them to this list, |
|||
# and regenerate the distribution. (You will need a full source checkout for |
|||
# this.) They are included in the snowball website as curiosities, but are not |
|||
# intended for general use, and use of them is is not fully supported. These |
|||
# algorithms are: |
|||
# |
|||
# german2 - This is a slight modification of the german stemmer. |
|||
#german2 UTF_8 german2 |
|||
# |
|||
# kraaij_pohlmann - This is a different dutch stemmer. |
|||
#kraaij_pohlmann UTF_8 kraaij_pohlmann |
|||
# |
|||
# lovins - This is an english stemmer, but fairly outdated, and |
|||
# only really applicable to a restricted type of input text |
|||
# (keywords in academic publications). |
|||
#lovins UTF_8 lovins |
|||
Write
Preview
Loading…
Cancel
Save
Reference in new issue