diff options
Diffstat (limited to 'extra/libtextcat/fpdb.conf')
-rw-r--r-- | extra/libtextcat/fpdb.conf | 86 |
1 files changed, 86 insertions, 0 deletions
diff --git a/extra/libtextcat/fpdb.conf b/extra/libtextcat/fpdb.conf new file mode 100644 index 000000000..329184d51 --- /dev/null +++ b/extra/libtextcat/fpdb.conf @@ -0,0 +1,86 @@ +# +# A sample config file for the language models +# provided with Gertjan van Noords language guesser +# (http://odur.let.rug.nl/~vannoord/TextCat/) +# +# Notes: +# - You may consider eliminating a couple of small languages from this +# list because they cause false positives with big languages and are +# bad for performance. (Do you really want to recognize Drents?) +# - Putting the most probable languages at the top of the list +# improves performance, because this will raise the threshold for +# likely candidates more quickly. +# + +# this file have been modified (to OOo by Jocelyn MERAND joc.mer@gmail.com) to include country and encoding +# guess strings are made as following : language-country-encoding + +afrikaans.lm af--utf8 +albanian.lm sq--utf8 +amharic_utf.lm am--utf8 +arabic.lm ar--utf8 +basque.lm eu--utf8 +belarus.lm be--utf8 +bosnian.lm bs--utf8 +breton.lm br--utf8 +catalan.lm ca--utf8 +chinese_simplified.lm zh-CN-utf8 +chinese_traditional.lm zh-TW-utf8 +croatian.lm hr--utf8 +czech.lm cs--utf8 +danish.lm da--utf8 +dutch.lm nl--utf8 +english.lm en--utf8 +esperanto.lm eo--utf8 +estonian.lm et--utf8 +finnish.lm fi--utf8 +french.lm fr--utf8 +frisian.lm fy--utf8 +georgian.lm ka--utf8 +german.lm de--utf8 +greek.lm el--utf8 +hebrew.lm he--utf8 +hindi.lm hi--utf8 +hungarian.lm hu--utf8 +icelandic.lm is--utf8 +indonesian.lm id--utf8 +irish_gaelic.lm ga--utf8 +italian.lm it--utf8 +japanese.lm ja--utf8 +korean.lm ko--utf8 +latin.lm la--utf8 +latvian.lm lv--utf8 +lithuanian.lm lt--utf8 +luxembourgish.lm lb--utf8 +malay.lm ms--utf8 +manx_gaelic.lm gv--utf8 +marathi.lm mr--utf8 +mongolian_cyrillic.lm mn--utf8 +nepali.lm ne--utf8 +norwegian.lm nb--utf8 # Norwegian (Bokmal) +persian.lm fa--utf8 # Farsi +polish.lm pl--utf8 +portuguese.lm pt-PT-utf8 +quechua.lm qu--utf8 +romanian.lm ro--utf8 +romansh.lm rm--utf8 +russian.lm ru--utf8 +sanskrit.lm sa--utf8 +scots.lm sco--utf8 +scots_gaelic.lm gd--utf8 +serbian.lm sr--utf-8 +serbian-latin.lm sh--utf-8 +slovak_ascii.lm sk-SK-utf8 +slovenian.lm sl--utf8 +spanish.lm es--utf8 +swahili.lm sw--utf8 +swedish.lm sv--utf8 +tagalog.lm tl--utf8 +tamil.lm ta--utf8 +thai.lm th--utf8 +turkish.lm tr--utf8 +ukrainian.lm uk--utf8 +vietnamese.lm vi--utf8 +welsh.lm cy--utf8 +yiddish_utf.lm yi--utf8 +zulu.lm zu--utf8 |