Pre-trained models
cs
language code: cs, language name: Czech, training sample size: 229k
- model file: cs-sentence-detector.onlpm, evaluation report: cs-sentence-detector.txt, training algorithm: MAXENT, evaluation score: 85.47%
- model file: cs-tokenizer.onlpm, evaluation report: cs-tokenizer.txt, training algorithm: MAXENT_QN, evaluation score: 99.95%
- model file: cs-pos-tagger.onlpm, evaluation report: cs-pos-tagger.txt, training algorithm: MAXENT, evaluation score: 97.94%
- model file: cs-lemmatizer.onlpm, evaluation report: cs-lemmatizer.txt, training algorithm: MAXENT, evaluation score: 93.83%
da
language code: da, language name: Danish, training sample size: 8k
- model file: da-sentence-detector.onlpm, evaluation report: da-sentence-detector.txt, training algorithm: MAXENT_QN, evaluation score: 87.73%
- model file: da-tokenizer.onlpm, evaluation report: da-tokenizer.txt, training algorithm: MAXENT_QN, evaluation score: 99.89%
- model file: da-pos-tagger.onlpm, evaluation report: da-pos-tagger.txt, training algorithm: PERCEPTRON, evaluation score: 96.24%
- model file: da-lemmatizer.onlpm, evaluation report: da-lemmatizer.txt, training algorithm: MAXENT, evaluation score: 94.29%
de
language code: de, language name: German, training sample size: 124k
- model file: de-sentence-detector.onlpm, evaluation report: de-sentence-detector.txt, training algorithm: MAXENT_QN, evaluation score: 72.88%
- model file: de-tokenizer.onlpm, evaluation report: de-tokenizer.txt, training algorithm: MAXENT_QN, evaluation score: 99.93%
- model file: de-pos-tagger.onlpm, evaluation report: de-pos-tagger.txt, training algorithm: MAXENT, evaluation score: 97.20%
- model file: de-lemmatizer.onlpm, evaluation report: de-lemmatizer.txt, training algorithm: MAXENT, evaluation score: 85.02%
el
language code: el, language name: Greek, training sample size: 7k
- model file: el-sentence-detector.onlpm, evaluation report: el-sentence-detector.txt, training algorithm: MAXENT_QN, evaluation score: 87.08%
- model file: el-tokenizer.onlpm, evaluation report: el-tokenizer.txt, training algorithm: MAXENT_QN, evaluation score: 99.97%
- model file: el-pos-tagger.onlpm, evaluation report: el-pos-tagger.txt, training algorithm: MAXENT, evaluation score: 97.82%
- model file: el-lemmatizer.onlpm, evaluation report: el-lemmatizer.txt, training algorithm: MAXENT, evaluation score: 96.83%
en
language code: en, language name: English, training sample size: 75k
- model file: en-sentence-detector.onlpm, evaluation report: en-sentence-detector.txt, training algorithm: MAXENT_QN, evaluation score: 75.93%
- model file: en-tokenizer.onlpm, evaluation report: en-tokenizer.txt, training algorithm: MAXENT_QN, evaluation score: 99.64%
- model file: en-pos-tagger.onlpm, evaluation report: en-pos-tagger.txt, training algorithm: MAXENT, evaluation score: 96.72%
- model file: en-lemmatizer.onlpm, evaluation report: en-lemmatizer.txt, training algorithm: MAXENT, evaluation score: 93.94%
es
language code: es, language name: Spanish, training sample size: 61k
- model file: es-sentence-detector.onlpm, evaluation report: es-sentence-detector.txt, training algorithm: MAXENT_QN, evaluation score: 97.27%
- model file: es-tokenizer.onlpm, evaluation report: es-tokenizer.txt, training algorithm: MAXENT_QN, evaluation score: 99.92%
- model file: es-pos-tagger.onlpm, evaluation report: es-pos-tagger.txt, training algorithm: MAXENT, evaluation score: 95.94%
- model file: es-lemmatizer.onlpm, evaluation report: es-lemmatizer.txt, training algorithm: MAXENT, evaluation score: 95.86%
fi
language code: fi, language name: Finnish, training sample size: 59k
- model file: fi-sentence-detector.onlpm, evaluation report: fi-sentence-detector.txt, training algorithm: MAXENT_QN, evaluation score: 81.95%
- model file: fi-tokenizer.onlpm, evaluation report: fi-tokenizer.txt, training algorithm: MAXENT_QN, evaluation score: 99.76%
- model file: fi-pos-tagger.onlpm, evaluation report: fi-pos-tagger.txt, training algorithm: MAXENT, evaluation score: 93.68%
- model file: fi-lemmatizer.onlpm, evaluation report: fi-lemmatizer.txt, training algorithm: MAXENT, evaluation score: 93.76%
fr
language code: fr, language name: French, training sample size: 42k
- model file: fr-sentence-detector.onlpm, evaluation report: fr-sentence-detector.txt, training algorithm: MAXENT_QN, evaluation score: 92.62%
- model file: fr-tokenizer.onlpm, evaluation report: fr-tokenizer.txt, training algorithm: MAXENT_QN, evaluation score: 99.94%
- model file: fr-pos-tagger.onlpm, evaluation report: fr-pos-tagger.txt, training algorithm: MAXENT, evaluation score: 96.90%
- model file: fr-lemmatizer.onlpm, evaluation report: fr-lemmatizer.txt, training algorithm: MAXENT, evaluation score: 95.10%
he
language code: he, language name: Hebrew, training sample size: 11k
- model file: he-sentence-detector.onlpm, evaluation report: he-sentence-detector.txt, training algorithm: MAXENT_QN, evaluation score: 95.74%
- model file: he-tokenizer.onlpm, evaluation report: he-tokenizer.txt, training algorithm: MAXENT_QN, evaluation score: 92.59%
- model file: he-pos-tagger.onlpm, evaluation report: he-pos-tagger.txt, training algorithm: MAXENT, evaluation score: 94.68%
- model file: he-lemmatizer.onlpm, evaluation report: he-lemmatizer.txt, training algorithm: MAXENT, evaluation score: 96.93%
it
language code: it, language name: Italian, training sample size: 72k
- model file: it-sentence-detector.onlpm, evaluation report: it-sentence-detector.txt, training algorithm: MAXENT, evaluation score: 74.14%
- model file: it-tokenizer.onlpm, evaluation report: it-tokenizer.txt, training algorithm: MAXENT_QN, evaluation score: 99.79%
- model file: it-pos-tagger.onlpm, evaluation report: it-pos-tagger.txt, training algorithm: MAXENT, evaluation score: 96.80%
- model file: it-lemmatizer.onlpm, evaluation report: it-lemmatizer.txt, training algorithm: MAXENT, evaluation score: 96.07%
ja
language code: ja, language name: Japanese, training sample size: 18k
- model file: ja-sentence-detector.onlpm, evaluation report: ja-sentence-detector.txt, training algorithm: MAXENT, evaluation score: 99.25%
- model file: ja-tokenizer.onlpm, evaluation report: ja-tokenizer.txt, training algorithm: MAXENT_QN, evaluation score: 81.43%
- model file: ja-pos-tagger.onlpm, evaluation report: ja-pos-tagger.txt, training algorithm: PERCEPTRON, evaluation score: 97.86%
- model file: ja-lemmatizer.onlpm, evaluation report: ja-lemmatizer.txt, training algorithm: MAXENT, evaluation score: 97.65%
ko
language code: ko, language name: Korean, training sample size: 37k
- model file: ko-sentence-detector.onlpm, evaluation report: ko-sentence-detector.txt, training algorithm: MAXENT_QN, evaluation score: 95.14%
- model file: ko-tokenizer.onlpm, evaluation report: ko-tokenizer.txt, training algorithm: MAXENT_QN, evaluation score: 99.40%
- model file: ko-pos-tagger.onlpm, evaluation report: ko-pos-tagger.txt, training algorithm: MAXENT, evaluation score: 89.85%
- model file: ko-lemmatizer.onlpm, evaluation report: ko-lemmatizer.txt, training algorithm: MAXENT, evaluation score: 87.83%
no
language code: no, language name: Norwegian, training sample size: 61k
- model file: no-sentence-detector.onlpm, evaluation report: no-sentence-detector.txt, training algorithm: MAXENT_QN, evaluation score: 84.07%
- model file: no-tokenizer.onlpm, evaluation report: no-tokenizer.txt, training algorithm: MAXENT_QN, evaluation score: 99.93%
- model file: no-pos-tagger.onlpm, evaluation report: no-pos-tagger.txt, training algorithm: MAXENT, evaluation score: 97.05%
- model file: no-lemmatizer.onlpm, evaluation report: no-lemmatizer.txt, training algorithm: MAXENT, evaluation score: 94.25%
pl
language code: pl, language name: Polish, training sample size: 72k
- model file: pl-sentence-detector.onlpm, evaluation report: pl-sentence-detector.txt, training algorithm: MAXENT_QN, evaluation score: 95.99%
- model file: pl-tokenizer.onlpm, evaluation report: pl-tokenizer.txt, training algorithm: MAXENT_QN, evaluation score: 99.86%
- model file: pl-pos-tagger.onlpm, evaluation report: pl-pos-tagger.txt, training algorithm: MAXENT, evaluation score: 96.76%
- model file: pl-lemmatizer.onlpm, evaluation report: pl-lemmatizer.txt, training algorithm: MAXENT, evaluation score: 91.82%
pt
language code: pt, language name: Portuguese, training sample size: 113k
- model file: pt-sentence-detector.onlpm, evaluation report: pt-sentence-detector.txt, training algorithm: MAXENT_QN, evaluation score: 74.60%
- model file: pt-tokenizer.onlpm, evaluation report: pt-tokenizer.txt, training algorithm: MAXENT_QN, evaluation score: 99.61%
- model file: pt-pos-tagger.onlpm, evaluation report: pt-pos-tagger.txt, training algorithm: MAXENT, evaluation score: 96.36%
- model file: pt-lemmatizer.onlpm, evaluation report: pt-lemmatizer.txt, training algorithm: MAXENT, evaluation score: 94.25%
ru
language code: ru, language name: Russian, training sample size: 186k
- model file: ru-sentence-detector.onlpm, evaluation report: ru-sentence-detector.txt, training algorithm: MAXENT_QN, evaluation score: 92.73%
- model file: ru-tokenizer.onlpm, evaluation report: ru-tokenizer.txt, training algorithm: MAXENT_QN, evaluation score: 99.87%
- model file: ru-pos-tagger.onlpm, evaluation report: ru-pos-tagger.txt, training algorithm: MAXENT, evaluation score: 98.20%
- model file: ru-lemmatizer.onlpm, evaluation report: ru-lemmatizer.txt, training algorithm: MAXENT, evaluation score: 97.11%
sv
language code: sv, language name: Swedish, training sample size: 20k
- model file: sv-sentence-detector.onlpm, evaluation report: sv-sentence-detector.txt, training algorithm: MAXENT, evaluation score: 86.66%
- model file: sv-tokenizer.onlpm, evaluation report: sv-tokenizer.txt, training algorithm: MAXENT_QN, evaluation score: 99.97%
- model file: sv-pos-tagger.onlpm, evaluation report: sv-pos-tagger.txt, training algorithm: PERCEPTRON, evaluation score: 96.37%
- model file: sv-lemmatizer.onlpm, evaluation report: sv-lemmatizer.txt, training algorithm: MAXENT, evaluation score: 96.42%
uk
language code: uk, language name: Ukrainian, training sample size: 17k
- model file: uk-sentence-detector.onlpm, evaluation report: uk-sentence-detector.txt, training algorithm: MAXENT_QN, evaluation score: 92.87%
- model file: uk-tokenizer.onlpm, evaluation report: uk-tokenizer.txt, training algorithm: MAXENT_QN, evaluation score: 99.92%
- model file: uk-pos-tagger.onlpm, evaluation report: uk-pos-tagger.txt, training algorithm: MAXENT, evaluation score: 98.09%
- model file: uk-lemmatizer.onlpm, evaluation report: uk-lemmatizer.txt, training algorithm: MAXENT, evaluation score: 96.97%
zh
language code: zh, language name: Chinese, training sample size: 23k
- model file: zh-sentence-detector.onlpm, evaluation report: zh-sentence-detector.txt, training algorithm: MAXENT, evaluation score: 97.79%
- model file: zh-tokenizer.onlpm, evaluation report: zh-tokenizer.txt, training algorithm: MAXENT_QN, evaluation score: 95.77%
- model file: zh-pos-tagger.onlpm, evaluation report: zh-pos-tagger.txt, training algorithm: MAXENT, evaluation score: 95.91%
- model file: zh-lemmatizer.onlpm, evaluation report: zh-lemmatizer.txt, training algorithm: MAXENT, evaluation score: 99.50%