```bash
pip install khmer-nltk
```
For the evaluation results of each khmer-nltk feature, please refer to the README of the corresponding sub-module.
```python
>>> from khmernltk import sentence_tokenize
>>> raw_text = "ខួបឆ្នាំទី២៨! ២៣ តុលា ស្មារតីផ្សះផ្សាជាតិរវាងខ្មែរនិងខ្មែរ ឈានទៅបញ្ចប់សង្រ្គាម នាំពន្លឺសន្តិភាព និងការរួបរួមជាថ្មី"
>>> print(sentence_tokenize(raw_text))
['ខួបឆ្នាំទី២៨!', '២៣ តុលា ស្មារតីផ្សះផ្សាជាតិរវាងខ្មែរនិងខ្មែរ ឈានទៅបញ្ចប់សង្រ្គាម នាំពន្លឺសន្តិភាព និងការរួបរួមជាថ្មី']
```
```python
>>> from khmernltk import word_tokenize
>>> raw_text = "ខួបឆ្នាំទី២៨! ២៣ តុលា ស្មារតីផ្សះផ្សាជាតិរវាងខ្មែរនិងខ្មែរ ឈានទៅបញ្ចប់សង្រ្គាម នាំពន្លឺសន្តិភាព និងការរួបរួមជាថ្មី"
>>> print(word_tokenize(raw_text, return_tokens=True))
['ខួប', 'ឆ្នាំ', 'ទី', '២៨', '!', ' ', '២៣', ' ', 'តុលា', ' ', 'ស្មារតី', 'ផ្សះផ្សា', 'ជាតិ', 'រវាង', 'ខ្មែរ', 'និង', 'ខ្មែរ', ' ', 'ឈាន', 'ទៅ', 'បញ្ចប់', 'សង្រ្គាម', ' ', 'នាំ', 'ពន្លឺ', 'សន្តិភាព', ' ', 'និង', 'ការរួបរួម', 'ជាថ្មី']
```
```python
>>> from khmernltk import pos_tag
>>> raw_text = "ខួបឆ្នាំទី២៨! ២៣ តុលា ស្មារតីផ្សះផ្សាជាតិរវាងខ្មែរនិងខ្មែរ ឈានទៅបញ្ចប់សង្រ្គាម នាំពន្លឺសន្តិភាព និងការរួបរួមជាថ្មី"
>>> print(pos_tag(raw_text))
[('ខួប', 'n'), ('ឆ្នាំ', 'n'), ('ទី', 'n'), ('២៨', '1'), ('!', '.'), (' ', 'n'), ('២៣', '1'), (' ', 'n'), ('តុលា', 'n'), (' ', 'n'), ('ស្មារតី', 'n'), ('ផ្សះផ្សា', 'n'), ('ជាតិ', 'n'), ('រវាង', 'o'), ('ខ្មែរ', 'n'), ('និង', 'o'), ('ខ្មែរ', 'n'), (' ', 'n'), ('ឈាន', 'v'), ('ទៅ', 'v'), ('បញ្ចប់', 'v'), ('សង្រ្គាម', 'n'), (' ', 'n'), ('នាំ', 'v'), ('ពន្លឺ', 'n'), ('សន្តិភាព', 'n'), (' ', 'n'), ('និង', 'o'), ('ការរួបរួម', 'n'), ('ជាថ្មី', 'o')]
```
```bibtex
@misc{hoang-khmer-nltk,
  author = {Phan Viet Hoang},
  title = {Khmer Natural Language Processing Toolkit},
  year = {2020},
  publisher = {GitHub},
  journal = {GitHub repository},
  howpublished = {\url{https://github.com/VietHoang1512/khmer-nltk}}
}
```
This is the first release of the Khmer-NLTK package.
nlp nlp-library khmer-language crf segmentation part-of-speech-tagging