Export
Unigram标记化
Install the Transformers, Datasets, and Evaluate libraries to run this notebook.
[ ]
[ ]
[ ]
[ ]
[ ]
[('▁t', 7), ('is', 5), ('er', 5), ('▁a', 5), ('▁to', 4), ('to', 4), ('en', 4), ('▁T', 3), ('▁Th', 3), ('▁Thi', 3)] [ ]
[ ]
[ ]
[ ]
(['H', 'o', 'p', 'e', 'f', 'u', 'll', 'y'], 41.5157494601402) ,(['This'], 6.288267030694535)
[ ]
[ ]
413.10377642940875
[ ]
[ ]
6.376412403623874 ,0.0
[ ]
[ ]
['▁This', '▁is', '▁the', '▁Hugging', '▁Face', '▁', 'c', 'ou', 'r', 's', 'e', '.']