Section4 Pt

hf-notebookschapter7jacourse

翻訳 (PyTorch)

Install the Transformers, Datasets, and Evaluate libraries to run this notebook.

[ ]

You will need to set up git: adapt your email and name in the following cell.

[ ]

You will also need to be logged in to the Hugging Face Hub. Execute the following and enter your credentials.

[ ]
[ ]
[ ]
DatasetDict({
,    train: Dataset({
,        features: ['id', 'translation'],
,        num_rows: 210173
,    })
,})
[ ]
DatasetDict({
,    train: Dataset({
,        features: ['id', 'translation'],
,        num_rows: 189155
,    })
,    test: Dataset({
,        features: ['id', 'translation'],
,        num_rows: 21018
,    })
,})
[ ]
[ ]
{'en': 'Default to expanded threads',
, 'fr': 'Par défaut, développer les fils de discussion'}
[ ]
[{'translation_text': 'Par défaut pour les threads élargis'}]
[ ]
{'en': 'Unable to import %1 using the OFX importer plugin. This file is not the correct format.',
, 'fr': "Impossible d'importer %1 en utilisant le module d'extension d'importation OFX. Ce fichier n'a pas un format correct."}
[ ]
[{'translation_text': "Impossible d'importer %1 en utilisant le plugin d'importateur OFX. Ce fichier n'est pas le bon format."}]
[ ]
[ ]
[ ]
['▁Par', '▁dé', 'f', 'aut', ',', '▁dé', 've', 'lop', 'per', '▁les', '▁fil', 's', '▁de', '▁discussion', '</s>']
,['▁Par', '▁défaut', ',', '▁développer', '▁les', '▁fils', '▁de', '▁discussion', '</s>']
[ ]
[ ]
[ ]
[ ]
[ ]
dict_keys(['attention_mask', 'input_ids', 'labels', 'decoder_input_ids'])
[ ]
tensor([[  577,  5891,     2,  3184,    16,  2542,     5,  1710,     0,  -100,
,          -100,  -100,  -100,  -100,  -100,  -100],
,        [ 1211,     3,    49,  9409,  1211,     3, 29140,   817,  3124,   817,
,           550,  7032,  5821,  7907, 12649,     0]])
[ ]
tensor([[59513,   577,  5891,     2,  3184,    16,  2542,     5,  1710,     0,
,         59513, 59513, 59513, 59513, 59513, 59513],
,        [59513,  1211,     3,    49,  9409,  1211,     3, 29140,   817,  3124,
,           817,   550,  7032,  5821,  7907, 12649]])
[ ]
[577, 5891, 2, 3184, 16, 2542, 5, 1710, 0]
,[1211, 3, 49, 9409, 1211, 3, 29140, 817, 3124, 817, 550, 7032, 5821, 7907, 12649, 0]
[ ]
[ ]
[ ]
{'score': 46.750469682990165,
, 'counts': [11, 6, 4, 3],
, 'totals': [12, 11, 10, 9],
, 'precisions': [91.67, 54.54, 40.0, 33.33],
, 'bp': 0.9200444146293233,
, 'sys_len': 12,
, 'ref_len': 13}
[ ]
{'score': 1.683602693167689,
, 'counts': [1, 0, 0, 0],
, 'totals': [4, 3, 2, 1],
, 'precisions': [25.0, 16.67, 12.5, 12.5],
, 'bp': 0.10539922456186433,
, 'sys_len': 4,
, 'ref_len': 13}
[ ]
{'score': 0.0,
, 'counts': [2, 1, 0, 0],
, 'totals': [2, 1, 0, 0],
, 'precisions': [100.0, 100.0, 0.0, 0.0],
, 'bp': 0.004086771438464067,
, 'sys_len': 2,
, 'ref_len': 13}
[ ]
[ ]
[ ]
[ ]
[ ]
{'eval_loss': 1.6964408159255981,
, 'eval_bleu': 39.26865061007616,
, 'eval_runtime': 965.8884,
, 'eval_samples_per_second': 21.76,
, 'eval_steps_per_second': 0.341}
[ ]
[ ]
{'eval_loss': 0.8558505773544312,
, 'eval_bleu': 52.94161337775576,
, 'eval_runtime': 714.2576,
, 'eval_samples_per_second': 29.426,
, 'eval_steps_per_second': 0.461,
, 'epoch': 3.0}
[ ]
'https://huggingface.co/sgugger/marian-finetuned-kde4-en-to-fr/commit/3601d621e3baae2bc63d3311452535f8f58f6ef3'
[ ]
[ ]
[ ]
[ ]
[ ]
[ ]
'sgugger/marian-finetuned-kde4-en-to-fr-accelerate'
[ ]
[ ]
[ ]
epoch 0, BLEU score: 53.47
,epoch 1, BLEU score: 54.24
,epoch 2, BLEU score: 54.44
[ ]
[{'translation_text': 'Par défaut, développer les fils de discussion'}]
[ ]
[{'translation_text': "Impossible d'importer %1 en utilisant le module externe d'importation OFX. Ce fichier n'est pas le bon format."}]