Posts List

Torchtext snippets

Load separate files

The parameters of data.Field are documented here.

    INPUT = data.Field(lower=True, batch_first=True)
    TAG = data.Field(batch_first=True, unk_token=None, is_target=True)
    train, val, test = data.TabularDataset.splits(path=base_dir.as_posix(), train='train_data.csv', validation='val_data.csv', test='test_data.csv', format='tsv', fields=[(None, None), ('input', INPUT), ('tag', TAG)])

Load single file

    all_data = data.TabularDataset(path=base_dir / 'gossip_train_data.csv', format='tsv', fields=[('text', TEXT), ('category', CATEGORY)])
    train, val, test = all_data.split([0.7, 0.2, 0.1])

Create iterator

    train_iter, val_iter, test_iter = data.BucketIterator.splits( (train, val, test), batch_sizes=(32, 256, 256), shuffle=True, sort_key=lambda x: x.input)

Load pretrained vectors

    vectors = Vectors(name='cc.