# 1. Download the pretrained T5 model and tokenizer with a script and save them locally
from transformers import T5Tokenizer, T5Model
# Hub identifier of the checkpoint to fetch, and the local directory that
# receives the saved copy.
model_name = 't5-base'
save_dir = 'T5-base'

# from_pretrained() fetches the weights / vocabulary for `model_name`.
model = T5Model.from_pretrained(model_name)
tokenizer = T5Tokenizer.from_pretrained(model_name)

# Write both artifacts into the same directory (model first, then
# tokenizer) so they can later be reloaded from that single path.
for artifact in (model, tokenizer):
    artifact.save_pretrained(save_dir)
# 2. Encode a sentence with T5
from transformers import T5Tokenizer, T5Model
import torch
# Directory written by save_pretrained() in step 1. It must match that
# path exactly, otherwise from_pretrained() cannot find the files.
model_dir = 'T5-base'

tokenizer = T5Tokenizer.from_pretrained(model_dir)
model = T5Model.from_pretrained(model_dir)
model.eval()  # inference mode (turns off dropout)

text = "Hello, my dog is cute"
# Calling the tokenizer directly is the supported replacement for the
# deprecated encode_plus(); it returns 'input_ids' and 'attention_mask'
# as PyTorch tensors because of return_tensors="pt".
tokenized_text = tokenizer(text, return_tensors="pt")

# T5Model is an encoder-decoder: its forward() also requires decoder inputs
# and raises ValueError when only input_ids are given. To embed a sentence,
# run the encoder stack on its own.
encoder = model.get_encoder()
with torch.no_grad():  # no gradients needed for feature extraction
    outputs = encoder(
        input_ids=tokenized_text['input_ids'],
        attention_mask=tokenized_text['attention_mask'],
    )

# First element of the encoder output is its last hidden state
# (one vector per input token).
hidden_states = outputs[0]
print(hidden_states)