In a new cell, add the following to convert your Colab code to a Python script:
!pip set up transformers torch sentencepiecefrom transformers import AutoTokenizer, AutoModelForSeq2SeqLM
# Use an actual grammar-correcting mannequin
model_name = "prithivida/grammar_error_correcter_v1"
tokenizer = AutoTokenizer.from_pretrained(model_name)
mannequin = AutoModelForSeq2SeqLM.from_pretrained(model_name)
def correct_grammar(textual content):
input_text = "gec: " + textual content
inputs = tokenizer.encode(input_text, return_tensors="pt")
outputs = mannequin.generate(inputs, max_length=128, num_beams=5, early_stopping=True)
return tokenizer.decode(outputs[0], skip_special_tokens=True)
# Attempt it out
input_sentence = "she no went to the market right this moment"
corrected = correct_grammar(input_sentence)
print("Authentic:", input_sentence)
print("Corrected:", corrected)
Then download the file:
# Colab-only helper: triggers a browser download of a file that already
# exists in the Colab runtime's filesystem.
from google.colab import files

files.download("grammar_improver.py")