Very good custom end-to-end example post -
Very detailed post
- Creating the custom test dataset
- Data source bookmark links are listed in the Dataset section
Code Changes / Minor Fixes
- One code issue comes up when migrating from spaCy 2 to spaCy 3: the training API changed, so code changes are required
- The block of code changes is shown below:
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Add NER to the pipeline and register the new "FOOD" label, then fine-tune
# only the NER component. Updated for spaCy 3.x: nlp.update() now takes a
# list of Example objects instead of parallel (texts, annotations) lists.
#
# Assumes `nlp` (a loaded spaCy pipeline) and `TRAIN_DATA` (a list of
# (text, annotations) pairs) are defined earlier in the script.
import random
import warnings

from spacy.training import Example
from spacy.util import compounding, minibatch

ner = nlp.get_pipe("ner")
ner.add_label("FOOD")

# Names of the components we want to keep ENABLED during training;
# everything else is disabled so only NER weights are updated.
pipe_exceptions = ["ner", "trf_wordpiecer", "trf_tok2vec"]
other_pipes = [pipe for pipe in nlp.pipe_names if pipe not in pipe_exceptions]

# Start the training loop, only training NER.
epochs = 30
optimizer = nlp.resume_training()
# NOTE: nlp.disable_pipes() still works in v3 but nlp.select_pipes(disable=...)
# is the preferred spelling — TODO confirm before a future spaCy upgrade.
with nlp.disable_pipes(*other_pipes), warnings.catch_warnings():
    warnings.filterwarnings("once", category=UserWarning, module='spacy')
    # Batch sizes compound from 1 up to 4, growing by a factor of 1.001.
    sizes = compounding(1.0, 4.0, 1.001)
    for epoch in range(epochs):
        # Shuffle a copy so the caller's TRAIN_DATA order is left untouched
        # (shuffling TRAIN_DATA directly would mutate shared state).
        examples = list(TRAIN_DATA)
        random.shuffle(examples)
        # Batch up the examples using spaCy's minibatch helper.
        batches = minibatch(examples, size=sizes)
        losses = {}
        for batch in batches:
            texts, annotations = zip(*batch)
            # spaCy 3 API: wrap each (text, annotations) pair in an Example.
            # The spaCy 2 call nlp.update(texts, annotations, ...) is gone.
            batch_examples = [
                Example.from_dict(nlp.make_doc(text), ann)
                for text, ann in zip(texts, annotations)
            ]
            # Pass sgd=optimizer explicitly: the resumed optimizer created
            # above was previously never used (dead variable).
            nlp.update(batch_examples, sgd=optimizer, drop=0.5, losses=losses)
        print("Losses ({}/{})".format(epoch + 1, epochs), losses)
Keep Exploring!!!
No comments:
Post a Comment