This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from itertools import chain | |
from nltk.corpus import wordnet | |
import nltk | |
# Demo: tag a sentence with NLTK, collect its verbs, then print the WordNet
# lemma names (synonym candidates) found for each verb.
sentence = "I am buying this book"
tokens = nltk.word_tokenize(sentence)
# Each element of `tagged` is a (token, tag) pair.
tagged = nltk.pos_tag(tokens)
length = len(tagged)  # no longer used below; kept in case later code reads it

# POS tags treated as verbs. Tag list taken from:
# https://stackoverflow.com/questions/15388831/what-are-all-possible-pos-tags-of-nltk
VERB_TAGS = frozenset({
    'VB',   # verb, base form: take
    'VBD',  # verb, past tense: took
    'VBG',  # verb, gerund/present participle: taking
    'VBN',  # verb, past participle: taken (was documented but never checked)
    'VBP',  # verb, sing. present, non-3d: take
    'VBZ',  # verb, 3rd person sing. present: takes
})

print(tagged)
verbs = list()
for token, tag in tagged:
    print(tag)
    if tag in VERB_TAGS:
        verbs.append(token)
print(verbs)

for word in verbs:
    synsets = wordnet.synsets(word)
    # Flatten the lemma names of every synset into one de-duplicated set.
    lemmas = set(chain.from_iterable(synset.lemma_names() for synset in synsets))
    print('Word in Context')
    print(word)
    print('Similar Synomynms')
    # Distinct loop name so the verb held in `word` is not overwritten.
    for lemma in lemmas:
        print(lemma)