In the previous program, the stop list S contained 127 words.
We remove the words 'then' and 'now' using set operations, so its new size is 125. Even though S changed from a list to a set, the rest of the program did not have to change.
# nlp8.py
from __future__ import print_function, division
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize
# Sample text to tokenize; the triple-quoted literal spans three lines.
lines = """Dr. Brown gave a speech. I'd wish I knew then
what I know now. Finally, he praised Python! At 8 o'clock,
he went home."""
# Start from NLTK's English stop-word list.
S = stopwords.words("english")
# Words to keep in the output even though they appear on the stop list.
nS = ['then', 'now']
# Set difference removes the kept words; S is a set from here on, and the
# membership test in the loop below works the same as it did with a list.
S = set(S) - set(nS)
t = '\t'
# Lowercase the text before tokenizing; tokens are compared to S as-is.
A = word_tokenize(lines.lower())
# Print every token that is not a stop word, one per line, prefixed by a tab.
for a in A:
    if a not in S:
        print(t, a)
# dr.
# brown
# gave
# speech
# .
# 'd
# wish
# knew
# then
# know
# now
# .
# finally
# ,
# praised
# python
# !
# 8
# o'clock
# ,
# went
# home
# .
No comments:
Post a Comment