A lexical dispersion plot shows the positions at which chosen words occur in a text.
Here, we select the subset of stop words that occur more than 90 times and fewer than 100 times in the text. There are 4 such words; they form the to_plot list, which is passed to the text's dispersion_plot method.
#nlp9.py
from __future__ import print_function, division
from nltk.corpus import stopwords
from nltk.book import text4
# Report which text is being analysed and how many distinct tokens it has.
print("%s has a vocabulary of %d" % (text4, len(set(text4))))

words = stopwords.words('english')
to_plot = []  # stop words whose count lies strictly between 90 and 100
tot = 0       # running sum of the counts of every stop word in the text

for word in words:
    count = text4.count(word)
    tot += count
    # Both bounds are exclusive: counts of exactly 90 or 100 are skipped.
    if 90 < count < 100:
        to_plot.append(word)
        # Printed only for selected words, matching the sample output
        # recorded with this script (four "count of" lines).
        print("count of %s is %d" % (word, count))

# Summary lines are printed once, after every stop word has been counted.
print("A total of %d stop words were used." % tot)
print("Total text length is", len(text4))

# Draw the dispersion plot for the selected words.
text4.dispersion_plot(to_plot)
# <Text: Inaugural Address Corpus> has a vocabulary of 9754
# count of between is 93
# count of through is 99
# count of out is 91
# count of some is 91
# A total of 64854 stop words were used.
# Total text length is 145735
Output:
No comments:
Post a Comment