We use TaggedCorpusReader to read the corpus created in the last program.
The methods words() and tagged_words() are used to get the file contents as strings or as (word, tag) tuples.
# nlp28.py
from __future__ import print_function
from nltk.data import path
from nltk.corpus.reader import TaggedCorpusReader
# Build the corpus directory from the first entry of NLTK's data search path.
# NOTE: this rebinds the imported name `path` (the search-path list) to a
# string, exactly as the original script did.
path = path[0] + '/MyTest2Corpus'
# '.*' is a file-id pattern that selects every file in the corpus directory.
reader = TaggedCorpusReader(path, '.*')

# words() yields the bare tokens, without their tags.
word1 = reader.words()
print("words1 =")
# Print two tokens per row. Floor division keeps the row count an int on
# Python 3 as well as Python 2; a trailing unpaired token is not printed.
for i in range(len(word1) // 2):
    print(word1[2*i],'\t',word1[2*i+1])
print()

# tagged_words() yields (word, tag) tuples for the same tokens.
tag_words1 = reader.tagged_words()
print("tag_words1 =")
# Same two-per-row layout as above, this time for the (word, tag) tuples.
for i in range(len(tag_words1) // 2):
    print(tag_words1[2*i],'\t',tag_words1[2*i+1])
# words1 =
# Bush Clinton
# Rubio Clinton
# Paul Sanders
#
# tag_words1 =
# (u'Bush', u'R') (u'Clinton', u'D')
# (u'Rubio', u'R') (u'Clinton', u'D')
# (u'Paul', u'R') (u'Sanders', u'D')
No comments:
Post a Comment