Sunday, May 31, 2015

nlp30. Reading and Drawing a Chunked Corpus in Python NLTK

The chunked text is read using ChunkedCorpusReader.


Only the words inside chunks have tags; every other word is paired with None.


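Three tree graphs are created, one per sentence. They are shown one at a time: when the first window is closed by clicking on X, the second pops up, and so on. The draw method opens a GUI window, so a graphical environment is needed; matplotlib should be installed as well (note that NLTK's tree viewer itself is built on Tkinter).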

# nlp30.py
# Read a chunked corpus with NLTK, print every (word, tag) pair, then draw
# the chunk tree of each sentence.
from __future__ import print_function

import os

from nltk.corpus.reader import ChunkedCorpusReader
from nltk.data import path

# Keep the corpus directory under its own name so the search-path list
# imported from nltk.data as `path` is not overwritten by a string.
corpus_root = os.path.join(path[0], 'MyTest3Corpus')

# The '.*' pattern matches every file found under the corpus directory.
reader = ChunkedCorpusReader(corpus_root, '.*')

# Each item is a (word, tag) tuple.
for tagged_word in reader.tagged_words():
    print(tagged_word)

# Draw one tree per chunked sentence.
for sent_tree in reader.chunked_sents():
    sent_tree.draw()
    
#    (u'Bush', u'R')
#    (u'Clinton', u'D')
#    (u'odds', None)
#    (u'are', None)
#    (u'1', None)
#    (u'in', None)
#    (u'2', None)
#    (u'Rubio', u'R')
#    (u'Clinton', u'D')
#    (u'odds', None)
#    (u'are', None)
#    (u'1', None)
#    (u'in', None)
#    (u'3', None)
#    (u'The', None)
#    (u'odds', None)
#    (u'of', None)
#    (u'Paul', u'R')
#    (u'Sanders', u'D')
#    (u'are', None)
#    (u'small', None)

Outputs:

No comments:

Post a Comment