Sunday, May 31, 2015

nlp29. Creating chunked corpus in Python NLTK

The tagged chunks have to be placed between [ and ].


Three lines are written with one chunk in each line about a possible machine learning outcome.

# nlp29.py
from __future__ import print_function
import os
from nltk.data import path
# Corpus directory (created under the first NLTK data path) and the file in it.
name = 'MyTest3Corpus'
name1 = 'Test3'

# One sentence per line; each chunk is wrapped in [ ] and every word inside
# a chunk is written as word/TAG.
words1 = [
    '[Bush/R Clinton/D] odds are 1 in 2\n',
    '[Rubio/R Clinton/D] odds are 1 in 3\n',
    'The odds of [Paul/R Sanders/D] are small\n',
]

# Work inside the first directory on NLTK's data search path.
os.chdir(path[0])
# Only create the corpus directory when it is missing, so that running the
# script a second time does not fail because the directory already exists.
if not os.path.isdir(name):
    os.mkdir(name)
os.chdir(name)

# The with-block closes the file even if the write raises.
with open(name1, 'w') as fout1:
    fout1.writelines(words1)
print('word1 = \n',words1)

# word1 = 
#  ['[Bush/R Clinton/D] odds are 1 in 2\n',
#   '[Rubio/R Clinton/D] odds are 1 in 3\n',
#   'The odds of [Paul/R Sanders/D] are small\n']

No comments:

Post a Comment