Sunday, May 31, 2015

nlp27. Creating tagged corpus in Python NLTK

We can tag each word in the corpus by appending a slash and the tag to the word (for example, Bush/R).


Here a 6-word corpus is created in which every word is tagged; two distinct tags (R and D) are used.

# nlp27.py
from __future__ import print_function
import os
from nltk.data import path
# Build a small tagged corpus on disk: a directory holding one text file
# whose lines contain space-separated word/tag tokens.
name = 'MyTest2Corpus'   # directory that holds the corpus
name1 = 'Test2'          # the single corpus file inside that directory
word1 = ['Bush/R Clinton/D',
         'Rubio/R Clinton/D',
         'Paul/R Sanders/D']

# Work inside the first entry of nltk.data.path.
# NOTE(review): assumes that directory already exists -- confirm on the
# target machine.
os.chdir(path[0])
# Only create the corpus directory when it is missing, so the script can be
# re-run without os.mkdir raising because the directory already exists.
if not os.path.isdir(name):
    os.mkdir(name)
os.chdir(name)

# writelines() does not add line terminators, so append them explicitly.
word1 = [w + '\n' for w in word1]
# The context manager closes the file even if the write fails.
with open(name1, 'w') as fout1:
    fout1.writelines(word1)
print('word1 = \n', word1)

# word1 = 
#  ['Bush/R Clinton/D\n', 'Rubio/R Clinton/D\n',
#   'Paul/R Sanders/D\n']

No comments:

Post a Comment