NLTK includes some corpora. However, we can create our own.
We use Python to create two files, 'Test1' and 'Test2', in the folder MyTestCorpus. MyTestCorpus is a subfolder that is created inside the nltk_data folder.
This could also have been done manually.
# nlp25.py
from __future__ import print_function
import os
from nltk.data import path
# Build a tiny custom corpus: a folder holding two plain-text files,
# one word per line.
name = 'MyTestCorpus'
name1 = 'Test1'
word1 = ['One', 'Two', 'Five']
name2 = 'Test2'
word2 = ['Three', 'Four', 'Seven']

# The corpus folder lives directly under the first entry of nltk.data.path.
corpus_dir = os.path.join(path[0], name)

# Create the folder (and any missing parent) only when it is absent, so that
# running the script a second time does not fail on an existing directory.
if not os.path.isdir(corpus_dir):
    os.makedirs(corpus_dir)

# As in the original script, the working directory ends up inside the
# corpus folder; the files below are written relative to it.
os.chdir(corpus_dir)

# writelines() adds no separators, so each word gets its own newline first.
word1 = [w + '\n' for w in word1]
with open(name1, 'w') as fout1:
    fout1.writelines(word1)

word2 = [w + '\n' for w in word2]
with open(name2, 'w') as fout2:
    fout2.writelines(word2)

print('word1 = \n', word1)
print('word2 = \n', word2)
# word1 =
# ['One\n', 'Two\n', 'Five\n']
# word2 =
# ['Three\n', 'Four\n', 'Seven\n']
No comments:
Post a Comment