Saturday, February 7, 2015

py11. Frequency table in Python

The CSV file was downloaded from http://countrylist.net/en/. Its delimiter is a semicolon, not a comma.


According to the file, there are 3 countries in Antarctica.


The groupby function is used to group the data by the 'continent' column. We are usually only interested in the 'name' column.

# ex11.py
from __future__ import division, print_function
from pandas import read_table
# Load the country list; the file uses ';' rather than ',' as the delimiter.
df = read_table('2015-02-07.dump.countrylist.net.csv', sep=';')

# List every column name, one per line.
print('\n***The columns are')
for column in df.columns:
    print(column)

print('\n\n***There are %d rows.' % len(df))

# Keep only the rows whose 'continent' value is exactly 'Antarctica'.
print('\n***Countries in Antartica:')
df1 = df[df['continent'] == 'Antarctica']
print(df1['name'])

# Frequency table: number of non-null 'name' entries per continent.
# Selecting the column before counting aggregates only that column,
# instead of counting every column and discarding all but one.
group = df.groupby('continent')
print('\n\n***Frequency table:')
print(group['name'].count())
#***The columns are
#id
#continent
#name
#capital
#iso-2
#iso-3
#ioc
#tld
#currency
#phone
#utc
#wiki
#name_de
#capital_de
#wiki_de
#
#
#***There are 250 rows.
#
#***Countries in Antartica:
#10                     Antarctica
#32                  Bouvet Island
#64    French Southern Territories
#Name: name, dtype: object
#
#
#***Frequency table:
#continent
#Africa           62
#Antarctica        3
#Asia             55
#Australia        26
#Europe           53
#North America    31
#South America    20
#Name: name, dtype: int64

2 comments:

  1. # ex11.py - py11. Frequency table in Python
    from __future__ import division, print_function
    from pandas import read_table
    # Load the country list; the file uses ';' rather than ',' as the delimiter.
    df = read_table('2015-02-07.dump.countrylist.net.csv', sep=';')

    # List every column name, one per line.
    print('\n***The columns are')
    for column in df.columns:
        print(column)

    print('\n\n***There are %d rows.' % len(df))

    # Keep only the rows whose 'kontinent' value is exactly 'Antarctica'.
    print('\n***Countries in Antartica:')
    df1 = df[df['kontinent'] == 'Antarctica']
    print(df1['name_en'])

    # Frequency table: number of non-null 'name_en' entries per continent.
    # Selecting the column before counting aggregates only that column,
    # instead of counting every column and discarding all but one.
    group = df.groupby('kontinent')
    print('\n\n***Frequency table:')
    print(group['name_en'].count())
    #***The columns are
    #id
    #kontinent
    #name_en
    #capital
    #iso-2
    #iso-3
    #ioc
    #tld
    #currency
    #phone
    #utc
    #wiki
    #name_de
    #capital_de
    #wiki_de
    #
    #
    #***There are 250 rows.
    #
    #***Countries in Antartica:
    #10 Antarctica
    #32 Bouvet Island
    #64 French Southern Territories
    #Name: name_en, dtype: object
    #
    #
    #***Frequency table:
    #kontinent
    #Africa 62
    #Antarctica 3
    #Asia 55
    #Australia 26
    #Europe 53
    #North America 31
    #South America 20
    #Name: name_en, dtype: int64

    ReplyDelete