This recipe includes the following topics:
- Load a local CSV file using Pandas
- Load an external CSV file (hosted on GitHub) using Pandas
Note:
- read_csv() returns a Pandas DataFrame
- The syntax for reading a local file and an external file (via URL) is the same
# import module
from pandas import read_csv
# Path of the local CSV file to load.
filename = 'pima-indians-diabetes.data.csv'
# Column labels handed to read_csv through names=.
cols = [
    'preg', 'plas', 'pres', 'skin', 'test',
    'mass', 'pedi', 'age', 'class',
]
# Parse the CSV into a Pandas DataFrame using the labels above.
pimaDf = read_csv(filename, names=cols)
# Report the (rows, columns) dimensions of the DataFrame.
print(pimaDf.shape)
# Preview the first 5 rows.
print(pimaDf.head(n=5))
(768, 9)
preg plas pres skin test mass pedi age class
0 6 148 72 35 0 33.6 0.627 50 1
1 1 85 66 29 0 26.6 0.351 31 0
2 8 183 64 0 0 23.3 0.672 32 1
3 1 89 66 23 94 28.1 0.167 21 0
4 0 137 40 35 168 43.1 2.288 33 1
# import module
from pandas import read_csv
# Raw GitHub URL of the CSV; it is passed to read_csv in place of a local path.
fileGitURL = 'https://raw.githubusercontent.com/andrewgurung/data-repository/master/pima-indians-diabetes.data.csv'
# Column labels handed to read_csv through names=.
cols = [
    'preg', 'plas', 'pres', 'skin', 'test',
    'mass', 'pedi', 'age', 'class',
]
# Fetch and parse the remote CSV into a Pandas DataFrame.
pimaDf = read_csv(fileGitURL, names=cols)
# Report the (rows, columns) dimensions of the DataFrame.
print(pimaDf.shape)
(768, 9)