# Documentation
# Using Pandas for data science
# Import Pandas Package
import pandas as pd
# Load the CSV file into a DataFrame with pandas.read_csv.
# `sep` is the field separator; here the fields are separated by commas.
california_housing_dataframe = pd.read_csv(
    "sample_data/california_housing_train.csv",
    sep=",",
)
# Number of rows in the DataFrame. Note that DataFrame.size is NOT the row
# count: it is the total number of cells (rows * columns).
len(california_housing_dataframe)
# DataFrame.head(10) returns the first 10 rows
california_housing_dataframe.head(10)
# DataFrame.hist("column_name") uses matplotlib to draw a histogram of that column
california_housing_dataframe.hist('housing_median_age')
# Select a subset of columns: indexing with a list of names gives a new
# DataFrame containing only those columns.
# NOTE: 'column1' / 'column2' are placeholders -- substitute real column names
# from the CSV (the name chd_beds_pop suggests bedroom and population columns;
# confirm against the file header).
chd_beds_pop = california_housing_dataframe[['column1', 'column2']]
# Keep only the rows where column2 is greater than 100. The explicit .copy()
# makes big_pop an independent DataFrame, so adding a column to it below does
# not trigger SettingWithCopyWarning or depend on view-vs-copy behaviour.
big_pop = california_housing_dataframe.loc[
    california_housing_dataframe['column2'] > 100
].copy()
# Number of rows that passed the filter (DataFrame.size would instead give
# rows * columns).
len(big_pop)
# Add column3 = column1 divided by column2, computed per row. Dividing the two
# columns directly is vectorized, avoiding a Python-level lambda call per row.
big_pop['column3'] = big_pop['column1'] / big_pop['column2']
#