import pandas as pd

# Parallel lists: the i-th entry of each list describes the same country.
countries = [
    "China", "India", "Brazil",
    "USA", "Ethiopia", "Egypt",
]
capitals = [
    "Beijing", "New Delhi", "Brasil",
    "Washington DC", "Addis Ababa", "Cairo",
]
density = [153, 464, 25, 36, 115, 103]

# Column labels, listed in the same order as the zipped lists below.
columns = ['countries', 'capitals', 'density']

# zip() pairs the lists element by element, so every tuple becomes one row
# of the resulting DataFrame.
pd.DataFrame(data=list(zip(countries, capitals, density)), columns=columns)


import pandas as pd
# Source data: three lists of equal length, one entry per country.
countries = [
    "China", "India", "Brazil",
    "USA", "Ethiopia", "Egypt",
]
capitals = [
    "Beijing", "New Delhi", "Brasil",
    "Washington DC", "Addis Ababa", "Cairo",
]
density = [153, 464, 25, 36, 115, 103]

# Each dictionary key becomes a column label and each list becomes that
# column's values.
my_dict = {
    'countries': countries,
    'capitals': capitals,
    'density': density,
}

# Build the DataFrame from the dictionary of lists.
df = pd.DataFrame(data=my_dict)

# Leave the DataFrame as the last expression so the notebook displays it.
df


import pandas as pd

# import data saved in file
df = pd.read_csv('vehicle_data.csv')

# display the first 5 rows
df.head()


import pandas as pd
# Plain Python lists to convert.
countries = [
    "China", "India", "Brazil",
    "USA", "Ethiopia", "Egypt",
]
density = [153, 464, 25, 36, 115, 103]

# Wrap each list in a pandas Series; the names are rebound from list to Series.
countries = pd.Series(data=countries)
density = pd.Series(data=density)


countries

0       China
1       India
2      Brazil
3         USA
4    Ethiopia
5       Egypt
dtype: object


density

0    153
1    464
2     25
3     36
4    115
5    103
dtype: int64


# create Series with index
import pandas as pd
# Labels (countries) and the values that belong to them (density).
countries = [
    "China", "India", "Brazil",
    "USA", "Ethiopia", "Egypt",
]
density = [153, 464, 25, 36, 115, 103]

# Build the Series with the country names as its index instead of the
# default 0..n-1 integer positions.
density = pd.Series(data=density, index=countries)
density

China       153
India       464
Brazil       25
USA          36
Ethiopia    115
Egypt       103
dtype: int64


# create Series with index and name
import pandas as pd
# Labels (countries) and the values that belong to them (density).
countries = [
    "China", "India", "Brazil",
    "USA", "Ethiopia", "Egypt",
]
density = [153, 464, 25, 36, 115, 103]

# Build a labelled Series and also give the Series itself a name.
density = pd.Series(
    data=density,
    index=countries,
    name="density",
)
density

China       153
India       464
Brazil       25
USA          36
Ethiopia    115
Egypt       103
Name: density, dtype: int64


# indexing
# access specific country value

density['Ethiopia']

115


# slicing
density['India':'Egypt']

India       464
Brazil       25
USA          36
Ethiopia    115
Egypt       103
Name: density, dtype: int64


# accessing multiple rows
density[['Ethiopia','Egypt']]

Ethiopia    115
Egypt       103
Name: density, dtype: int64


import pandas as pd
# Source data: three lists of equal length, one entry per country.
countries = [
    "China", "India", "Brazil",
    "USA", "Ethiopia", "Egypt",
]
capitals = [
    "Beijing", "New Delhi", "Brasil",
    "Washington DC", "Addis Ababa", "Cairo",
]
density = [153, 464, 25, 36, 115, 103]

# Map each column label to the list holding that column's values.
my_dict = dict(countries=countries, capitals=capitals, density=density)

# Turn the dictionary of lists into a DataFrame.
df = pd.DataFrame(data=my_dict)

# Selecting a single column with [] returns that column as a Series.
df['density']

0    153
1    464
2     25
3     36
4    115
5    103
Name: density, dtype: int64


type(df['density'])

pandas.core.series.Series


import pandas as pd

# import data saved in file
df = pd.read_csv('vehicle_data.csv')


df.head(10)


df.tail(10)


df.shape

(300, 18)


df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 300 entries, 0 to 299
Data columns (total 18 columns):
 #   Column         Non-Null Count  Dtype  
---  ------         --------------  -----  
 0   title          300 non-null    object 
 1   category       300 non-null    object 
 2   region         300 non-null    object 
 3   parent_region  294 non-null    object 
 4   condition      295 non-null    object 
 5   attrs          299 non-null    object 
 6   brand          299 non-null    object 
 7   color          244 non-null    object 
 8   model          283 non-null    object 
 9   yom            297 non-null    float64
 10  mileage        209 non-null    float64
 11  body_type      42 non-null     object 
 12  fuel           82 non-null     object 
 13  drive_train    41 non-null     object 
 14  trans          249 non-null    object 
 15  seat           31 non-null     float64
 16  registered     175 non-null    object 
 17  price          300 non-null    int64  
dtypes: float64(3), int64(1), object(14)
memory usage: 42.3+ KB


df.dtypes

title             object
category          object
region            object
parent_region     object
condition         object
attrs             object
brand             object
color             object
model             object
yom              float64
mileage          float64
body_type         object
fuel              object
drive_train       object
trans             object
seat             float64
registered        object
price              int64
dtype: object


df['yom'].isnull().sum()

3


df[df['yom'].isnull()]


df['parent_region'].value_counts(normalize=True)

Mombasa        0.500000
Nairobi        0.431973
Kiambu         0.023810
Nakuru         0.013605
Machakos       0.006803
Kisumu         0.003401
Uasin Gishu    0.003401
Meru           0.003401
Kirinyaga      0.003401
Kajiado        0.003401
Mvita          0.003401
Nyali          0.003401
Name: parent_region, dtype: float64


df.isnull().sum()

title              0
category           0
region             0
parent_region      6
condition          5
attrs              1
brand              1
color             56
model             17
yom                3
mileage           91
body_type        258
fuel             218
drive_train      259
trans             51
seat             269
registered       125
price              0
dtype: int64


df.index.values

array([  0,   1,   2,   3,   4,   5,   6,   7,   8,   9,  10,  11,  12,
        13,  14,  15,  16,  17,  18,  19,  20,  21,  22,  23,  24,  25,
        26,  27,  28,  29,  30,  31,  32,  33,  34,  35,  36,  37,  38,
        39,  40,  41,  42,  43,  44,  45,  46,  47,  48,  49,  50,  51,
        52,  53,  54,  55,  56,  57,  58,  59,  60,  61,  62,  63,  64,
        65,  66,  67,  68,  69,  70,  71,  72,  73,  74,  75,  76,  77,
        78,  79,  80,  81,  82,  83,  84,  85,  86,  87,  88,  89,  90,
        91,  92,  93,  94,  95,  96,  97,  98,  99, 100, 101, 102, 103,
       104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116,
       117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 128, 129,
       130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142,
       143, 144, 145, 146, 147, 148, 149, 150, 151, 152, 153, 154, 155,
       156, 157, 158, 159, 160, 161, 162, 163, 164, 165, 166, 167, 168,
       169, 170, 171, 172, 173, 174, 175, 176, 177, 178, 179, 180, 181,
       182, 183, 184, 185, 186, 187, 188, 189, 190, 191, 192, 193, 194,
       195, 196, 197, 198, 199, 200, 201, 202, 203, 204, 205, 206, 207,
       208, 209, 210, 211, 212, 213, 214, 215, 216, 217, 218, 219, 220,
       221, 222, 223, 224, 225, 226, 227, 228, 229, 230, 231, 232, 233,
       234, 235, 236, 237, 238, 239, 240, 241, 242, 243, 244, 245, 246,
       247, 248, 249, 250, 251, 252, 253, 254, 255, 256, 257, 258, 259,
       260, 261, 262, 263, 264, 265, 266, 267, 268, 269, 270, 271, 272,
       273, 274, 275, 276, 277, 278, 279, 280, 281, 282, 283, 284, 285,
       286, 287, 288, 289, 290, 291, 292, 293, 294, 295, 296, 297, 298,
       299], dtype=int64)


df.columns

Index(['title', 'category', 'region', 'parent_region', 'condition', 'attrs',
       'brand', 'color', 'model', 'yom', 'mileage', 'body_type', 'fuel',
       'drive_train', 'trans', 'seat', 'registered', 'price'],
      dtype='object')


# lets create a toy dataset

import pandas as pd
# Source data: three lists of equal length, one entry per country.
countries = [
    "China", "India", "Brazil",
    "USA", "Ethiopia", "Egypt",
]
capitals = [
    "Beijing", "New Delhi", "Brasil",
    "Washington DC", "Addis Ababa", "Cairo",
]
density = [153, 464, 25, 36, 115, 103]

# Column label -> column values.
my_dict = {
    'countries': countries,
    'capitals': capitals,
    'density': density,
}

# Build the toy DataFrame and use the country names as row labels, so rows
# can be looked up by name as well as by position.
toy = pd.DataFrame(data=my_dict, index=countries)

toy


toy.index.values

array(['China', 'India', 'Brazil', 'USA', 'Ethiopia', 'Egypt'],
      dtype=object)


toy.loc['Brazil']

countries    Brazil
capitals     Brasil
density          25
Name: Brazil, dtype: object


toy.loc[:,'capitals']

China             Beijing
India           New Delhi
Brazil             Brasil
USA         Washington DC
Ethiopia      Addis Ababa
Egypt               Cairo
Name: capitals, dtype: object


toy.loc[['India','Ethiopia','Egypt']]


toy.loc[:,['countries','density']]


toy.loc[
    ['Brazil','USA'],
    ['countries','density']
]


toy.loc['Brazil':'Ethiopia']


toy.loc[:,'capitals':'density']


toy.loc[
    'Brazil':'Ethiopia',
    'capitals':'density'
]


toy.iloc[1] # the entry at row 1

countries        India
capitals     New Delhi
density            464
Name: India, dtype: object


toy.iloc[-1] # negative indexing

countries    Egypt
capitals     Cairo
density        103
Name: Egypt, dtype: object


toy.iloc[[1,3,4]] # index multiple rows


# index all rows and first and third columns
toy.iloc[:, [0,2]]


# slice from first row to the second row
toy.iloc[0:2]


toy.iloc[0:4,0:2]


toy.at['Egypt','density']

103


toy.iat[-1,-1]

103


toy['countries']

China          China
India          India
Brazil        Brazil
USA              USA
Ethiopia    Ethiopia
Egypt          Egypt
Name: countries, dtype: object


toy[['countries','density']]


toy[0:2]


toy["China":'Brazil']


# sort the toy dataset in alphabetical order by name of country
toy.sort_values(by="countries")


# sort the toy dataset such that the country with highest density is on top
toy.sort_values(by='density',ascending=False)

toy


toy.sort_values(by='density',ascending=False,inplace=True)

toy


toy.sort_index()


toy.sort_index(ascending=False)


# keep the changes
toy.sort_index(inplace=True)

toy


# create a mask
mask = toy['density'] > 100
mask

Brazil      False
China        True
Egypt        True
Ethiopia     True
India        True
USA         False
Name: density, dtype: bool


toy[mask]


df.head()


df[df['yom'] > 2015]


df[(df['yom']>2015) & (df['category'] == 'Cars')]


colors = ['Black','White','Red']

df[df['color'].isin(colors)]


df[df['title'].str.startswith('M')]


df[df['title'].str.contains('Benz')]


df[~df['title'].str.contains('Toyota')]


df.query("category == 'Cars' and price > 5_000_000")


# 5 vehicles with highest price

df.nlargest(5,'price')


# 5 vehicles with the lowest prices
df.nsmallest(5,'price')


# solution to challenge 1
df.loc[:, ['title','region','brand','price']]


# solution
# build one boolean Series per condition ...
is_toyota = df['brand'] == "Toyota"
in_kilimani = df['region'] == "Kilimani"

# ... and combine them: a row is selected only when both conditions hold
mask = is_toyota & in_kilimani

# index the DataFrame with the mask to keep only the rows marked True
df[mask]


### solution to challenge 3

df[df['model'].str.contains('CX', na=False)]


## Challenge 4 solution

df[(df['brand']=="Mercedes-Benz") & (df['color'] != 'Red')].sort_values(by='price',ascending=False)


#### Challenge 5 solution
# brand is either Mercedes-Benz or BMW
brand_ok = df['brand'].isin(['Mercedes-Benz', 'BMW'])

# the listing title mentions Black or White
colour_ok = df['title'].str.contains('Black') | df['title'].str.contains('White')

# the vehicle is located in one of the four listed regions
region_ok = df['region'].isin(['Kilimani', 'Lavington', 'Langata', 'Westlands'])

# keep rows meeting all three conditions, then order by brand (A-Z) and,
# within each brand, by price from highest to lowest
df[brand_ok & colour_ok & region_ok].sort_values(
    by=['brand', 'price'],
    ascending=[True, False],
)


### Challenge 6 Solution

# your solution here



#


df.isnull().sum()

title              0
category           0
region             0
parent_region      6
condition          5
attrs              1
brand              1
color             56
model             17
yom                3
mileage           91
body_type        258
fuel             218
drive_train      259
trans             51
seat             269
registered       125
price              0
dtype: int64


df[df['yom'].isnull()]


# Isuzu trucks modal yom

# filter 
df[(df['category']=='Trucks & Trailers')&(df['brand']=='Isuzu')].head()


df[(df['category']=='Trucks & Trailers')&(df['brand']=='Isuzu')]['yom'].value_counts()

2015.0    4
2013.0    3
2012.0    2
2019.0    1
2014.0    1
2006.0    1
2007.0    1
1994.0    1
1990.0    1
1999.0    1
2016.0    1
2010.0    1
Name: yom, dtype: int64


df.at[109,'yom'] = 2014


df.at[109,'yom']

2014.0


df[(df['category']=='Cars')&(df['model']=='Outback')]['yom'].value_counts().index.tolist()

[2015.0, 2013.0, 2014.0]


df.at[201,'yom']

nan


df.at[201,'yom'] = 2015


df.drop(261,inplace=True)


df.isnull().sum()

title              0
category           0
region             0
parent_region      6
condition          4
attrs              0
brand              0
color             55
model             16
yom                0
mileage           90
body_type        257
fuel             217
drive_train      258
trans             50
seat             268
registered       124
price              0
dtype: int64


df['color'].value_counts()

White         68
Black         57
Silver        27
Blue          25
Gray          16
Red           11
Brown         10
Pink           5
Purple         5
Green          4
Pearl          3
Gold           3
Other          3
Beige          2
Off white      2
Grey           1
Burgandy       1
Matt Black     1
Name: color, dtype: int64


# Replace missing colours with the placeholder 'Not Specified'.
# The filled column is assigned back to df rather than calling
# fillna(..., inplace=True) on df['color']: that form modifies an intermediate
# Series, which recent pandas releases warn about and which leaves df
# unchanged when copy-on-write is enabled.
df['color'] = df['color'].fillna('Not Specified')


df['color'].value_counts()

White            68
Black            57
Not Specified    55
Silver           27
Blue             25
Gray             16
Red              11
Brown            10
Purple            5
Pink              5
Green             4
Gold              3
Pearl             3
Other             3
Beige             2
Off white         2
Grey              1
Burgandy          1
Matt Black        1
Name: color, dtype: int64


df[df['condition'].isnull()]


df['condition'].value_counts()

Foreign Used    170
Kenyan Used      65
Used             35
Brand New        25
Name: condition, dtype: int64


# the modal condition for vehicles under the Trucks & Trailers category
trucks_mode = df[df['category'] == "Trucks & Trailers"]['condition'].mode()
trucks_mode

0    Used
Name: condition, dtype: object


# Fill every missing condition with 'Used'.
# fillna needs no numpy (np is not imported in this notebook), and the result
# is assigned back to the column because an in-place call on df['condition']
# would act on an intermediate Series and may not update df.
df['condition'] = df['condition'].fillna('Used')


# drop when any columns in cols list are missing
cols=['model','mileage','body_type',
      'fuel','drive_train','trans',
      'seat','registered']
df.dropna(axis='index', how='any', subset=cols).head()


# keep only the rows that have at least 5 non-missing values among the
# columns in cols, i.e. drop a row when 4 or more of these 8 columns are missing
cols=['model','mileage','body_type',
      'fuel','drive_train','trans',
      'seat','registered']

# thresh is passed on its own: pandas (1.5 and later) raises a TypeError when
# how= and thresh= are given together
df.dropna(axis='index', subset=cols, thresh=5).head()


# drop only when all columns in cols list are missing
cols=['model','mileage','body_type',
      'fuel','drive_train','trans',
      'seat','registered']
df.dropna(axis='index', how='all', subset=cols).head()


# drop only when all columns in cols list are missing
cols=['model','mileage','body_type',
      'fuel','drive_train','trans',
      'seat','registered']
df.dropna(axis='index', how='all', subset=cols,inplace=True)


df.isnull().sum()

title              0
category           0
region             0
parent_region      0
condition          0
attrs              0
brand              0
color              0
model              1
yom                0
mileage           75
body_type        242
fuel             202
drive_train      243
trans             35
seat             253
registered       109
price              0
dtype: int64


threshold = int(0.6* len(df))
threshold

170


# drop every column that has fewer than `threshold` non-missing values;
# the check is applied to all columns of df, so no column list is needed
df.dropna(axis='columns',thresh=threshold,inplace=True)


df.isnull().sum()

title              0
category           0
region             0
parent_region      0
condition          0
attrs              0
brand              0
color              0
model              1
yom                0
mileage           75
trans             35
registered       109
price              0
dtype: int64


df.groupby('condition')['registered'].value_counts()

condition     registered
Brand New     No             6
              Yes            6
Foreign Used  Yes           80
              No            49
Kenyan Used   Yes           34
Name: registered, dtype: int64


df[df['condition'] == "Kenyan Used"]['registered'].value_counts()

Yes    65
Name: registered, dtype: int64


# Every Kenyan Used vehicle with a known registration status is registered,
# so fill the missing 'registered' values of that group with 'Yes'.
kenyan_used = df['condition'] == "Kenyan Used"

# fillna catches any missing-value representation (NaN or None), unlike an
# identity comparison against np.nan, and does not need numpy to be imported
reg_df = df.loc[kenyan_used, 'registered'].fillna('Yes')

# write the filled values back to the same rows only
df.loc[kenyan_used, 'registered'] = reg_df


df.groupby('condition')['registered'].value_counts()

condition     registered
Brand New     No             6
              Yes            6
Foreign Used  Yes           80
              No            49
Kenyan Used   Yes           65
Name: registered, dtype: int64


df.isnull().sum()

title             0
category          0
region            0
parent_region     0
condition         0
attrs             0
brand             0
color             0
model             1
yom               0
mileage          75
trans            35
registered       78
price             0
dtype: int64


import matplotlib.pyplot as plt


cars_df = df[df['category']=='Cars'].copy()
cars_df.head()


plt.scatter("yom","price",data=cars_df)
plt.title("Year Against Price")
plt.xlabel("Year of Make")
plt.ylabel("Price in Kshs Millions")
plt.grid()


# ensure that the yom column is numeric 
cars_df.dtypes

title             object
category          object
region            object
parent_region     object
condition         object
attrs             object
brand             object
color             object
model             object
yom              float64
mileage          float64
trans             object
registered        object
price              int64
dtype: object


# get the current year
from datetime import date

cur_year = date.today().year
cur_year

2022


cur_year - cars_df['yom']

0       6.0
1       8.0
3       8.0
4       8.0
5       9.0
       ... 
293     8.0
294     8.0
295    14.0
298    13.0
299     7.0
Name: yom, Length: 221, dtype: float64


# to create a new column, assign to the new column's name within square brackets
# age_years = current year minus the year of make (yom)
cars_df['age_years'] = cur_year - cars_df['yom']
cars_df.head()


cars_df['age_years'].astype(int)

0       6
1       8
3       8
4       8
5       9
       ..
293     8
294     8
295    14
298    13
299     7
Name: age_years, Length: 221, dtype: int32


# casting the dtype from float to int
cars_df['age_years'] = cars_df['age_years'].astype(int)


cars_df['age_years'].mean()

9.027149321266968


cars_df[['age_years','price']].corr()


# there is a weak negative linear relationship between age_years and price


from matplotlib import pyplot as plt

plt.scatter('age_years', 'price', data=cars_df)

<matplotlib.collections.PathCollection at 0x1a4655b5b80>


# numpy is not imported anywhere earlier in this notebook, so import it here
import numpy as np

# natural logarithm of the price, stored as a new column
cars_df['price_log'] = np.log(cars_df['price'])


# correlate the numeric columns only: cars_df also holds text columns
# (title, brand, ...), and DataFrame.corr() raises on those in pandas 2.x
cars_df.select_dtypes(include='number').corr()


from matplotlib import pyplot as plt

plt.scatter('yom', 'price_log', data=cars_df)

<matplotlib.collections.PathCollection at 0x1a465623400>

	title	category	region	parent_region	condition	attrs	brand	color	model	yom	mileage	body_type	fuel	drive_train	trans	seat	registered	price
0	Toyota Land Cruiser Prado 2016 Black	Cars	Mvita	Mombasa	Foreign Used	First registration, No faults	Toyota	Black	Land Cruiser Prado	2016.0	87000.0	NaN	NaN	NaN	Automatic	NaN	NaN	6500000
1	Mazda Demio 2014 Brown	Cars	Langata	Nairobi	Foreign Used	First owner, No faults	Mazda	Brown	Demio	2014.0	92000.0	NaN	NaN	NaN	Automatic	NaN	Yes	970000
2	Clean NV300 Caravan 2014 Model Dielsel 16 Seater	Buses & Microbuses	Kilimani	Nairobi	Foreign Used	Nissan	Nissan	NaN	Caravan (Urvan)	2014.0	180000.0	NaN	NaN	NaN	NaN	NaN	NaN	2550000
3	Toyota Crown 2014 Pearl	Cars	Kilimani	Nairobi	Foreign Used	No faults	Toyota	Pearl	Crown	2014.0	75000.0	NaN	NaN	NaN	Automatic	NaN	No	2100000
4	Honda Fit 2014 Black	Cars	Mvita	Mombasa	Foreign Used	No faults	Honda	Black	Fit	2014.0	58000.0	NaN	NaN	NaN	Automatic	NaN	Yes	880000

	title	category	region	parent_region	condition	attrs	brand	color	model	yom	mileage	body_type	fuel	drive_train	trans	seat	registered	price
0	Toyota Land Cruiser Prado 2016 Black	Cars	Mvita	Mombasa	Foreign Used	First registration, No faults	Toyota	Black	Land Cruiser Prado	2016.0	87000.0	NaN	NaN	NaN	Automatic	NaN	NaN	6500000
1	Mazda Demio 2014 Brown	Cars	Langata	Nairobi	Foreign Used	First owner, No faults	Mazda	Brown	Demio	2014.0	92000.0	NaN	NaN	NaN	Automatic	NaN	Yes	970000
2	Clean NV300 Caravan 2014 Model Dielsel 16 Seater	Buses & Microbuses	Kilimani	Nairobi	Foreign Used	Nissan	Nissan	NaN	Caravan (Urvan)	2014.0	180000.0	NaN	NaN	NaN	NaN	NaN	NaN	2550000
3	Toyota Crown 2014 Pearl	Cars	Kilimani	Nairobi	Foreign Used	No faults	Toyota	Pearl	Crown	2014.0	75000.0	NaN	NaN	NaN	Automatic	NaN	No	2100000
4	Honda Fit 2014 Black	Cars	Mvita	Mombasa	Foreign Used	No faults	Honda	Black	Fit	2014.0	58000.0	NaN	NaN	NaN	Automatic	NaN	Yes	880000
5	Mitsubishi Delica 2013 White	Cars	Mvita	Mombasa	Foreign Used	First registration, No faults, Unpainted	Mitsubishi	White	Delica	2013.0	88000.0	NaN	NaN	NaN	Automatic	NaN	Yes	630000
6	New Toyota Premio 2013 Red	Cars	Mvita	Mombasa	Brand New	No faults, First registration	Toyota	Red	Premio	2013.0	45000.0	NaN	NaN	NaN	Automatic	NaN	No	1500000
7	Toyota Sienta 2014 1.5 AWD Gray	Cars	Ganjoni	Mombasa	Foreign Used	No faults	Toyota	Gray	Sienta	2014.0	51000.0	Minivan	Petrol	All Wheel	Automatic	7.0	Yes	1200000
8	BMW X4 2015 xDrive35i Black	Cars	Mombasa CBD	Mombasa	Foreign Used	No faults	BMW	Black	X4	2015.0	63128.0	NaN	NaN	NaN	Automatic	NaN	No	5800000
9	Mitsubishi Outlander 2015 White	Cars	Lavington	Nairobi	Foreign Used	Unpainted, Original parts, No faults	Mitsubishi	White	Outlander	2015.0	40382.0	SUV	Petrol	Front Wheel	Automatic	NaN	Yes	2900000

	title	category	region	parent_region	condition	attrs	brand	color	model	yom	mileage	body_type	fuel	drive_train	trans	seat	registered	price
290	Lexus LS 2011 460 AWD Black	Cars	Lavington	Nairobi	Kenyan Used	Lexus	Lexus	Black	LS	2011.0	88201.0	Sedan	Petrol	All Wheel	Automatic	5.0	NaN	3100000
291	Toyota Ractis 2015 Black	Cars	Mombasa CBD	Mombasa	Foreign Used	Original parts	Toyota	Black	Ractis	2015.0	58964.0	NaN	NaN	NaN	Automatic	NaN	Yes	1049000
292	Toyota Wish 2011 Silver	Cars	Ridgeways	Nairobi	Kenyan Used	First owner, Unpainted, Original parts	Toyota	Silver	Wish	2011.0	107809.0	NaN	NaN	NaN	Automatic	NaN	Yes	1030000
293	Mitsubishi L200 2014 Gold	Cars	Roysambu	Nairobi	Kenyan Used	First owner, Original parts, First registration	Mitsubishi	Gold	L200	2014.0	NaN	NaN	NaN	NaN	Manual	NaN	Yes	1200000
294	Subaru Outback 2014 White	Cars	Mvita	Mombasa	Foreign Used	No faults	Subaru	White	Outback	2014.0	63142.0	NaN	NaN	NaN	Automatic	NaN	NaN	2500000
295	Toyota Allion 2008 Silver	Cars	Ganjoni	Mombasa	Kenyan Used	No faults	Toyota	Silver	Allion	2008.0	176549.0	NaN	NaN	NaN	Automatic	NaN	Yes	860000
296	Mitsubishi Hd	Trucks & Trailers	Kisauni	Mombasa	Used	Used	Mitsubishi	NaN	NaN	2006.0	NaN	NaN	NaN	NaN	NaN	NaN	NaN	1300000
297	Mitsubishi Fuso Refrigerated	Trucks & Trailers	Kisauni	Mombasa	Used	Used	Mitsubishi	NaN	Canter	2014.0	NaN	NaN	Diesel	NaN	NaN	NaN	NaN	2200000
298	Toyota Ractis 2009 Black	Cars	Ridgeways	Nairobi	Kenyan Used	First owner, No faults, Original parts	Toyota	Black	Ractis	2009.0	120120.0	NaN	NaN	NaN	Automatic	NaN	Yes	550000
299	Subaru Forester 2015 Matt Black	Cars	Mombasa CBD	Mombasa	Foreign Used	Unpainted, Original parts, First registration	Subaru	Matt Black	Forester	2015.0	86910.0	NaN	NaN	NaN	Automatic	NaN	Yes	2450000

	title	category	region	parent_region	condition	attrs	brand	color	model	yom	mileage	body_type	fuel	drive_train	trans	seat	registered	price
109	Isuzu Truck .	Trucks & Trailers	Donholm	Nairobi	NaN	Isuzu	Isuzu	NaN	N Series	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	810000
201	Subaru Outback 2014 black	Cars	Nairobi	NaN	NaN	Outback	Subaru	Black	Outback	NaN	180097.0	NaN	NaN	NaN	Automatic	NaN	NaN	1649999
261	Single Diff Volve FL10	Trucks & Trailers	Embakasi	Nairobi	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	400000

	title	category	region	parent_region	condition	attrs	brand	color	model	yom	mileage	body_type	fuel	drive_train	trans	seat	registered	price
0	Toyota Land Cruiser Prado 2016 Black	Cars	Mvita	Mombasa	Foreign Used	First registration, No faults	Toyota	Black	Land Cruiser Prado	2016.0	87000.0	NaN	NaN	NaN	Automatic	NaN	NaN	6500000
1	Mazda Demio 2014 Brown	Cars	Langata	Nairobi	Foreign Used	First owner, No faults	Mazda	Brown	Demio	2014.0	92000.0	NaN	NaN	NaN	Automatic	NaN	Yes	970000
2	Clean NV300 Caravan 2014 Model Dielsel 16 Seater	Buses & Microbuses	Kilimani	Nairobi	Foreign Used	Nissan	Nissan	NaN	Caravan (Urvan)	2014.0	180000.0	NaN	NaN	NaN	NaN	NaN	NaN	2550000
3	Toyota Crown 2014 Pearl	Cars	Kilimani	Nairobi	Foreign Used	No faults	Toyota	Pearl	Crown	2014.0	75000.0	NaN	NaN	NaN	Automatic	NaN	No	2100000
4	Honda Fit 2014 Black	Cars	Mvita	Mombasa	Foreign Used	No faults	Honda	Black	Fit	2014.0	58000.0	NaN	NaN	NaN	Automatic	NaN	Yes	880000

	countries	capitals	density
0	China	Beijing	153
1	India	New Delhi	464
2	Brazil	Brasil	25
3	USA	Washington DC	36
4	Ethiopia	Addis Ababa	115
5	Egypt	Cairo	103

	title	category	region	parent_region	condition	attrs	brand	color	model	yom	mileage	body_type	fuel	drive_train	trans	seat	registered	price
78	Mercedes-Benz E250 2014 SV Premium Black	Cars	Nairobi Central	Nairobi	Kenyan Used	Unpainted, No faults, Original parts	Mercedes-Benz	Black	E250	2014.0	100555.0	NaN	NaN	NaN	Automatic	NaN	Yes	3200000
179	Mercedes-Benz B-Class 2009 White	Cars	Lavington	Nairobi	Kenyan Used	Mercedes-Benz	Mercedes-Benz	White	B-Class	2009.0	154155.0	NaN	Petrol	NaN	Automatic	NaN	NaN	699999
195	Mercedes-Benz Actros	Trucks & Trailers	Thome	Nairobi	Used	Used	Mercedes-Benz	NaN	NaN	2013.0	NaN	NaN	NaN	NaN	NaN	NaN	NaN	7500000
207	Mercedes-Benz C180 2014 Black	Cars	Kilimani	Nairobi	Foreign Used	No faults	Mercedes-Benz	Black	C180	2014.0	82000.0	NaN	NaN	NaN	Automatic	NaN	Yes	2500000
210	Mercedes-Benz C200 2008 White	Cars	Langata	Nairobi	Kenyan Used	Mercedes-Benz	Mercedes-Benz	White	C200	2008.0	132489.0	NaN	Petrol	NaN	Automatic	NaN	NaN	1400000
219	Mercedes-Benz C-Class 2011 C 200 (C204) White	Cars	Lavington	Nairobi	Kenyan Used	Mercedes-Benz	Mercedes-Benz	White	C-Class	2011.0	76000.0	Coupe	Petrol	Rear Wheel	Automatic	5.0	NaN	1650000
238	Mercedes-Benz B-Class 2014 Silver	Cars	Mvita	Mombasa	Foreign Used	First owner, No faults	Mercedes-Benz	Silver	B-Class	2014.0	90000.0	NaN	NaN	NaN	Automatic	NaN	NaN	1650000
247	Mercedes-Benz E250 2014 Black	Cars	Nairobi Central	Nairobi	Foreign Used	No faults	Mercedes-Benz	Black	E250	2014.0	49000.0	NaN	NaN	NaN	Automatic	NaN	No	3900000
274	Mercedes-Benz C200 2015 Black	Cars	Mombasa CBD	Mombasa	Foreign Used	No faults	Mercedes-Benz	Black	C200	2015.0	56800.0	NaN	NaN	NaN	Automatic	NaN	Yes	3850000

	title	category	region	parent_region	condition	attrs	brand	color	model	yom	mileage	body_type	fuel	drive_train	trans	seat	registered	price
22	Lexus RX 2016 Black	Cars	Mombasa CBD	Mombasa	Foreign Used	No faults	Lexus	Black	RX	2016.0	NaN	NaN	NaN	NaN	Automatic	NaN	Yes	14500000
148	Mazda Bongo	Buses & Microbuses	Ridgeways	Nairobi	Foreign Used	Unpainted, No faults	Mazda	White	Bongo	2014.0	127800.0	NaN	NaN	NaN	Automatic	NaN	NaN	11200000
265	New Hyundai Palisade 2021 White	Cars	Mombasa Road	Nairobi	Brand New	No faults	Hyundai	White	Palisade	2021.0	200.0	NaN	NaN	NaN	Automatic	NaN	Yes	9500000
224	Toyota Hilux 2016 Black	Cars	Mombasa CBD	Mombasa	Foreign Used	First registration	Toyota	Black	Hilux	2016.0	36000.0	NaN	NaN	NaN	Automatic	NaN	No	9000000
156	Toyota Land Cruiser 2010 4.6 V8 ZX Black	Cars	Runda	Nairobi	Foreign Used	No faults	Toyota	Black	Land Cruiser	2010.0	NaN	SUV	Petrol	4x4	Automatic	8.0	No	8799999

	title	category	region	parent_region	condition	attrs	brand	color	model	yom	mileage	body_type	fuel	drive_train	trans	seat	registered	price
111	Mitsubishi Lancer / Cedia 2002 White	Cars	Syokimau	Machakos	Kenyan Used	No faults	Mitsubishi	White	Lancer / Cedia	2002.0	NaN	NaN	NaN	NaN	Automatic	NaN	Yes	250000
85	Volkswagen Golf 2012 Blue	Cars	Ridgeways	Nairobi	Kenyan Used	First owner, After crash, Original parts	Volkswagen	Blue	Golf	2012.0	49252.0	NaN	NaN	NaN	Automatic	NaN	Yes	260000
246	Mazda Familia 1998 Black	Cars	Ridgeways	Nairobi	Kenyan Used	Mazda	Mazda	Black	Familia	1998.0	202745.0	NaN	Petrol	NaN	Manual	NaN	NaN	300000
38	Land Rover Range Rover 1979 Green	Cars	Kisumu Central	Kisumu	Kenyan Used	First owner	Land Rover	Green	Range Rover	1979.0	NaN	NaN	NaN	NaN	Manual	NaN	Yes	400000
261	Single Diff Volve FL10	Trucks & Trailers	Embakasi	Nairobi	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	400000

	title	category	region	parent_region	condition	attrs	brand	color	model	yom	mileage	body_type	fuel	drive_train	trans	seat	registered	price
15	Mazda CX-5 2014 Sport FWD Blue	Cars	Ridgeways	Nairobi	Foreign Used	No faults	Mazda	Blue	CX-5	2014.0	NaN	SUV	Petrol	Front Wheel	Automatic	5.0	NaN	2060000
24	Mazda CX-5 2014 Red	Cars	Nairobi Central	Nairobi	Foreign Used	First registration	Mazda	Red	CX-5	2014.0	85239.0	NaN	NaN	NaN	Automatic	NaN	Yes	2250000
52	Mazda CX-5 2016 Red	Cars	Mombasa CBD	Mombasa	Foreign Used	First registration	Mazda	Red	CX-5	2016.0	51238.0	NaN	NaN	NaN	Automatic	NaN	No	2900000
58	Mazda CX-5 2014 Grand Touring FWD Red	Cars	Mombasa CBD	Mombasa	Foreign Used	No faults	Mazda	Red	CX-5	2014.0	NaN	SUV	Petrol	NaN	Automatic	5.0	No	2300000
94	Mazda CX-3 2016 Red	Cars	Mombasa CBD	Mombasa	Foreign Used	First registration	Mazda	Red	CX-3	2016.0	76000.0	NaN	NaN	NaN	Automatic	NaN	No	1800000
183	New Mazda CX-5 2014 Grand Touring AWD Blue	Cars	Nairobi Central	Nairobi	Brand New	No faults	Mazda	Blue	CX-5	2014.0	115000.0	SUV	Petrol	All Wheel	Automatic	5.0	No	2100000

	title	category	region	parent_region	condition	attrs	brand	color	model	yom	mileage	body_type	fuel	drive_train	trans	seat	registered	price
282	BMW 7 Series 2013 White	Cars	Westlands	Nairobi	Foreign Used	No faults	BMW	White	7 Series	2013.0	40000.0	NaN	NaN	NaN	Automatic	NaN	Yes	3800000
207	Mercedes-Benz C180 2014 Black	Cars	Kilimani	Nairobi	Foreign Used	No faults	Mercedes-Benz	Black	C180	2014.0	82000.0	NaN	NaN	NaN	Automatic	NaN	Yes	2500000
219	Mercedes-Benz C-Class 2011 C 200 (C204) White	Cars	Lavington	Nairobi	Kenyan Used	Mercedes-Benz	Mercedes-Benz	White	C-Class	2011.0	76000.0	Coupe	Petrol	Rear Wheel	Automatic	5.0	NaN	1650000
210	Mercedes-Benz C200 2008 White	Cars	Langata	Nairobi	Kenyan Used	Mercedes-Benz	Mercedes-Benz	White	C200	2008.0	132489.0	NaN	Petrol	NaN	Automatic	NaN	NaN	1400000
179	Mercedes-Benz B-Class 2009 White	Cars	Lavington	Nairobi	Kenyan Used	Mercedes-Benz	Mercedes-Benz	White	B-Class	2009.0	154155.0	NaN	Petrol	NaN	Automatic	NaN	NaN	699999

	title	category	region	parent_region	condition	attrs	brand	color	model	yom	mileage	body_type	fuel	drive_train	trans	seat	registered	price
14	Very Clean Isuzu FRR Truck 2015 Model	Trucks & Trailers	Thome	Nairobi	Used	Used	Isuzu	White	F SERIES	2015.0	NaN	NaN	Diesel	NaN	Manual	NaN	NaN	3520000
36	Isuzu Elf,Year 2014manual Transmission	Trucks & Trailers	Mombasa CBD	Mombasa	Brand New	Brand New	Isuzu	NaN	NaN	2013.0	NaN	NaN	NaN	NaN	NaN	NaN	NaN	2150000
41	Isuzu Nkr. Yr 2019	Trucks & Trailers	Ridgeways	Nairobi	Used	Used	Isuzu	White	N Series	2019.0	NaN	NaN	Diesel	NaN	Manual	NaN	NaN	3050000
46	Isuzu Frr Kbw	Trucks & Trailers	Eldoret CBD	Uasin Gishu	Used	Used	Isuzu	White	F SERIES	2013.0	NaN	NaN	Diesel	NaN	Manual	NaN	NaN	2600000
62	Isuzu Elf,Year 2015 Manual	Trucks & Trailers	Mombasa CBD	Mombasa	Brand New	Brand New	Isuzu	NaN	NaN	2014.0	NaN	NaN	NaN	NaN	NaN	NaN	NaN	3650000

	yom	mileage	price	age_years	price_log
yom	1.000000	-0.290489	0.286765	-1.000000	0.430495
mileage	-0.290489	1.000000	-0.188173	0.290489	-0.198183
price	0.286765	-0.188173	1.000000	-0.286765	0.879782
age_years	-1.000000	0.290489	-0.286765	1.000000	-0.430495
price_log	0.430495	-0.198183	0.879782	-0.430495	1.000000

Getting Started with Python¶

Data Analysis With Python Pandas¶

Author : Waweru Kennedy¶

Date: 14/3/2022¶

Outline¶

Course Material¶

- Student Workbook¶

What is Pandas?¶

Pandas DataFrame Object¶

Installation¶

Pandas Data Structures¶

Creating a DataFrame¶

Syntax¶

Create a DataFrame From Lists¶

Create a DataFrame from a Dictionary¶

Create a DataFrame From an External File¶

Pandas Series Object¶

Create a Pandas Series from Scratch¶

Using the Vehicle Dataset¶

Exploring a DataFrame¶

Display the first 10 rows of a DataFrame¶

Pandas - Analyzing DataFrames¶

info()¶

Results Explained¶

Types of Statistical Data¶

Pandas Indexing¶

The Pandas Index Object¶

Indexers: loc, iloc, at and iat¶

.loc¶

2. A list or array of labels¶

3. Slicing¶

.iloc (integer location)¶

1. Indexing¶

2. Slicing¶

.at¶

.iat¶

Indexing Operator []¶

Selecting Columns¶

Selecting Rows¶

Sorting¶

sort values¶

sort index¶

sort multiple columns¶

Update the original DataFrame¶

Sort by index¶

Filtering¶

1. Logical operators¶

Using the Vehicle Dataset¶

Select only the vehicles whose year of make is after 2015¶

2. Multiple logical operators¶

Select only vehicles that were made after 2015 and are in the Cars category¶

3. isin¶

4. Str Accessor¶

5. Tilde ~¶

6. Query¶

7. nlargest and nsmallest¶

Select 5 cars with the lowest price¶

Challenges¶

Challenge 1: use .loc to retrieve all the rows for columns:¶

Challenge 2: Use a filter to select all the vehicles that are Toyota brand and are within the Kilimani region¶

Challenge 3. Select all the vehicles that contain the word 'CX' in their model¶

Challenge 4. Use a filter to return only the vehicles:¶

Challenge 5: Write a filter to select vehicles¶

Challenge 6: Write a filter to select vehicles¶

Cleaning Data : Casting Datatypes and Handling Missing values¶

Cleaning Empty Cells¶

Discard a specific row from the DataFrame¶

Replace with mean, median or mode¶

syntax¶

To persist the changes specify inplace=True¶

Drop columns if majority of values are missing¶

Let's say we want to generate a scatter plot of car_price vs car_mileage¶

Create a new Column from an Existing Column¶

Create a simple scatter plot of age_years vs price¶

Previous : Just Enough Numpy¶

Next : [Data Visualization with Python Matplotlib]¶

References¶

- Student Workbook ¶

`info()`¶

Indexers: `loc`, `iloc`, `at` and `iat`¶

`.loc`¶

`.iloc` (integer location)¶

`.at`¶

Indexing Operator `[]`¶

3. `isin`¶

5. Tilde `~`¶

7. `nlargest` and `nsmallest`¶

Replace with `mean`, `median` or `mode`¶

To persist the changes specify `inplace=True`¶

Create a simple scatter plot of `age_years` vs `price`¶

Previous : Just Enough Numpy ¶