{ "cells": [ { "cell_type": "markdown", "id": "fe5a81a5", "metadata": {}, "source": [ "# Getting Started with Python\n", "\n", "## Data Analysis With Python Pandas\n", "\n", "#### __Author__ : Your Name\n", "\n", "#### __Date__: 14/3/2022" ] }, { "cell_type": "markdown", "id": "2f370c10", "metadata": {}, "source": [ "#### Create a DataFrame From Lists\n", "\n", "Write your notes here" ] }, { "cell_type": "code", "execution_count": null, "id": "30af5714", "metadata": {}, "outputs": [], "source": [ "import pandas as pd\n", "\n", "# pd.read_csv()" ] }, { "cell_type": "code", "execution_count": 2, "id": "ae8c03b6", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "Joy 3\n", "Jack 5\n", "John 7\n", "Jane 6\n", "Name: age, dtype: int64" ] }, "execution_count": 2, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# create a pandas series\n", "s1 = pd.Series([3,5,7,6],index=[\"Joy\",\"Jack\",\"John\",\"Jane\"],name=\"age\")\n", "s1" ] }, { "cell_type": "code", "execution_count": 4, "id": "ecda38e0", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "Joy F\n", "Jack M\n", "John M\n", "Jane F\n", "Name: sex, dtype: object" ] }, "execution_count": 4, "metadata": {}, "output_type": "execute_result" } ], "source": [ "s2 = pd.Series([\"F\",\"M\",\"M\",\"F\"],index=[\"Joy\",\"Jack\",\"John\",\"Jane\"],name=\"sex\")\n", "s2" ] }, { "cell_type": "code", "execution_count": 10, "id": "69c4a8ef", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "Joy Cyclist\n", "Jack Singer\n", "John Athlete\n", "Jane Story Teller\n", "Name: interests, dtype: object" ] }, "execution_count": 10, "metadata": {}, "output_type": "execute_result" } ], "source": [ "s3 = pd.Series([\"Cyclist\",\"Singer\",\"Athlete\",\"Story Teller\"],index=[\"Joy\",\"Jack\",\"John\",\"Jane\"],name=\"interests\")\n", "s3" ] }, { "cell_type": "code", "execution_count": 11, "id": "7a6cab30", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "array(['Joy', 'Jack', 'John', 'Jane'], 
dtype=object)" ] }, "execution_count": 11, "metadata": {}, "output_type": "execute_result" } ], "source": [ "s3.index.values" ] }, { "cell_type": "code", "execution_count": 15, "id": "4ab934c6", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "(3, 'F', 'Cyclist')\n", "(5, 'M', 'Singer')\n", "(7, 'M', 'Athlete')\n", "(6, 'F', 'Story Teller')\n" ] } ], "source": [ "zip1 = zip(s1,s2,s3)\n", "for elem in zip1:\n", " print(elem)" ] }, { "cell_type": "code", "execution_count": 13, "id": "30e6ccf0", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
012
03FCyclist
15MSinger
27MAthlete
36FStory Teller
\n", "
" ], "text/plain": [ " 0 1 2\n", "0 3 F Cyclist\n", "1 5 M Singer\n", "2 7 M Athlete\n", "3 6 F Story Teller" ] }, "execution_count": 13, "metadata": {}, "output_type": "execute_result" } ], "source": [ "mydf = pd.DataFrame(data=zip(s1,s2,s3))\n", "mydf" ] }, { "cell_type": "code", "execution_count": null, "id": "0cb83bdb", "metadata": {}, "outputs": [], "source": [ "# Build the frame row-wise from the zipped Series values; label the rows with the\n", "# index the three Series share and the columns with each Series' own name.\n", "mydf = pd.DataFrame(data=list(zip(s1,s2,s3)), index=s1.index, columns=[s1.name,s2.name,s3.name])\n", "mydf" ] }, { "cell_type": "markdown", "id": "89c7de8e", "metadata": {}, "source": [ "#### Create a DataFrame from a Dictionary" ] }, { "cell_type": "code", "execution_count": null, "id": "54dffe28", "metadata": {}, "outputs": [], "source": [] }, { "cell_type": "markdown", "id": "41c8fa6c", "metadata": {}, "source": [ "#### Create a DataFrame From an External File\n" ] }, { "cell_type": "code", "execution_count": 18, "id": "db6d0b4f", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "working directory C:\\Users\\Just Nick\\Desktop\\Analysis\\Denaco\\Python\n", "\n", "files ['.ipynb_checkpoints', 'bmi-solution.ipynb', 'comprehensive-guide-to-pandas.ipynb', 'conditional expressions in python.ipynb', 'data-analysis-with-python-pandas.ipynb', 'denaco-manu-python-curriculum - Sheet1.pdf', 'df-from-dict.py', 'fizz_buzz.py', 'functions.ipynb', 'Intro to python', 'just-enough-numpy.ipynb', 'LCM.ipynb', 'loops and iterations in python.ipynb', 'mastery_of_pandas.ipynb', 'modules and standard library.ipynb', 'our_new_dir', 'pandas tuts', 'Pandas-Demo', 'pandas-student-workbook.ipynb', 'python-resources.txt', 'python-string-formating.py', 'Python.docx', 'student_copy_pandas_workbook.ipynb', 'train.csv', 'users.csv', 'vehicle_data.csv']\n" ] } ], "source": [ "import os\n", "print(\"working directory\",os.getcwd())\n", "print(\"\")\n", "print(\"files\",os.listdir())" ] }, { "cell_type": "code", "execution_count": 20, "id": "beceaee9", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
titlecategoryregionparent_regionconditionattrsbrandcolormodelyommileagebody_typefueldrive_traintransseatregisteredprice
0Toyota Land Cruiser Prado 2016 BlackCarsMvitaMombasaForeign UsedFirst registration, No faultsToyotaBlackLand Cruiser Prado2016.087000.0NaNNaNNaNAutomaticNaNNaN6500000
1Mazda Demio 2014 BrownCarsLangataNairobiForeign UsedFirst owner, No faultsMazdaBrownDemio2014.092000.0NaNNaNNaNAutomaticNaNYes970000
2Clean NV300 Caravan 2014 Model Dielsel 16 SeaterBuses & MicrobusesKilimaniNairobiForeign UsedNissanNissanNaNCaravan (Urvan)2014.0180000.0NaNNaNNaNNaNNaNNaN2550000
3Toyota Crown 2014 PearlCarsKilimaniNairobiForeign UsedNo faultsToyotaPearlCrown2014.075000.0NaNNaNNaNAutomaticNaNNo2100000
4Honda Fit 2014 BlackCarsMvitaMombasaForeign UsedNo faultsHondaBlackFit2014.058000.0NaNNaNNaNAutomaticNaNYes880000
5Mitsubishi Delica 2013 WhiteCarsMvitaMombasaForeign UsedFirst registration, No faults, UnpaintedMitsubishiWhiteDelica2013.088000.0NaNNaNNaNAutomaticNaNYes630000
6New Toyota Premio 2013 RedCarsMvitaMombasaBrand NewNo faults, First registrationToyotaRedPremio2013.045000.0NaNNaNNaNAutomaticNaNNo1500000
7Toyota Sienta 2014 1.5 AWD GrayCarsGanjoniMombasaForeign UsedNo faultsToyotaGraySienta2014.051000.0MinivanPetrolAll WheelAutomatic7.0Yes1200000
8BMW X4 2015 xDrive35i BlackCarsMombasa CBDMombasaForeign UsedNo faultsBMWBlackX42015.063128.0NaNNaNNaNAutomaticNaNNo5800000
9Mitsubishi Outlander 2015 WhiteCarsLavingtonNairobiForeign UsedUnpainted, Original parts, No faultsMitsubishiWhiteOutlander2015.040382.0SUVPetrolFront WheelAutomaticNaNYes2900000
\n", "
" ], "text/plain": [ " title category \\\n", "0 Toyota Land Cruiser Prado 2016 Black Cars \n", "1 Mazda Demio 2014 Brown Cars \n", "2 Clean NV300 Caravan 2014 Model Dielsel 16 Seater Buses & Microbuses \n", "3 Toyota Crown 2014 Pearl Cars \n", "4 Honda Fit 2014 Black Cars \n", "5 Mitsubishi Delica 2013 White Cars \n", "6 New Toyota Premio 2013 Red Cars \n", "7 Toyota Sienta 2014 1.5 AWD Gray Cars \n", "8 BMW X4 2015 xDrive35i Black Cars \n", "9 Mitsubishi Outlander 2015 White Cars \n", "\n", " region parent_region condition \\\n", "0 Mvita Mombasa Foreign Used \n", "1 Langata Nairobi Foreign Used \n", "2 Kilimani Nairobi Foreign Used \n", "3 Kilimani Nairobi Foreign Used \n", "4 Mvita Mombasa Foreign Used \n", "5 Mvita Mombasa Foreign Used \n", "6 Mvita Mombasa Brand New \n", "7 Ganjoni Mombasa Foreign Used \n", "8 Mombasa CBD Mombasa Foreign Used \n", "9 Lavington Nairobi Foreign Used \n", "\n", " attrs brand color \\\n", "0 First registration, No faults Toyota Black \n", "1 First owner, No faults Mazda Brown \n", "2 Nissan Nissan NaN \n", "3 No faults Toyota Pearl \n", "4 No faults Honda Black \n", "5 First registration, No faults, Unpainted Mitsubishi White \n", "6 No faults, First registration Toyota Red \n", "7 No faults Toyota Gray \n", "8 No faults BMW Black \n", "9 Unpainted, Original parts, No faults Mitsubishi White \n", "\n", " model yom mileage body_type fuel drive_train \\\n", "0 Land Cruiser Prado 2016.0 87000.0 NaN NaN NaN \n", "1 Demio 2014.0 92000.0 NaN NaN NaN \n", "2 Caravan (Urvan) 2014.0 180000.0 NaN NaN NaN \n", "3 Crown 2014.0 75000.0 NaN NaN NaN \n", "4 Fit 2014.0 58000.0 NaN NaN NaN \n", "5 Delica 2013.0 88000.0 NaN NaN NaN \n", "6 Premio 2013.0 45000.0 NaN NaN NaN \n", "7 Sienta 2014.0 51000.0 Minivan Petrol All Wheel \n", "8 X4 2015.0 63128.0 NaN NaN NaN \n", "9 Outlander 2015.0 40382.0 SUV Petrol Front Wheel \n", "\n", " trans seat registered price \n", "0 Automatic NaN NaN 6500000 \n", "1 Automatic NaN Yes 970000 \n", "2 NaN NaN NaN 
2550000 \n", "3 Automatic NaN No 2100000 \n", "4 Automatic NaN Yes 880000 \n", "5 Automatic NaN Yes 630000 \n", "6 Automatic NaN No 1500000 \n", "7 Automatic 7.0 Yes 1200000 \n", "8 Automatic NaN No 5800000 \n", "9 Automatic NaN Yes 2900000 " ] }, "execution_count": 20, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df = pd.read_csv('vehicle_data.csv')\n", "df.head(10)" ] }, { "cell_type": "markdown", "id": "c9706333", "metadata": {}, "source": [ "#### Create a Pandas Series from Scratch\n" ] }, { "cell_type": "code", "execution_count": null, "id": "16744fa7", "metadata": {}, "outputs": [], "source": [] }, { "cell_type": "markdown", "id": "fa7f5ea1", "metadata": {}, "source": [ "### Using a Real Dataset\n", "\n", "Download the dataset from this link\n", "[Vehicle Dataset](https://dasclab.uonbi.ac.ke/dstraining/vehicle_data.csv)\n", "\n", "#### Exploring a DataFrame\n" ] }, { "cell_type": "code", "execution_count": null, "id": "1fb29c7c", "metadata": {}, "outputs": [], "source": [] }, { "cell_type": "code", "execution_count": null, "id": "33206efe", "metadata": {}, "outputs": [], "source": [] }, { "cell_type": "markdown", "id": "0dd92ec2", "metadata": {}, "source": [ "### Pandas - Analyzing DataFrames" ] }, { "cell_type": "code", "execution_count": null, "id": "382ee080", "metadata": {}, "outputs": [], "source": [] }, { "cell_type": "code", "execution_count": null, "id": "54ee53fb", "metadata": {}, "outputs": [], "source": [] }, { "cell_type": "markdown", "id": "4fa0efe9", "metadata": {}, "source": [ "### Pandas Indexing" ] }, { "cell_type": "code", "execution_count": null, "id": "0ade3f3d", "metadata": {}, "outputs": [], "source": [] }, { "cell_type": "code", "execution_count": null, "id": "506ae99e", "metadata": {}, "outputs": [], "source": [] }, { "cell_type": "markdown", "id": "25d9199f", "metadata": {}, "source": [ "### Pandas Sorting" ] }, { "cell_type": "code", "execution_count": null, "id": "a31ebdd6", "metadata": {}, "outputs": [], 
"source": [] }, { "cell_type": "code", "execution_count": null, "id": "f5526724", "metadata": {}, "outputs": [], "source": [] }, { "cell_type": "markdown", "id": "d32509ec", "metadata": {}, "source": [ "### Pandas Filtering" ] }, { "cell_type": "code", "execution_count": null, "id": "dfbf291b", "metadata": {}, "outputs": [], "source": [] }, { "cell_type": "code", "execution_count": null, "id": "87a5b6b8", "metadata": {}, "outputs": [], "source": [] }, { "cell_type": "markdown", "id": "93c53dd5", "metadata": {}, "source": [ "### Pandas Challenges" ] }, { "cell_type": "code", "execution_count": null, "id": "bb084b3b", "metadata": {}, "outputs": [], "source": [] }, { "cell_type": "markdown", "id": "e0b0754d", "metadata": {}, "source": [ "#### Challenge 6: Write a filter to select vehicles\n", "\n", "- that are of _brand_ __Toyota__, __Mazda__ or __Subaru__\n", "\n", "- that mention the color __'Black'__ or __'White'__ in the _title_\n", "\n", "- that are in any of the following regions: Kilimani, Lavington, Langata, Westlands, Ridgeways\n", "\n", "- Finally, sort the selection in alphabetical order by _brand_, then by _price_ with the most expensive on top" ] }, { "cell_type": "code", "execution_count": null, "id": "9518af1b", "metadata": {}, "outputs": [], "source": [ "### Challenge 6 Solution\n", "\n", "# your solution here\n", "\n", "\n", "\n", "#" ] } ], "metadata": { "kernelspec": { "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.10" } }, "nbformat": 4, "nbformat_minor": 5 }