diff --git a/test.ipynb b/test.ipynb new file mode 100644 index 0000000..d2dd481 --- /dev/null +++ b/test.ipynb @@ -0,0 +1,1007 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "source": [ + "# Ensuring this repository's animations are correct\n", + "\n", + "This notebook uses pandas to compute the actions illustrated in the other pages. The results here can be compared to the results of the animations to ensure that the animations are showing the correct results." + ], + "metadata": {} + }, + { + "cell_type": "code", + "source": [ + "import pandas as pd" + ], + "outputs": [], + "execution_count": 1, + "metadata": { + "collapsed": false, + "outputHidden": false, + "inputHidden": false + } + }, + { + "cell_type": "markdown", + "source": [ + "## Pivot" + ], + "metadata": {} + }, + { + "cell_type": "code", + "source": [ + "df = pd.DataFrame( {\n", + " 'Name' : [ 'Abe','Abe','Abe','Amy','Amy','Amy' ],\n", + " 'Day' : [ 'Mon','Tue','Wed','Mon','Tue','Wed' ],\n", + " 'Sales' : [ 39, 68, 10, 93, 85, 0 ]\n", + "} )\n", + "df" + ], + "outputs": [ + { + "output_type": "execute_result", + "execution_count": 2, + "data": { + "text/plain": [ + " Name Day Sales\n", + "0 Abe Mon 39\n", + "1 Abe Tue 68\n", + "2 Abe Wed 10\n", + "3 Amy Mon 93\n", + "4 Amy Tue 85\n", + "5 Amy Wed 0" + ], + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
NameDaySales
0AbeMon39
1AbeTue68
2AbeWed10
3AmyMon93
4AmyTue85
5AmyWed0
\n", + "
" + ] + }, + "metadata": {} + } + ], + "execution_count": 2, + "metadata": { + "collapsed": false, + "outputHidden": false, + "inputHidden": false + } + }, + { + "cell_type": "code", + "source": [ + "df.pivot( index=\"Name\", columns=\"Day\", values=\"Sales\" )" + ], + "outputs": [ + { + "output_type": "execute_result", + "execution_count": 3, + "data": { + "text/plain": [ + "Day Mon Tue Wed\n", + "Name \n", + "Abe 39 68 10\n", + "Amy 93 85 0" + ], + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
DayMonTueWed
Name
Abe396810
Amy93850
\n", + "
" + ] + }, + "metadata": {} + } + ], + "execution_count": 3, + "metadata": { + "collapsed": false, + "outputHidden": false, + "inputHidden": false + } + }, + { + "cell_type": "markdown", + "source": [ + "## Pivot Table" + ], + "metadata": {} + }, + { + "cell_type": "code", + "source": [ + "df = pd.DataFrame( {\n", + " 'First' : [ 'Abe', 'Abe', 'Abe', 'Amy', 'Amy', 'Amy' ],\n", + " 'Last' : [ 'Axel', 'Axel', 'Axel', 'Arnt', 'Arnt', 'Arnt' ],\n", + " 'Customer' : [ 'X Co.','Y Inc.','X Co.','Y Inc.','X Co.','Y Inc.' ],\n", + " 'Sale' : [ 39, 68, 10, 93, 85, 0 ]\n", + "} )\n", + "df" + ], + "outputs": [ + { + "output_type": "execute_result", + "execution_count": 12, + "data": { + "text/plain": [ + " First Last Customer Sale\n", + "0 Abe Axel X Co. 39\n", + "1 Abe Axel Y Inc. 68\n", + "2 Abe Axel X Co. 10\n", + "3 Amy Arnt Y Inc. 93\n", + "4 Amy Arnt X Co. 85\n", + "5 Amy Arnt Y Inc. 0" + ], + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
FirstLastCustomerSale
0AbeAxelX Co.39
1AbeAxelY Inc.68
2AbeAxelX Co.10
3AmyArntY Inc.93
4AmyArntX Co.85
5AmyArntY Inc.0
\n", + "
" + ] + }, + "metadata": {} + } + ], + "execution_count": 12, + "metadata": { + "collapsed": false, + "outputHidden": false, + "inputHidden": false + } + }, + { + "cell_type": "code", + "source": [ + "df.pivot_table( index=[\"First\",\"Last\"], columns=[\"Customer\"], aggfunc=\"sum\" )" + ], + "outputs": [ + { + "output_type": "execute_result", + "execution_count": 13, + "data": { + "text/plain": [ + " Sale \n", + "Customer X Co. Y Inc.\n", + "First Last \n", + "Abe Axel 49 68\n", + "Amy Arnt 85 93" + ], + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
Sale
CustomerX Co.Y Inc.
FirstLast
AbeAxel4968
AmyArnt8593
\n", + "
" + ] + }, + "metadata": {} + } + ], + "execution_count": 13, + "metadata": { + "collapsed": false, + "outputHidden": false, + "inputHidden": false + } + }, + { + "cell_type": "markdown", + "source": [ + "## Melt" + ], + "metadata": {} + }, + { + "cell_type": "code", + "source": [ + "df = pd.DataFrame( {\n", + " 'First' : [ 'Abe', 'Amy', 'Art' ],\n", + " 'Last' : [ 'Smith','Sully','Smart' ],\n", + " 'SAT' : [ 123, 234, 345 ],\n", + " 'ACT' : [ 456, 567, 678 ],\n", + " 'GPA' : [ 2.0, 3.0, 4.0 ]\n", + "} )\n", + "df" + ], + "outputs": [ + { + "output_type": "execute_result", + "execution_count": 14, + "data": { + "text/plain": [ + " First Last SAT ACT GPA\n", + "0 Abe Smith 123 456 2.0\n", + "1 Amy Sully 234 567 3.0\n", + "2 Art Smart 345 678 4.0" + ], + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
FirstLastSATACTGPA
0AbeSmith1234562.0
1AmySully2345673.0
2ArtSmart3456784.0
\n", + "
" + ] + }, + "metadata": {} + } + ], + "execution_count": 14, + "metadata": { + "collapsed": false, + "outputHidden": false, + "inputHidden": false + } + }, + { + "cell_type": "code", + "source": [ + "df.melt( id_vars=[\"First\",\"Last\"], value_vars=[\"SAT\",\"ACT\",\"GPA\"] )" + ], + "outputs": [ + { + "output_type": "execute_result", + "execution_count": 15, + "data": { + "text/plain": [ + " First Last variable value\n", + "0 Abe Smith SAT 123.0\n", + "1 Amy Sully SAT 234.0\n", + "2 Art Smart SAT 345.0\n", + "3 Abe Smith ACT 456.0\n", + "4 Amy Sully ACT 567.0\n", + "5 Art Smart ACT 678.0\n", + "6 Abe Smith GPA 2.0\n", + "7 Amy Sully GPA 3.0\n", + "8 Art Smart GPA 4.0" + ], + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
FirstLastvariablevalue
0AbeSmithSAT123.0
1AmySullySAT234.0
2ArtSmartSAT345.0
3AbeSmithACT456.0
4AmySullyACT567.0
5ArtSmartACT678.0
6AbeSmithGPA2.0
7AmySullyGPA3.0
8ArtSmartGPA4.0
\n", + "
" + ] + }, + "metadata": {} + } + ], + "execution_count": 15, + "metadata": { + "collapsed": false, + "outputHidden": false, + "inputHidden": false + } + }, + { + "cell_type": "markdown", + "source": [ + "## Unstack/stack (level 0)" + ], + "metadata": {} + }, + { + "cell_type": "code", + "source": [ + "df = pd.DataFrame( {\n", + " 'Month' : [ 'Jan','Jan','Jan','Jan','Feb','Feb','Feb','Feb' ],\n", + " 'Week' : [ 1, 2, 3, 4, 1, 2, 3, 4 ],\n", + " 'Temp' : [ 31, 29, 26, 27, 25, 19, 22, 25 ],\n", + " 'Humid' : [ 15, 20, 5, 14, 9, 14, 20, 19 ]\n", + "} )\n", + "df['Month'] = df['Month'].astype( 'category' )\n", + "df['Month'] = df['Month'].cat.set_categories( ['Jan','Feb'], ordered=True )\n", + "df = df.set_index( [ 'Month', 'Week' ] )\n", + "df" + ], + "outputs": [ + { + "output_type": "execute_result", + "execution_count": 23, + "data": { + "text/plain": [ + " Temp Humid\n", + "Month Week \n", + "Jan 1 31 15\n", + " 2 29 20\n", + " 3 26 5\n", + " 4 27 14\n", + "Feb 1 25 9\n", + " 2 19 14\n", + " 3 22 20\n", + " 4 25 19" + ], + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
TempHumid
MonthWeek
Jan13115
22920
3265
42714
Feb1259
21914
32220
42519
\n", + "
" + ] + }, + "metadata": {} + } + ], + "execution_count": 23, + "metadata": { + "collapsed": false, + "outputHidden": false, + "inputHidden": false + } + }, + { + "cell_type": "code", + "source": [ + "df.unstack( level=0 )" + ], + "outputs": [ + { + "output_type": "execute_result", + "execution_count": 24, + "data": { + "text/plain": [ + " Temp Humid \n", + "Month Jan Feb Jan Feb\n", + "Week \n", + "1 31 25 15 9\n", + "2 29 19 20 14\n", + "3 26 22 5 20\n", + "4 27 25 14 19" + ], + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
TempHumid
MonthJanFebJanFeb
Week
13125159
229192014
32622520
427251419
\n", + "
" + ] + }, + "metadata": {} + } + ], + "execution_count": 24, + "metadata": { + "collapsed": false, + "outputHidden": false, + "inputHidden": false + } + }, + { + "cell_type": "markdown", + "source": [ + "## Unstack/stack (level 1)" + ], + "metadata": {} + }, + { + "cell_type": "code", + "source": [ + "df.unstack( level=1 )" + ], + "outputs": [ + { + "output_type": "execute_result", + "execution_count": 25, + "data": { + "text/plain": [ + " Temp Humid \n", + "Week 1 2 3 4 1 2 3 4\n", + "Month \n", + "Jan 31 29 26 27 15 20 5 14\n", + "Feb 25 19 22 25 9 14 20 19" + ], + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
TempHumid
Week12341234
Month
Jan312926271520514
Feb251922259142019
\n", + "
" + ] + }, + "metadata": {} + } + ], + "execution_count": 25, + "metadata": { + "collapsed": false, + "outputHidden": false, + "inputHidden": false + } + }, + { + "cell_type": "code", + "source": [], + "outputs": [], + "execution_count": null, + "metadata": { + "collapsed": false, + "outputHidden": false, + "inputHidden": false + } + } + ], + "metadata": { + "kernel_info": { + "name": "python3" + }, + "language_info": { + "name": "python", + "version": "3.7.3", + "mimetype": "text/x-python", + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "pygments_lexer": "ipython3", + "nbconvert_exporter": "python", + "file_extension": ".py" + }, + "kernelspec": { + "name": "python3", + "language": "python", + "display_name": "Python 3" + }, + "nteract": { + "version": "0.15.0" + } + }, + "nbformat": 4, + "nbformat_minor": 0 +} \ No newline at end of file