diff --git a/06_Stats/Wind_Stats/Exercises.ipynb b/06_Stats/Wind_Stats/Exercises.ipynb index acd1ca732..bd4d34dcd 100644 --- a/06_Stats/Wind_Stats/Exercises.ipynb +++ b/06_Stats/Wind_Stats/Exercises.ipynb @@ -323,8 +323,9 @@ } ], "metadata": { + "anaconda-cloud": {}, "kernelspec": { - "display_name": "Python 2", + "display_name": "Python [default]", "language": "python", "name": "python2" }, @@ -338,7 +339,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython2", - "version": "2.7.11" + "version": "2.7.12" } }, "nbformat": 4, diff --git a/06_Stats/Wind_Stats/Exercises_with_solutions.ipynb b/06_Stats/Wind_Stats/Exercises_with_solutions.ipynb index 4078fca74..11409c05f 100644 --- a/06_Stats/Wind_Stats/Exercises_with_solutions.ipynb +++ b/06_Stats/Wind_Stats/Exercises_with_solutions.ipynb @@ -87,7 +87,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 2. Import the dataset from this [address](https://github.com/guipsamora/pandas_exercises/blob/master/Stats/Wind_Stats/wind.data)" + "### Step 2. Import the dataset from this [address](https://github.com/guipsamora/pandas_exercises/blob/master/06_Stats/Wind_Stats/wind.data)" ] }, { @@ -99,7 +99,7 @@ }, { "cell_type": "code", - "execution_count": 414, + "execution_count": 3, "metadata": { "collapsed": false }, @@ -228,14 +228,15 @@ "4 10.34 12.92 11.83 " ] }, - "execution_count": 414, + "execution_count": 3, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# parse_dates gets 0, 1, 2 columns and parses them as the index\n", - "data = pd.read_table(\"wind.data\", sep = \"\\s+\", parse_dates = [[0,1,2]]) \n", + "data_url = 'https://raw.githubusercontent.com/guipsamora/pandas_exercises/master/06_Stats/Wind_Stats/wind.data'\n", + "data = pd.read_table(data_url, sep = \"\\s+\", parse_dates = [[0,1,2]]) \n", "data.head()" ] }, @@ -248,7 +249,7 @@ }, { "cell_type": "code", - "execution_count": 415, + "execution_count": 4, "metadata": { "collapsed": false }, @@ -377,7 +378,7 @@ "4 10.34 12.92 11.83 " ] }, - "execution_count": 415, + "execution_count": 4, "metadata": {}, "output_type": "execute_result" } @@ -406,21 +407,149 @@ }, { "cell_type": "code", - "execution_count": 1, + "execution_count": 5, "metadata": { "collapsed": false }, "outputs": [ { - "ename": "NameError", - "evalue": "name 'pd' is not defined", - "output_type": "error", - "traceback": [ - "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", - "\u001b[0;31mNameError\u001b[0m Traceback (most recent call last)", - "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m()\u001b[0m\n\u001b[1;32m 1\u001b[0m \u001b[0;31m# transform Yr_Mo_Dy it to date type datetime64\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 2\u001b[0;31m \u001b[0mdata\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m\"Yr_Mo_Dy\"\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mpd\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mto_datetime\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mdata\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m\"Yr_Mo_Dy\"\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 3\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 4\u001b[0m \u001b[0;31m# set 'Yr_Mo_Dy' as the index\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 5\u001b[0m \u001b[0mdata\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mdata\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mset_index\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m'Yr_Mo_Dy'\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", - "\u001b[0;31mNameError\u001b[0m: name 'pd' is not defined" - ] + "data": { + "text/html": [ + "
\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
RPTVALROSKILSHABIRDUBCLAMULCLOBELMAL
Yr_Mo_Dy
1961-01-0115.0414.9613.179.29NaN9.8713.6710.2510.8312.5818.5015.04
1961-01-0214.71NaN10.836.5012.627.6711.5010.049.799.6717.5413.83
1961-01-0318.5016.8812.3310.1311.176.1711.25NaN8.507.6712.7512.71
1961-01-0410.586.6311.754.584.542.888.631.795.835.885.4610.88
1961-01-0513.3313.2511.426.1710.718.2111.926.5410.9210.3412.9211.83
\n", + "
" + ], + "text/plain": [ + " RPT VAL ROS KIL SHA BIR DUB CLA MUL \\\n", + "Yr_Mo_Dy \n", + "1961-01-01 15.04 14.96 13.17 9.29 NaN 9.87 13.67 10.25 10.83 \n", + "1961-01-02 14.71 NaN 10.83 6.50 12.62 7.67 11.50 10.04 9.79 \n", + "1961-01-03 18.50 16.88 12.33 10.13 11.17 6.17 11.25 NaN 8.50 \n", + "1961-01-04 10.58 6.63 11.75 4.58 4.54 2.88 8.63 1.79 5.83 \n", + "1961-01-05 13.33 13.25 11.42 6.17 10.71 8.21 11.92 6.54 10.92 \n", + "\n", + " CLO BEL MAL \n", + "Yr_Mo_Dy \n", + "1961-01-01 12.58 18.50 15.04 \n", + "1961-01-02 9.67 17.54 13.83 \n", + "1961-01-03 7.67 12.75 12.71 \n", + "1961-01-04 5.88 5.46 10.88 \n", + "1961-01-05 10.34 12.92 11.83 " + ] + }, + "execution_count": 5, + "metadata": {}, + "output_type": "execute_result" } ], "source": [ @@ -444,7 +573,7 @@ }, { "cell_type": "code", - "execution_count": 423, + "execution_count": 6, "metadata": { "collapsed": false }, @@ -467,7 +596,7 @@ "dtype: int64" ] }, - "execution_count": 423, + "execution_count": 6, "metadata": {}, "output_type": "execute_result" } @@ -486,7 +615,7 @@ }, { "cell_type": "code", - "execution_count": 424, + "execution_count": 7, "metadata": { "collapsed": false, "scrolled": true @@ -510,7 +639,7 @@ "dtype: int64" ] }, - "execution_count": 424, + "execution_count": 7, "metadata": {}, "output_type": "execute_result" } @@ -530,7 +659,7 @@ }, { "cell_type": "code", - "execution_count": 426, + "execution_count": 8, "metadata": { "collapsed": false }, @@ -541,7 +670,7 @@ "10.227982360836924" ] }, - "execution_count": 426, + "execution_count": 8, "metadata": {}, "output_type": "execute_result" } @@ -562,7 +691,7 @@ }, { "cell_type": "code", - "execution_count": 264, + "execution_count": 9, "metadata": { "collapsed": false }, @@ -686,7 +815,7 @@ "MAL 0.67 42.54 15.599079 6.699794" ] }, - "execution_count": 264, + "execution_count": 9, "metadata": {}, "output_type": "execute_result" } @@ -713,7 +842,7 @@ }, { "cell_type": "code", - "execution_count": 404, + "execution_count": 10, "metadata": { "collapsed": false }, @@ -731,57 +860,65 @@ " mean\n", " std\n", " \n", + " \n", + " Yr_Mo_Dy\n", + " \n", + " \n", + " \n", + " \n", + " \n", " \n", " \n", " \n", - " 0\n", - " 1.0\n", + " 1961-01-01\n", + " 9.29\n", " 18.50\n", - " 12.016667\n", - " 4.382798\n", + " 13.018182\n", + " 2.808875\n", " \n", " \n", - " 1\n", - " 1.0\n", + " 1961-01-02\n", + " 6.50\n", " 17.54\n", - " 10.475000\n", - " 4.260110\n", + " 11.336364\n", + " 3.188994\n", " \n", " \n", - " 2\n", - " 1.0\n", + " 1961-01-03\n", + " 6.17\n", " 18.50\n", - " 10.755000\n", - " 4.664914\n", + " 11.641818\n", + " 3.681912\n", " \n", " \n", - " 3\n", - " 1.0\n", + " 1961-01-04\n", + " 1.79\n", " 11.75\n", - " 6.186923\n", - " 3.435771\n", + " 6.619167\n", + " 3.198126\n", " \n", " \n", - " 4\n", - " 1.0\n", + " 1961-01-05\n", + " 6.17\n", " 13.33\n", - " 9.889231\n", - " 3.551768\n", + " 10.630000\n", + " 2.445356\n", " \n", " \n", "\n", "" ], "text/plain": [ - " min max mean std\n", - "0 1.0 18.50 12.016667 4.382798\n", - "1 1.0 17.54 10.475000 4.260110\n", - "2 1.0 18.50 10.755000 4.664914\n", - "3 1.0 11.75 6.186923 3.435771\n", - "4 1.0 13.33 9.889231 3.551768" + " min max mean std\n", + "Yr_Mo_Dy \n", + "1961-01-01 9.29 18.50 13.018182 2.808875\n", + "1961-01-02 6.50 17.54 11.336364 3.188994\n", + "1961-01-03 6.17 18.50 11.641818 3.681912\n", + "1961-01-04 1.79 11.75 6.619167 3.198126\n", + "1961-01-05 6.17 13.33 10.630000 2.445356" ] }, - "execution_count": 404, + "execution_count": 10, "metadata": {}, "output_type": "execute_result" } @@ -809,7 +946,7 @@ }, { "cell_type": "code", - "execution_count": 427, + "execution_count": 11, "metadata": { "collapsed": false }, @@ -832,7 +969,7 @@ "dtype: float64" ] }, - "execution_count": 427, + "execution_count": 11, "metadata": {}, "output_type": "execute_result" } @@ -848,7 +985,7 @@ "data['year'] = data['date'].apply(lambda date: date.year)\n", "data['day'] = data['date'].apply(lambda date: date.day)\n", "\n", - "# gets all value from the month 1 and assign to janyary_winds\n", + "# gets all value from the month 1 and assign to january_winds\n", "january_winds = data.query('month == 1')\n", "\n", "# gets the mean from january_winds, using .loc to not print the mean of month, year and day\n", @@ -864,7 +1001,7 @@ }, { "cell_type": "code", - "execution_count": 428, + "execution_count": 12, "metadata": { "collapsed": false }, @@ -1305,7 +1442,7 @@ "1978-01-01 10.00 15.09 20.46 1978-01-01 1 1978 1 " ] }, - "execution_count": 428, + "execution_count": 12, "metadata": {}, "output_type": "execute_result" } @@ -1323,7 +1460,7 @@ }, { "cell_type": "code", - "execution_count": 429, + "execution_count": 13, "metadata": { "collapsed": false }, @@ -2670,7 +2807,7 @@ "[216 rows x 16 columns]" ] }, - "execution_count": 429, + "execution_count": 13, "metadata": {}, "output_type": "execute_result" } @@ -2688,7 +2825,7 @@ }, { "cell_type": "code", - "execution_count": 430, + "execution_count": 14, "metadata": { "collapsed": false }, @@ -2856,7 +2993,7 @@ "1961-01-29 19.95 27.71 23.38 1961-01-29 1 1961 29 " ] }, - "execution_count": 430, + "execution_count": 14, "metadata": {}, "output_type": "execute_result" } @@ -2876,7 +3013,7 @@ }, { "cell_type": "code", - "execution_count": 3, + "execution_count": 15, "metadata": { "collapsed": false }, @@ -4202,14 +4339,15 @@ "[6574 rows x 16 columns]" ] }, - "execution_count": 3, + "execution_count": 15, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# call data again but this time don't use parse_dates\n", - "wind_data = pd.read_table(\"wind.data\", sep = \"\\s+\") \n", + "data_url = 'https://raw.githubusercontent.com/guipsamora/pandas_exercises/master/06_Stats/Wind_Stats/wind.data'\n", + "wind_data = pd.read_table(data_url, sep = \"\\s+\") \n", "\n", "# compute the month number for each day in the dataset, there are in total 216 months\n", "wind_data['months_num'] = (wind_data.iloc[:, 0] - 61) * 12 + wind_data.iloc[:, 1]\n", @@ -4231,7 +4369,7 @@ }, { "cell_type": "code", - "execution_count": 433, + "execution_count": 16, "metadata": { "collapsed": false }, @@ -4593,7 +4731,7 @@ "[10 rows x 48 columns]" ] }, - "execution_count": 433, + "execution_count": 16, "metadata": {}, "output_type": "execute_result" } @@ -4603,13 +4741,14 @@ "weekly = data.resample('W').agg(['min','max','mean','std'])\n", "\n", "# slice it for the first 52 weeks and locations\n", - "weekly.ix[1:53, \"RPT\":\"MAL\"].head(10)" + "weekly.loc[weekly.index[1:53], \"RPT\":\"MAL\"] .head(10)" ] } ], "metadata": { + "anaconda-cloud": {}, "kernelspec": { - "display_name": "Python 2", + "display_name": "Python [default]", "language": "python", "name": "python2" }, @@ -4623,7 +4762,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython2", - "version": "2.7.11" + "version": "2.7.12" } }, "nbformat": 4, diff --git a/06_Stats/Wind_Stats/Solutions.ipynb b/06_Stats/Wind_Stats/Solutions.ipynb index a0160b398..53151490b 100644 --- a/06_Stats/Wind_Stats/Solutions.ipynb +++ b/06_Stats/Wind_Stats/Solutions.ipynb @@ -87,7 +87,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 2. Import the dataset from this [address](https://github.com/guipsamora/pandas_exercises/blob/master/Stats/Wind_Stats/wind.data)" + "### Step 2. Import the dataset from this [address](https://github.com/guipsamora/pandas_exercises/blob/master/06_Stats/Wind_Stats/wind.data)" ] }, { @@ -3483,8 +3483,9 @@ } ], "metadata": { + "anaconda-cloud": {}, "kernelspec": { - "display_name": "Python 2", + "display_name": "Python [default]", "language": "python", "name": "python2" }, @@ -3498,7 +3499,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython2", - "version": "2.7.11" + "version": "2.7.12" } }, "nbformat": 4,