Revert "Small fixes and corrections for tutorials, mainly for Python 3 (jmschrei#577)" (jmschrei#580)

This reverts commit 73c7f11.
pepie34 · May 7, 2019 · 381c45d · 381c45d
1 parent 73c7f11
commit 381c45d
Show file tree

Hide file tree

Showing 11 changed files with 4,718 additions and 3,550 deletions.
diff --git a/tutorials/A_Overview.ipynb b/tutorials/A_Overview.ipynb
diff --git a/tutorials/B_Model_Tutorial_1_Distributions.ipynb b/tutorials/B_Model_Tutorial_1_Distributions.ipynb
diff --git a/tutorials/B_Model_Tutorial_3_Hidden_Markov_Models.ipynb b/tutorials/B_Model_Tutorial_3_Hidden_Markov_Models.ipynb
diff --git a/tutorials/B_Model_Tutorial_4_Bayesian_Networks.ipynb b/tutorials/B_Model_Tutorial_4_Bayesian_Networks.ipynb
diff --git a/tutorials/B_Model_Tutorial_4b_Bayesian_Network_Structure_Learning.ipynb b/tutorials/B_Model_Tutorial_4b_Bayesian_Network_Structure_Learning.ipynb
diff --git a/tutorials/B_Model_Tutorial_5_Bayes_Classifiers.ipynb b/tutorials/B_Model_Tutorial_5_Bayes_Classifiers.ipynb
diff --git a/tutorials/B_Model_Tutorial_6_Markov_Chain.ipynb b/tutorials/B_Model_Tutorial_6_Markov_Chain.ipynb
@@ -1,102 +1,283 @@
-## Markov Chains
-
-author: Jacob Schreiber <br>
-contact: jmschreiber91@gmail.com
-Markov Chains are a simple model based on conditional probability, where a
-sequence is modelled as the product of conditional probabilities. An n-th order
-Markov chain looks back n emissions to base its conditional probability on. For
-example, a 3rd order Markov chain models $P(X_{t} | X_{t-1}, X_{t-2}, X_{t-3})$.
-However, a full Markov model also needs to model the first n observations, which
-have fewer than n predecessors. The first observation can't really be modelled well
-using $P(X_{t} | X_{t-1}, X_{t-2}, X_{t-3})$, but can be modelled by $P(X_{t})$.
-The second observation has to be modelled by $P(X_{t} | X_{t-1} )$. This means
-that these distributions have to be passed into the Markov chain as well. 
-
-We
-can initialize a Markov chain easily enough by passing in a list of the
-distributions.
-
-```python
-%matplotlib inline
-import time
-import pandas
-import random
-import numpy
-import matplotlib.pyplot as plt
-import seaborn; seaborn.set_style('whitegrid')
-import itertools
-
-from pomegranate import *
-
-random.seed(0)
-numpy.random.seed(0)
-numpy.set_printoptions(suppress=True)
-
-%load_ext watermark
-%watermark -m -n -p numpy,scipy,pomegranate
-```
-
-```python
-from pomegranate import *
-%pylab inline
-```
-
-```python
-d1 = DiscreteDistribution({'A': 0.10, 'C': 0.40, 'G': 0.40, 'T': 0.10})
-d2 = ConditionalProbabilityTable([['A', 'A', 0.10],
-                                ['A', 'C', 0.50],
-                                ['A', 'G', 0.30],
-                                ['A', 'T', 0.10],
-                                ['C', 'A', 0.10],
-                                ['C', 'C', 0.40],
-                                ['C', 'T', 0.40],
-                                ['C', 'G', 0.10],
-                                ['G', 'A', 0.05],
-                                ['G', 'C', 0.45],
-                                ['G', 'G', 0.45],
-                                ['G', 'T', 0.05],
-                                ['T', 'A', 0.20],
-                                ['T', 'C', 0.30],
-                                ['T', 'G', 0.30],
-                                ['T', 'T', 0.20]], [d1])
-
-clf = MarkovChain([d1, d2])
-```
-
-Markov chains have `log_probability`, `fit`, `summarize`, and `from_summaries` methods
-implemented. They do not have classification capabilities by themselves, but
-when combined with a Naive Bayes classifier can be used to do discrimination
-between multiple models (see the Naive Bayes tutorial notebook).
-
-Let's see the
-log probability of some data.
-
-```python
-clf.log_probability( list('CAGCATCAGT') ) 
-```
-
-```python
-clf.log_probability( list('C') )
-```
-
-```python
-clf.log_probability( list('CACATCACGACTAATGATAAT') )
-```
-
-We can fit the model to sequences which we pass in, and as expected, get better
-performance on sequences which we train on.
-
-```python
-clf.fit( map( list, ('CAGCATCAGT', 'C', 'ATATAGAGATAAGCT', 'GCGCAAGT', 'GCATTGC', 'CACATCACGACTAATGATAAT') ) )
-print( clf.log_probability( list('CAGCATCAGT') )) 
-print( clf.log_probability( list('C') ))
-print( clf.log_probability( list('CACATCACGACTAATGATAAT') ))
-```
-
-```python
-print( clf.distributions[0]) 
-```
-
-```python
-print( clf.distributions[1])
-```
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Markov Chains\n",
+    "\n",
+    "author: Jacob Schreiber <br>\n",
+    "contact: jmschreiber91@gmail.com\n",
+    "\n",
+    "Markov Chains are a simple model based on conditional probability, where a sequence is modelled as the product of conditional probabilities. An n-th order Markov chain looks back n emissions to base its conditional probability on. For example, a 3rd order Markov chain models $P(X_{t} | X_{t-1}, X_{t-2}, X_{t-3})$.\n",
+    "\n",
+    "However, a full Markov model also needs to model the first n observations, which have fewer than n predecessors. The first observation can't really be modelled well using $P(X_{t} | X_{t-1}, X_{t-2}, X_{t-3})$, but can be modelled by $P(X_{t})$. The second observation has to be modelled by $P(X_{t} | X_{t-1} )$. This means that these distributions have to be passed into the Markov chain as well. \n",
+    "\n",
+    "We can initialize a Markov chain easily enough by passing in a list of the distributions."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "collapsed": true
+   },
+   "outputs": [],
+   "source": [
+    "%matplotlib inline\n",
+    "import time\n",
+    "import pandas\n",
+    "import random\n",
+    "import numpy\n",
+    "import matplotlib.pyplot as plt\n",
+    "import seaborn; seaborn.set_style('whitegrid')\n",
+    "import itertools\n",
+    "\n",
+    "from pomegranate import *\n",
+    "\n",
+    "random.seed(0)\n",
+    "numpy.random.seed(0)\n",
+    "numpy.set_printoptions(suppress=True)\n",
+    "\n",
+    "%load_ext watermark\n",
+    "%watermark -m -n -p numpy,scipy,pomegranate"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Populating the interactive namespace from numpy and matplotlib\n"
+     ]
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "/home/jmschr/anaconda/lib/python2.7/site-packages/IPython/core/magics/pylab.py:161: UserWarning: pylab import has clobbered these variables: ['log', 'random']\n",
+      "`%matplotlib` prevents importing * from pylab and numpy\n",
+      "  \"\\n`%matplotlib` prevents importing * from pylab and numpy\"\n"
+     ]
+    }
+   ],
+   "source": [
+    "from pomegranate import *\n",
+    "%pylab inline"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 2,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "d1 = DiscreteDistribution({'A': 0.10, 'C': 0.40, 'G': 0.40, 'T': 0.10})\n",
+    "d2 = ConditionalProbabilityTable([['A', 'A', 0.10],\n",
+    "                                ['A', 'C', 0.50],\n",
+    "                                ['A', 'G', 0.30],\n",
+    "                                ['A', 'T', 0.10],\n",
+    "                                ['C', 'A', 0.10],\n",
+    "                                ['C', 'C', 0.40],\n",
+    "                                ['C', 'T', 0.40],\n",
+    "                                ['C', 'G', 0.10],\n",
+    "                                ['G', 'A', 0.05],\n",
+    "                                ['G', 'C', 0.45],\n",
+    "                                ['G', 'G', 0.45],\n",
+    "                                ['G', 'T', 0.05],\n",
+    "                                ['T', 'A', 0.20],\n",
+    "                                ['T', 'C', 0.30],\n",
+    "                                ['T', 'G', 0.30],\n",
+    "                                ['T', 'T', 0.20]], [d1])\n",
+    "\n",
+    "clf = MarkovChain([d1, d2])"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Markov chains have `log_probability`, `fit`, `summarize`, and `from_summaries` methods implemented. They do not have classification capabilities by themselves, but when combined with a Naive Bayes classifier can be used to do discrimination between multiple models (see the Naive Bayes tutorial notebook).\n",
+    "\n",
+    "Let's see the log probability of some data."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 3,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "-17.532789486599906"
+      ]
+     },
+     "execution_count": 3,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "clf.log_probability( list('CAGCATCAGT') ) "
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 4,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "-0.916290731874155"
+      ]
+     },
+     "execution_count": 4,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "clf.log_probability( list('C') )"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 5,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "-38.55615991599665"
+      ]
+     },
+     "execution_count": 5,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "clf.log_probability( list('CACATCACGACTAATGATAAT') )"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "We can fit the model to sequences which we pass in, and as expected, get better performance on sequences which we train on. "
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 6,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-9.49627091139\n",
+      "-0.69314718056\n",
+      "-25.2575143893\n"
+     ]
+    }
+   ],
+   "source": [
+    "clf.fit( map( list, ('CAGCATCAGT', 'C', 'ATATAGAGATAAGCT', 'GCGCAAGT', 'GCATTGC', 'CACATCACGACTAATGATAAT') ) )\n",
+    "print clf.log_probability( list('CAGCATCAGT') ) \n",
+    "print clf.log_probability( list('C') )\n",
+    "print clf.log_probability( list('CACATCACGACTAATGATAAT') )"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 7,
+   "metadata": {
+    "scrolled": true
+   },
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "{\n",
+      "    \"frozen\" :false,\n",
+      "    \"class\" :\"Distribution\",\n",
+      "    \"parameters\" :[\n",
+      "        {\n",
+      "            \"A\" :0.16666666666666666,\n",
+      "            \"C\" :0.5,\n",
+      "            \"T\" :0.0,\n",
+      "            \"G\" :0.33333333333333331\n",
+      "        }\n",
+      "    ],\n",
+      "    \"name\" :\"DiscreteDistribution\"\n",
+      "}\n"
+     ]
+    }
+   ],
+   "source": [
+    "print clf.distributions[0] "
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 8,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "A\tA\t0.181818181818\n",
+      "A\tC\t0.136363636364\n",
+      "A\tG\t0.272727272727\n",
+      "A\tT\t0.409090909091\n",
+      "C\tA\t0.666666666667\n",
+      "C\tC\t0.0\n",
+      "C\tT\t0.166666666667\n",
+      "C\tG\t0.166666666667\n",
+      "G\tA\t0.333333333333\n",
+      "G\tC\t0.5\n",
+      "G\tG\t0.0\n",
+      "G\tT\t0.166666666667\n",
+      "T\tA\t0.5\n",
+      "T\tC\t0.2\n",
+      "T\tG\t0.2\n",
+      "T\tT\t0.1\n"
+     ]
+    }
+   ],
+   "source": [
+    "print clf.distributions[1]"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 2",
+   "language": "python",
+   "name": "python2"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 2
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython2",
+   "version": "2.7.14"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 1
+}