Commit 8da8ded9 by Sanjay Krishnan

Updated with more data

parent 688691cf
......@@ -435,7 +435,7 @@
{
"cell_type": "code",
"execution_count": 91,
"id": "0248b4a6",
"id": "4fbc338f",
"metadata": {},
"outputs": [
{
......@@ -468,7 +468,7 @@
{
"cell_type": "code",
"execution_count": 86,
"id": "a53037d7",
"id": "1a4c1bca",
"metadata": {},
"outputs": [
{
......@@ -493,7 +493,7 @@
{
"cell_type": "code",
"execution_count": 87,
"id": "e285087a",
"id": "48d84085",
"metadata": {},
"outputs": [
{
......@@ -518,7 +518,7 @@
{
"cell_type": "code",
"execution_count": 89,
"id": "19a949e5",
"id": "5b2387e8",
"metadata": {},
"outputs": [
{
......@@ -546,15 +546,66 @@
},
{
"cell_type": "code",
"execution_count": null,
"id": "284684ec",
"execution_count": 94,
"id": "66779452",
"metadata": {},
"outputs": [],
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"goo\n"
]
},
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAALMAAALpCAYAAAD4nhdLAAAABmJLR0QA/wD/AP+gvaeTAAAgAElEQVR4nO3deViU5f4G8HsW1kFwy13BJKAUM00zC4XMk0vWqUzSMjXrLHZKj+fXKTM91VUebbMul1xSExAC3LfcUtlEilxQE1ARFUFBBWFGg1m+vz88kuSSyvvO884z38918YfD+Dw3j7fDOzPvPK+OiAiMub5kvegEjCmFy8ykwWVm0jCKDqC0oqIi7Ny5U3QMzRs6dKjoCIrTyfYEMCkpCdHR0aJjaJ5k/+yAzE8AiYi/rvOVmJgo+p9GNdKWmbkfLjOTBpeZSYPLzKTBZWbS4DIzaXCZmTS4zEwaXGYmDS4zkwaXmUmDy8ykwWVm0uAyM2lwmZk0uMxMGlxmJg0uM5MGl5lJg8vMpMFlZtLgMjNpcJmZNLjMTBpcZiYNLjOTBpeZSYPLzKTBZWbS4DIzaXCZmTS4zEwaXGYmDS4zkwaXmUmDy8ykwWVm0uAyM2lwmZk0uMxMGlxmJg0uM5MGl5lJg8vMpMFldoJTp06JjuAWjKIDqCUpKUl0BADApUuXMHfuXPzzn/8UHQUAkJmZKTqCaqQtc3R0tOgIdWgtj4ykO8wYOnQoiEgzX3369AEAzJ8/X3iWq79kJF2ZteT06dNIS0uDTqdDTEyM6DjS4zKrKDExEXq9HkSEjIwMFBUViY4kNS6zimJiYuBwOAAARqNRM09KZcVlVklBQQH27NlTW2abzYYlS5YITiU3LrNK4uPjYTT+9mIRESEnJwf5+fkCU8mNy6ySmJgYWK3WOrd5enoiISFBUCL5cZlVsG/fPhw+fPia22tqarB48WIBidwDl1kFCQkJ8PDwuO73jh8/jt27dzs5kXvgMiuMiBAbG3vNIcYVfKihHi6zwjIyMlBcXHzD79fU1GDJkiW1r3Iw5XCZFXazQ4wrysrKkJaW5qRE7oPLrCCbzYbExETY7XZ4eXnBy8sLnp6e8PDwqP2zl5cXAPChhgqkPWtOhPPnz+Ott96qc1tOTg7i4+Mxbdq0Orc3atTImdHcgo5kPYVKI5KSkhAdHS3tmWoaksyHGUwaXGYmDS4zkwaXmUmDy8ykwWVm0uAyM2lwmZk0uMxMGlxmJg0uM5MGl5lJg8vMpMFlZtLgMjNpcJmZNLjMTBpcZiYNLjOTBpeZSYPLzKTBZWbS4DIzaXCZmTS4zEwaXGYmDS4zkwaXmUmDy8ykwWVm0uAyM2nwZuMqqKmpwZEjR3DmzBns378fJpMJW7Zsga+vL9q2bYu2bdtCp9OJjikd3mxcAefOncOGDRuwfft2pKeno6CgAHa7/Yb3N5lM6NSpEyIjI9G3b19ERUXVuZoruyPJXOY7RETYuHEj5s+fjw0bNkCn06Fnz56IjIxEx44dERoaipYtW8LX1xcmkwkVFRWwWCwoLCxEXl4efv75Z2zbtg25ublo3rw5hg8fjtdffx0dOnQQ/aO5qmQQu22rVq2iLl26kE6no759+1JMTAyZzeY7GqugoIA++OADuvvuu8lgMNDw4cMpPz9f4cRuIYnLfBuOHj1KAwYMIJ1OR88//zzt2bNHsbFtNhvFx8fTfffdR97e3vSf//yHLl26pNj4boDLfKuWLVtGDRs2pNDQUNq6datq81itVvryyy/J39+fOnbsSAcOHFBtLslwmf+I3W6nf/zjH6TT6Wj8+PFUXV3tlHmPHj1K3bt3pwYNGtD69eudMqeL4zLfTHV1NQ0dOpS8vb0pOTlZyPyjR48mDw8PiomJcfr8LiaJXw+6AYfDgREjRmDjxo34/vvvERkZ6fQMnp6eWLhwIZo1a4ZRo0bB09MT0dHRTs/hMkT/d9Kqf/zjH+Tt7U3bt28XHYWIiMaPH0+enp60bds20VG0ig8zrmfp0qWk0+mEHFrciN1up+eff56aN29OxcXFouNoURK/afI7BQUF6NKlC8aMGYMZM2
aIjlNHVVUVunXrhsDAQGzevJnfEq+L3wH8vQEDBqCoqAg///wzPD09Rce5RnZ2Nnr27InFixdjxIgRouNoCb8DeLWVK1eSTqej1NRU0VFuauzYsdS8eXOqrKwUHUVL+DDjCiJCt27dEBwcjKSkJNFxbur8+fNo37493n33Xbz99tui42hFMp/P/D8bN27E3r178e6774qO8ocaN26MsWPHYsaMGfj1119Fx9EMLvP/LFiwAI899hi6dOkiOsotGTduHM6dO4fVq1eLjqIZXGb8dj7yyJEjFRnPbDZj9erV+OCDDxQZ73patGiBfv36ITY2VrU5XA2XGcCGDRsAAM8884wi4y1btgyvvvoqEhISFBnvRoYNG4YtW7bAYrGoOo+r4DID2LZtG3r27Ak/Pz9Fxhs1ahQefPBBRca6mccffxw1NTXIyMhQfS5XwGUGkJGRgT59+ig6psFgUP1NjZYtWyI0NBTp6emqzuMq3P5Eo5qaGhQUFCA8PFy1OXbu3IlNmzahc+fOeO655xQdu2PHjjh06JCiY7oqty/zkSNHYLfbERISovjY1dXVGDx4MIgIBQUF+PDDD/HSSy8p+qQtNDQU69atU2w8V+b2ZS4tLQVw+dUBpZ06dQrff/89QkNDQUR45plnEBcXh+HDh2PAgAGKzNGiRQuUlZUpMparc/tjZrPZDACKPfm72pVPaQOATqfD3//+dwDA+vXrFZujQYMGqKqqUmw8V+b2Za6pqQEAeHl5qT5Xz549odfrUVxcrNiY3t7eqK6uVmw8V+b2Zfb19QUAp7xW6+/vDz8/P9x9992KjWk2m2EymRQbz5W5fZkbNGgAAE75Vb1nzx5UVlYqdrwMAJWVlfD391dsPFfm9mVu164dAOD48eOKj202m+FwOGr/nJycjOjoaPTt21exOQoLC2t/Bnfn9mVu06YN/Pz8kJeXp+i448aNg5+fH5544gl88MEH+Nvf/gaj0Yi4uDhF58nLy6t9kunu3P6lOZ1Oh44dOyI7OxujR49WbNx+/fqhX79+uHTpEs6ePYu2bdsqNvYVDocDe/bswcCBAxUf2xW5/SMzAERGRmL79u2qjO3j46NKkQFg3759OHv2LKKiolQZ39VwmQH07dsXhw4dQkFBgegot+X7779Hs2bNVH0r3pVwmQFERUWhRYsWih/Pqi0hIQHPP/889Hr+ZwS4zAAAo9GIYcOGYcmSJTfdJFxLMjMzceDAAf6E9lX4A63/U1BQgNDQUMTExGDYsGGi4/yhp556CqWlpdi1a5foKFrB+2Zc7cUXX8S+ffuwd+9eTV+WITs7Gz169MCaNWvw5JNPio6jFVzmqx05cgTh4eH4+OOPMWHCBNFxrsvhcKBXr17w9PRESkoK72r0G95q4GrBwcF4++238f7772v2lY3Zs2fj559/xuzZs7nIv8OPzL/z66+/4uGHH4bRaERGRoamtujavXs3evXqhYkTJ+I///mP6Dhaw4cZ15Ofn48HH3wQQ4YMwcKFCzXxCFhSUoJHHnkE7du3x5YtW/jluGvxYcb1hISE4LvvvkNcXBwmTpwoOg4qKiowYMAAeHp6IjExkYt8A9p9yi7YwIEDsXDhQowaNQrV1dX44osvhDxCl5SUYMCAATh37hzS09PRtGlTp2dwFVzmmxgxYgS8vLzw8ssv49SpU1i4cGHt+c/OsHv3bgwZMgSenp5IS0tDYGCg0+Z2Rfz76g8MHToU33//PVJTU9GtWzf89NNPqs/pcDgwc+ZM9OrVC+3bt0d6ejqCgoJUn9flCdhH1yUVFxdT3759yWAw0NixY+ncuXOqzPPTTz9Rjx49yGg00vvvv082m02VeSTE1zS5HQ6Hg2JiYqh58+bk7+9P77zzDpWUlCgydmZmJg0ePJh0Oh1FRERQTk6OIuO6ES7znaisrKRp06ZRs2bNyGg00sCBA2nJkiU3vHBOUVHRNbfZ7Xbas2cPffzxx3TfffcRAOrZsyetXbuWHA
6H2j+CjHjn/Pr49ddfsXr1asTGxmLLli2oqalBaGgoOnXqhJCQELRs2RIGgwGxsbEYPXo0qqqqcOzYMeTn52PPnj04e/Ys7rrrLgwdOhQjRozAQw89JPpHcmX8polSLBYLMjIykJaWhtzcXOTn5+PMmTOoqKhAdXU1GjZsiAYNGqBt27YICwtDeHg4oqKiEB4ezq8bK4PLrLbIyEikpKRg/vz5eO2110THkRm/A6im06dPIy0tDTqdDjExMaLjSI/LrKIrbz0TETIyMlBUVCQ6ktS4zCqKiYmp3QTGaDRq/pJsro7LrJKCggLs2bOntsw2mw1LliwRnEpuXGaVxMfH1/noFREhJycH+fn5AlPJjcuskpiYGFit1jq3eXp6qn4FKnfGZVbBvn37cPjw4Wtur6mpweLFiwUkcg9cZhUkJCTAw8Pjut87fvw4du/e7eRE7oHLrDAiQmxs7DWHGFfwoYZ6uMwKy8jIuOllHmpqarBkyZI6+zYzZXCZFXazQ4wrysrKkJaW5qRE7oPLrCCbzYbExETY7XZ4eXnBy8sLnp6e8PDwqP3zlQsB8aGG8vgzgAo6f/483nrrrTq35eTkID4+HtOmTatze6NGjZwZzS3wWXMqS0pKQnR0NHiZVcdnzTF5cJmZNLjMTBpcZiYNLjOTBpeZSYPLzKTBZWbS4DIzaXCZmTS4zEwaXGYmDS4zkwaXmUmDy8ykwWVm0uAyM2lwmZk0uMxMGlxmJg0uM5MGl5lJg8vMpMFlZtLgMjNpcJmZNLjMTBpcZiYNLjOTBpeZSYPLzKQh3WbjRUVF2Llzp+gYtX766SeYTCbNXWp46NChoiMoTrrNxq9s7s1uTrJ/dkDmzcaJiL+u85WYmCj6n0Y10paZuR8uM5MGl5lJg8vMpMFlZtLgMjNpcJmZNLjMTBpcZiYNLjOTBpeZSYPLzKTBZWbS4DIzaXCZmTS4zEwaXGYmDS4zkwaXmUmDy8ykwWVm0uAyM2lwmZk0uMxMGlxmJg0uM5MGl5lJg8vMpMFlZtLgMjNpcJmZNLjMTBpcZiYNLjOTBpeZSYPLzKTBZWbS4DIzaXCZmTS4zEwaXGYmDS4zkwaXmUmDy8ykwWV2gqKiItER3IJRdAC1JCUliY4AALBYLJg7dy7+9a9/iY4CAMjMzBQdQTXSljk6Olp0hDq0lkdG0h1mDB06FESkma/evXsDAObOnSs8y9VfMpKuzFpSUlKC9PR0AEBMTIzgNPLjMqvou+++g15/eYkzMzNx/PhxwYnkxmVWUUxMDOx2OwDAaDQiOTlZcCK5cZlVcvToUezbt6/2+NRms/Ghhsq4zCqJj4+H0fjbi0VEhP379+OXX34RmEpuXGaVxMbGwmq11rnNw8NDM69/y4jLrII9e/bg8OHD19xutVqxePFiAYncA5dZBQkJCfD09Lzu906cOIHs7GwnJ3IPXGaFERGWLl2Kmpqa637fw8MDCQkJTk7lHrjMCktPT0dxcfENv2+1WhEbG1v7kh1TDpdZYTc7xLiirKwMaWlpTkrkPrjMCrLZbEhMTITdboeXl1ftl6enZ50/A+BDDRVIe9acCOfOncO///3vOrfl5OQgPj4e06ZNq3N7o0aNnBnNLehI1lOoNCIpKQnR0dHSnqmmIcl8mMGkwWVm0uAyM2lwmZk0uMxMGlxmJg0uM5MGl5lJg8vMpMFlZtLgMjNpcJmZNLjMTBpcZiYNLjOTBpeZSYPLzKTBZWbS4DIzaXCZmTS4zEwaXGYmDS4zkwaXmUmDy8ykwWVm0uAyM2lwmZk0uMxMGlxmJg0uM5MGbzaukoqKClRVVeHUqVMAgIKCAphMJjRo0AC+vr6C08mJNxuvJ4fDgZycHOzYsQPZ2dnIy8tDfn4+Kisrb/h3WrdujdDQUISFhaFXr16IiopCq1atnJhaSslc5jvgcDiwbds2xMbGYv369Th37hyaNm2KHj164N5770VISAgCAwPh7+8Pk8kEk8mEiooKmM
1mVFZW1hb+4MGDyM7ORk1NDcLCwjB06FCMGDECwcHBon9EV5QMYresoqKCPv74Y2rTpg0BoIceeog+//xz2rt3L9nt9jsa02Kx0KZNm2jChAnUqlUrAkCPPPIIrVy5khwOh8I/gdSSuMy3oLKykt577z0KCAiggIAAevvttyk3N1fxeWw2G23cuJGeeeYZ0uv11KlTJ0pKSlJ8Hklxmf9IYmIitW7dmpo0aUIff/wxVVRUOGXeAwcO0PDhw0mv11Pfvn1V+c8jGS7zjZSVldGgQYNIp9PRmDFjqKysTEiOzMxMeuCBB8jT05M++eQTPvS4MS7z9aSmplKbNm0oMDCQ0tLSRMchm81G06dPJw8PDxo0aBCdPXtWdCQt4jL/3rfffktGo5GefvppOn/+vOg4dezcuZPatWtHd999N+Xn54uOozVc5qt9+umnpNPpaNKkSZr9dV5WVkY9evSgZs2aUXZ2tug4WsJlvmLq1Kmk1+vpq6++Eh3lD1VVVdETTzxB/v7+tHv3btFxtILLTES0YMEC0ul0NHv2bNFRbll1dTU9/vjj1Lx5czpy5IjoOFrAZd64cSMZDAaaMmWK6Ci3rbKykh588EEKDg6m8vJy0XFES3Lrt7OLi4vRpUsX9OvXD0uXLhUd546cOXMGXbt2xcMPP4xly5aJjiOS+14I3uFw4MUXX0Tjxo0xb9480XHuWPPmzbF06VKsWrUKs2fPFh1HLNG/G0SZM2cOeXh40N69e0VHUcSUKVPIZDLRiRMnREcRxT0PM0pLSxEWFoa//OUvmDZtmug4iqipqUHnzp3RqVMndz3ccM/DjEmTJsHPzw+TJ08WHUUxnp6emDlzJpYvX44ffvhBdBwh3O6R+cSJEwgODsa8efMwevRo0XEU179/f/z666/YsWOH6CjO5n6PzJ9++ilatGiBF198UXQUVUyZMgUpKSlIT08XHcXp3OqRubKyEi1btsT06dPxj3/8Q3Qc1URERKBZs2ZYvny56CjO5F6PzElJSXA4HHjppZdER1HVq6++inXr1uHcuXOioziVW5U5NjYWTz/9NBo2bCg6iqqGDBkCT09PJCcni47iVG5T5rKyMqSlpWHYsGGio6jOZDJh0KBBWLFihegoTuU2Zd6xYwcMBgOioqKcPrfZbMbatWvx9ttv1+s+t6Nfv37IyMhAdXW1IuO5Arcp8/bt29GtWzf4+/s7fe6NGzfizTffxHfffVev+9yOxx9/HBcvXkRWVpYi47kCtylzVlYWHn30USFzDxkyBD169IDReOMNpG7lPrcjMDAQbdq0QWZmpiLjuQK3KDMRIT8/H/fdd5+wDHq9Hnr9zZf7Vu5zO+69917k5eUpNp7WucVec0VFRTCbzQgNDVVszPz8fOzatQs5OTl45JFH8Mwzz9T5/vnz57Fs2TIUFhbiwQcfBBFBp9Pd9n3qIzQ0FLt371ZsPM0Tdo6TE6WmphIAKi4uVmS8GTNmUGRkJDkcDjp27BgFBQXRnDlzar+fm5tL3bt3p507d5LVaqV58+aRl5cXhYSE3NZ9lMjZsmVLxcbTuCS3OMy4cOECACAgIECR8WbPno2OHTtCp9MhKCgIXbp0wbp162q/P3LkSERGRuLhhx+G0WjEa6+9htatW9cZ41buU18BAQE33cBRNm5RZrPZDIPBAB8fH0XG27FjBz766CMAwC+//IKTJ0/i8OHDAIBt27YhKyurzkuAOp0O3bt3rz2EuJX7KKFBgwa4ePEi7Ha7YmNqmVuU+dKlS/Dx8VGsKK1bt8aPP/6IN998E4cOHUKHDh3gcDgAAPv27QMAdOrUqc7fuXruW7mPEkwmE4gIFy9eVHRcrXKLJ4De3t64dOmSYuNNnjwZKSkp2LRpE3x8fOqc0HPl13pWVhbatm1b5+9dKeut3EcJV0qs1G8krXOLR+YGDRrAbrcrUuhjx47ho48+wksvvVRbkiuPygAQHh4O4PKhxI3cyn2UUFVVBV9fX8Veu9
Y6tyjzlXf9rjwRrA+z2QwASEhIQGVlJdLS0pCamory8nKYzWZERUUhLCwMsbGxSE1NBXD5U+ApKSkoKipCTk4OBg4c+If3sdls9c5aWVkp5B1PUdyizIGBgQAuP6rWV3h4OF555RWkp6ejW7du+OWXXzBz5kyYzWY8/fTTICJ8//33uPfee9GnTx906NABb731Fh588EF06dIFO3fuBIA/vI8SZT569CiCgoLqPY6rcIuT8x0OBxo0aIDZs2dj1KhRioxZVVWFBg0a1P65uroaXl5ede5TVlYGX19fmEwmmM1m+Pn5XTPOrdznTvXv3x8tWrTAt99+q9iYGuYeJ+fr9Xrcc889OHTokGJjXl1kANcUGQDuuusumEwmALhhSW/lPncqNzdX0Xc9tc4tygwAPXr0QEZGhugYTnPy5EkcP34cDz30kOgoTuM2ZY6KikJWVhaqqqpER3GKH374Ad7e3nj44YdFR3Eatyqz3W6vffVAdj/88AN69erlNq8xA25U5hYtWqBnz56KnfyuZRcvXsTq1avx5z//WXQUp3KbMgPAiBEjsGLFCukPNVatWoVLly5h6NChoqM4lVuVOTo6Gna7HQkJCaKjqGrhwoUYMGAAmjdvLjqKU7lVmRs3boyXX34Z06dPV+RNCS3KysrCtm3bMG7cONFRnM4t3jS5WkFBAUJDQ7FkyRIMHz5cdBzFDR48GGVlZdi1a5foKM7mnheCHzlyJDIyMrB//36pnu2npqYiMjIS69atw8CBA0XHcTb3LPPp06cRFhaG8ePH4/333xcdRxE2mw3dunVDy5YtsXHjRtFxRHCPt7N/r0WLFnj//fcxffp0Rd/iFumzzz7D4cOHMWfOHNFRhHHLR2bg8iNZREQELBYLsrKyXPpwY9euXejduzemTp2K//u//xMdRxT3PMy44vjx43jggQfw3HPPYcGCBaLj3JHz58+ja9eu6NixI9atW6f4R69ciHseZlwRGBiIb7/9FosWLcKnn34qOs5tu3TpUu051EuWLHHnIl8mYH8DzZk9ezbpdDpauHCh6Ci3zGaz0bPPPktNmjShX375RXQcLUhyjw+H/YGxY8eiqKgIf/3rXwEAr7zyiuBEN1ddXY0XX3wRGzduxNatW3HvvfeKjqQNov87acm0adNIp9PRf/7zH9FRbqiqqor69etHDRs2pJSUFNFxtISvnf17s2bNIr1eT8899xxVVFSIjlPHwYMHqWPHjtSyZUvas2eP6Dha4x7bc92O119/HevXr0dqaioeeugh7NmzR3QkEBHmzp2Lbt264a677kJ2dja6dOkiOpb2iP7vpFWnTp2iyMhIMhqNNH78eLpw4YKQHPv27aNHH32UDAYDTZkyhWw2m5AcLoAPM27G4XDQwoULqWnTptSyZUv66quv6OjRo1RZWanqvPn5+XTkyBH6y1/+QkajkXr27MmHFX+My3wrzp07R+PGjSMfHx8yGo304Ycf0smTJ1WZa+fOnRQREUF6vZ7uvvtuWrRoEdntdlXmkgyX+VaVlJRQUFAQ+fj4UOPGjUmv11Pfvn1p/vz5dOTIkTse12q10s6dO2nKlCkUHBxMAOjuu+8mADRmzBhyOBwK/hRSS3Lrt7NvVWlpKSIiIpCfn4/w8HBkZ2djw4YNiImJwebNm2GxWBAYGIhevXohNDQUYWFhCAoKgp+fH/z8/GAymVBZWYkLFy6gsrIS+fn5yMvLw8GDB5GRkYGqqiq0adMGzz//PEaMGIGAgAB06NABwOXXvBcsWKDo5SEk5d7nZtyKM2fOICIiAoWFhbBarRg8eDDWrFlT+/2ampraT3fs3r0bubm5KCgouOknWRo0aFBb+kceeQRRUVF1Nmuprq6Gj48PiAh6vR5/+ctfMGfOHH67+uaS+R3AmygrK0OfPn1qi+zp6Vm7b90Vnp6eiIiIQERERO1tVqsVJSUlqKqqgtlshsViQUBAAPz9/dGgQQO0aNHipvN6eXmhYcOGKC
8vh8PhwPz582G1WrFgwQIu9E1wmW+grKwMvXv3RkFBAaxWK4DLeyffyqUaPDw80K5du3rN36pVK5SXlwO4vFfe4sWLodfrMW/ePC70DfCB2HVcKfKRI0dqiwxcfsRt06aNUzK0b9++zp8dDgcWLlyI8ePHg48Mr4/L/Dvl5eXo27cvjh49es1xr8PhcFqZ27VrBw8Pj2vmnzVrFv75z386JYOr4TJfpby8HJGRkcjNza3ziHw1Z5W5devW130Fw+FwYObMmRg/frxTcrgSLvP/VFRUICoqCocOHbphkW/1mFkJbdq0uWGOK4XmR+i6uMy4/NGjiIgI/PLLLzcsEHD5JTVnfVawTZs2da6V8nsOhwNfffUV3nrrLafkcQVcZgCxsbHIzc39w/u1atXKCWku+6PDGZ1OB51Oh7i4OOzfv99JqbSNywxg3LhxKCoqwoQJE+Dl5XXDqzP9/hUGNd2ozHq9HjqdDq1atcIXX3yBgoKC2qtXuTsu8/80b94c06ZNQ1FREV566aVrLjnm4eFxzRsmavL19a1zqQmDwQCdToemTZvi66+/RmFhIcaNG+fSWyQojcv8O02bNsX+/fvxpz/9CR9++CEaN24Mg8EAu93utCd/V7Rs2bL2DZLw8HDExcWhpqYG58+fd5tr+90Wkac5adH69esJAGVnZxMRkcVioa+++opatWpFixYtcmqWJ554giIiImjz5s21t73zzjvUrFkzslgsTs3iAvisud+LiIiAv78/1q9fX+f2mpoaWCwWNGrUyGlZzpw5c80ey6WlpWjfvj2mTZuGN954w2lZXACfNXe11NRU9OnTB2lpaXj00UdFx7mhN998EytXrsTRo0fh6ekpOo5WcJmv1r9/f1y6dAkpKSmio9zUyZMnERwcjK+//lrze3w4EZf5ir1796Jr167YsGED+vfvLzrOH3rllVeQmpqKvLw8GAwG0XG0gMt8xZAhQ3D06FHs3r3bJU6xPHr0KEJDQxEfH+92F+K5AS4zcDzG6TAAAB33SURBVPmyvB07dkRSUhKee+450XFuWXR0NPLy8rBnzx6X+A+oMi4zcPmyED/++CMOHjzoUp+127dvHx544AGsXbsWgwYNEh1HNC7ziRMnEBwcjAULFmDkyJGi49y2QYMG4fz588jMzBQdRTT33p8ZAKZPn46WLVti2LBhoqPckcmTJ2PXrl2afwXGGdz6kfnMmTNo3749PvvsM4wdO1Z0nDsWGRkJLy8vbNq0SXQUkdz7kfnzzz+Hv78/Ro8eLTpKvUycOBGbN2/GTz/9JDqKUG77yHz+/HkEBQXhvffew7///W/RceqtR48eaNu2LZYvXy46iiju+8g8c+ZM6PX62t3yXd0777yDlStX4sCBA6KjCOOWZbZYLJg1axbGjRuHgIAA0XEU8cwzz6Bjx4745JNPREcRxi3LPHfuXFy6dEmqs850Oh3eeustxMfH48iRI6LjCOF2Za6ursaMGTPwt7/9DU2bNhUdR1EvvvgigoKC8MUXX4iOIoTblXnx4sU4e/aslB/TNxgM+Ne//oVFixahuLhYdBync6sy2+12fP755xg1apTTPwLlLK+88gqaNGmCGTNmiI7idG5V5oSEBBw7dgz/+te/REdRjZeXF8aPH4+vv/4aZ8+eFR3HqdymzESE6dOnY9iwYbjnnntEx1HV2LFj4ePjg1mzZomO4lRuU+ZVq1bh4MGDePvtt0VHUZ3JZMLrr7+OmTNnoqqqSnQcp3GbMn/66ad4+umn0alTJ9FRnOLNN9+E1WrFvHnzREdxGrco85YtW5CZmSnF29a3qnHjxvjrX/+Kzz77DJcuXRIdxync4tyMxx57DAaDAVu2bBEdxalOnz6Nu+++G59//jn+/ve/i46jNvnPzcjKysL27dvx7rvvio7idC1atMDIkSMxbdq0m+5uKgvpH5kHDx6MsrIy7Nq1S3QUIY4dO4aQkBAsWrQII0aMEB1HTXJ/bConJwddun
TB6tWrMXjwYNFxhHn55Zfx008/udxnHG+T3GUeNmwYfvnlF+zdu9etP7186NAhdOrUCcnJyXj22WdFx1GLvGU+evQowsLCEBsbixdeeEF0HOGeffZZFBYW4ueff5b1P7a8TwD/+9//IjAwEEOGDBEdRRMmT56MvXv3Sv2KjpSPzEVFRejQoQNmz56NV199VXQczXjiiSdQXV2NHTt2iI6iBjkfmT/77DM0a9YML7/8sugomjJx4kSkpKQgPT1ddBRVSPfIfO7cOQQFBeGjjz7CuHHjRMfRnIiICAQEBGDdunWioyhNvkfmGTNmwMvLC2PGjBEdRZPeeecdrF+/Hrt37xYdRXFSPTJXVlYiMDAQ//d//4dJkyaJjqNZ3bp1Q3BwMBITE0VHUdK1L82tWrXKJfdcAy5f6LG6uhre3t6qv/x04cIFVcZ1xvrbbDbYbDZ4e3urOo+arrP+yddcsqimpgaVlZWYP3++c1K5mOzsbFXXhtf/5m62/je8/tZrr72mWiBXFhAQ4JSi8fpf383WX7ongMx9cZmZNLjMTBpcZiYNLjOTBpeZSYPLzKTBZWbS4DIzaXCZmTS4zEwaXGYmDS4zkwaXmUmDy8ykwWVm0uAyM2lwmZk0uMxMGlxmJg0uM5MGl5lJg8vMpMFlZtLgMjNpcJmZNLjMTBpcZiYNLjOTBpeZSYPLzKTBZWbS4DIzaXCZmTS4zEwaXGYmDS4zkwaXmUmDy8ykwWVm0uAyM2lwmZk0uMxMGi5RZofDITqCW3OV9dd8mR0OB7788kvRMdyWK62/8UbfCAgIcGaOG7Lb7bh48SI++OAD0VEAAFar1Snz8Ppf383W/5oyd+/eHfPnz1c10O2Ii4tDamoqXn31VYSFhYmOozpe/3ogDaupqSF/f38CQGPGjBEdx+242PonafqYedOmTaisrAQAJCYmorq6WnAi9+Jq66/pMsfHx8PDwwMAYLFYsHHjRsGJ3Iurrb9my3zx4kWsXLmy9oDfYDBg6dKlglO5D1dcf82WefXq1XV+rdlsNqxZswZms1lgKvfhiuuv2TLHxcVBr68bz2q1YtWqVYISuRdXXH9Nlrm8vBxbtmyB3W6vc7tOp0NcXJygVO7DVddfk2VetmzZNQsJXH4Bf+vWrSgtLRWQyn246vprssyxsbE3/J5Op8Py5cudmMb9uOr6a67MJSUlyMjIuOHJLQ6HAzExMU5O5T5cef01V+aEhIRrnnhczeFwICsrC4WFhc4L5UZcef01V+bY2FjYbLab3oeIkJyc7KRE7sWV1/+GZ82JUFxcjIqKCrRu3br2turqaly4cAHNmjWrc9/MzExnx5Oeq6+/johIdIibSUpKQnR0NDQeU1outP7JmjvMYOxOcZmZNLjMTBpcZiYNLjOTBpeZSYPLzKTBZWbS4DIzaXCZmTS4zEwaXGYmDS4zkwaXmUmDy8ykwWVm0uAyM2lwmZk0uMxMGlxmJg0uM5MGl5lJg8vMpMFlZtLgMjNpcJmZNLjMTBpcZiYNLjOTBpeZSYPLzKTBZWbS0MzO+TabDT/++CPS09ORl5eHvLw8nD59GlVVVTCZTAgKCoKvry/atWuH0NBQdO7cGZGRkejQoYPo6FKQYf2F7pxvs9mwefNmxMTEYP369TCbzWjVqhU6deqEkJAQtGrVCr6+vvDx8UFlZSUsFgsKCwuRm5uL/fv3w2KxICgoCC+88AJGjBiB++67T9SP4pIkW/9kkACXLl2i2bNnU2BgIOl0OoqIiKA5c+ZQXl7eLY9RXV1NaWlpNHHiRGrXrh0BoH79+tH27dvVCy4JSdc/yellXrFiBbVt25Z8fHzo9ddfpyNHjtR7TLvdTps2baKoqCgCQP3796fDhw8rkFY+Eq+/88p89uxZevrpp0mn09HIkSOppKRElXlSUlIoPDycvL296YsvviCHw6HKPK7GDdbfOWXOzMyktm3bUrt27Zzya8hqtdLUqVPJaDTS4MGDqaKiQvU5tc
xN1l/9Mq9du5Z8fX1p0KBBdO7cObWnqyMjI4Nat25N999/PxUXFzt1bq1wo/VXt8zLly8no9FIY8aMIZvNpuZUN1RYWEihoaHUoUMHtyu0m62/emXetm0beXl50dixY4Uft5aWllJYWBh17tyZysvLhWZxFjdcf3XKfOzYMWrUqBFFR0eT3W5XY4rbdvz4cWrdujUNGjRI+D+u2tx0/ZUvc01NDT300EPUuXNnunjxotLD10tmZiZ5eHjQJ598IjqKatx4/ZUv89SpU8nX15cOHTqk9NCK+OSTT8jLy4tyc3NFR1GFG6+/smU+duwY+fr60scff6zksIqy2Wz0wAMP0OOPPy46iuLcfP2VLfOoUaMoJCSEqqurlRxWcRkZGQSANm3aJDqKotx8/ZUr84kTJ8jT05O+/fZbpYZU1RNPPEG9e/cWHUMxvP4Klvnf//43tWvXjmpqapQaUlUpKSkEgH7++WfRURTB609Jipyc73A4EB8fj5EjR8LDw0OJIVXXu3dvhIaGIjY2VnSUeuP1v0yRMqekpKCoqAgjRoxQYrhrpKWl4aOPPsJLL72E1atXKzbu8OHD8d1334HEndKtCF7//1Hi8f3dd9+lkJAQJYa6RnZ2Ng0ePJiqq6vpgw8+IC8vL7JYLIqM/dNPPxEA2r9/vyLjicLrT0RKHWbs2LEDUVFRSgx1jcmTJ6NHjx7w9PTE5MmTUVBQAF9fX0XGfuCBB9CoUSNs375dkfFE4fW/rN5lJiLk5OSge/fuSuS5xsGDB2EwGAAAOp0OrVq1Umxsg8GArl27IicnR7ExnY3X/zf1/kDrqVOnYDabERoaqkSeWikpKTh48CBOnjyJH3/8EfPmzUPLli3x1FNPKTpPaGgo9u/fr+iYzsTr/5t6l/nEiRMAgKCgoPoOVUdgYCCICESE5s2bo1u3bvDz81N0DgBo37491q1bp/i4zsLr/5t6l7mqqgoA4O/vX+8wVwsKCkLjxo0BAK1bt8aDDz6o6PhX+Pv7o7KyUpWxnYHX/zf1Pma2WCwAoNiTAmfz8/OD2WwWHeOO8fr/pt5lvvIivdVqrXcYEWpqauDp6Sk6xh3j9f9NvcvcoEEDAHDZR7eqqqran8EV8fr/pt5lbtq0KQDgzJkz9Q4jwpkzZ3DXXXeJjnHHeP1/U+8yBwcHw2AwID8/X4k8dZw6dQoAcPr0acXHviIvLw8hISGqja82Xv/f1LvM3t7eCAoKwoEDB5TIU2vXrl2YNGkSAGDt2rWIiYnBhQsXFJ0DuPymQFhYmOLjOguv/28U2QW0V69eSE1NVWKoWj179sSKFSsUHfP3SktLkZubi0cffVTVedTG63+ZIudmPPbYY8jIyMDFixeVGM5ptm7dCqPRiIiICNFR6oXX/zJFyjxw4EDYbDZFTw90hsTERERFRanyzpYz8fpfpkiZmzVrhv79+yMmJkaJ4ZyirKwM33//vWrnADsTr///KHEiKRHRqlWrSK/X04EDB5QaUlVTpkyhJk2akNlsFh1FEbz+Cn4G0OFwUMeOHenFF19UakjVVFRUUKNGjejDDz8UHUUxvP4KbzWQmJhIer2esrKylBxWcRMmTKAmTZpIt++cm6+/8jsaPfbYY9S1a1eyWq1KD62Iffv2kdFopPnz54uOogo3Xn/ly3zo0CHy8fGhiRMnKj10vVksFurYsSM98sgjmtlQUGluvP7q7AL6zTffkF6vp3Xr1qkx/B1xOBz08ssvU+PGjen48eOi46jKTddfvf2ZX3nlFTKZTLRr1y61prgtkyZNIqPRSBs2bBAdxSnccP3VK7PVaqUnn3ySmjRpIvwJyUcffUQ6nY4WLVokNIczueH6q3sZCIvFQoMGDSI/Pz8hj4hWq5XeeOMN0uv1NGfOHKfPL5qbrb/6F+ixWq00atQo0uv1NHnyZKddW6OoqIh69+5NPj4+lJyc7JQ5tciN1t951wGcO3cueXt7U48ePSg7O1u1eex2O82bN48aN2
5MoaGhtG/fPtXmciVusP7OvULrgQMHqHfv3mQwGGj06NGUn5+v2NgOh4PWrFlD3bp1I6PRSBMmTKCqqirFxpeB5Ovv/MsNOxwOiouLo5CQEDIYDPTnP/+ZVq5cedMNsm/2mmRJSQnNmDGDwsPDSafT0VNPPUU5OTlqRJfCnaz/zWho/ZN0RGK2wHQ4HEhOTsaCBQuwfft2mEwm9OnTB3369EF4eDhCQkLQokULeHl54csvv8SECRNw4cIFFBYWIj8/H9nZ2di2bRv27NkDPz8/DBkyBG+++SY6d+4s4sdxObe6/j4+PrV/R+PrnyyszFcrKirC2rVrsW3bNqSnp9f5zJler6/dWecKg8GA0NBQREVFoW/fvhgwYAC8vb1FRJfCzdbfYDDAZDLV2ahFo+uvjTL/Xnl5OfLz81FaWoqvvvoKP/zwA6ZMmYJevXqhbdu2CA4Odum9LrTu6vU3m82wWCwICAiAv7+/ltdfm2W+wmq1omnTpqisrMSYMWPwzTffiI7EtCtZkU+aqGXTpk21v94SExNRXV0tOBHTMk2XOT4+vnb7KYvFgo0bNwpOxLRMs2W+ePEiVq5cWbuHmsFgwNKlSwWnYlqm2TKvXr26zmGFzWbDmjVrXHZPNaY+zZY5Li4Oen3deFarFatWrRKUiGmdJstcXl6OLVu2wG6317ldp9MhLi5OUCqmdZos87Jly64pMgDY7XZs3boVpaWlAlIxrdNkmW921U6dTofly5c7MQ1zFZorc0lJCTIyMuBwOK77fYfD4VI79zDn0VyZExISrnnidzWHw4GsrCwUFhY6LxRzCZorc2xsLGw2203vQ0RITk52UiLmKhTZn1kpxcXFqKioQOvWrWtvq66uxoULF9CsWbM6983MzHR2PKZxmj7RCACSkpIQHR0Njcdk4mn7RCPGbgeXmUmDy8ykwWVm0uAyM2lwmZk0uMxMGlxmJg0uM5MGl5lJg8vMpMFlZtLgMjNpcJmZNLjMTBpcZiYNLjOTBpeZSYPLzKTBZWbS4DIzaXCZmTS4zEwaXGYmDS4zkwaXmUmDy8ykwWVm0uAyM2lwmZk0uMxMGlxmJo1rds5ftWoVRo4cKSLLdTkcDvj4+CAgIEB0lDouXLggOgL7nWvKXFNTg8rKSsyfP19EHs3Lzs7mtdGoG17T5LXXXnNmDpcREBDAZdYoPmZm0uAyM2lwmZk0uMxMGlxmJg0uM5MGl5lJg8vMpMFlZtLgMjNpcJmZNLjMTBpcZiYNLjOTBpeZSYPLzKTBZWbS4DIzaXCZmTS4zEwaXGYmDS4zkwaXmUmDy8ykwWVm0uAyM2lwmZk0uMxMGlxmJg0uM5MGl5lJg8vMpMFlZtLgMjNpcJmZNLjMTBpcZiYNLjOTBpeZSYPLzKTBZWbS4DIzaXCZmTRcoswOh0N0BOYCNF9mh8OBL7/8UnQM5gKMN/pGQECAM3PckN1ux8WLF/HBBx+IjgIAsFqtoiOwG7imzN27d8f8+fNFZLmuuLg4pKam4tVXX0VYWJjoOEzDdEREokPciNVqRdOmTVFZWYkxY8bgm2++ER2JaVeypo+ZN23ahMrKSgBAYmIiqqurBSdiWqbpMsfHx8PDwwMAYLFYsHHjRsGJmJZptswXL17EypUra59wGQwGLF26VHAqpmWaLfPq1avrHFbYbDasWbMGZrNZYCqmZZotc1xcHPT6uvGsVitWrVolKBHTOk2Wuby8HFu2bIHdbq9zu06nQ1xcnKBUTOs0WeZly5ZdU2Tg8hsoW7duRWlpqYBUTOs0WebY2Ngbfk+n02H58uVOTMNchebKXFJSgoyMjBueXORwOBATE+PkVMwVaK7MCQkJ1zzxu5rD4UBWVhYKCwudF4q5BM2VOTY2Fjab7ab3ISIkJyc7KRFzFTc8a06E4uJiVFRUoHXr1rW3VVdX48KFC2jWrFmd+2ZmZjo7HtM4TZ9oBABJSUmIjo6GxmMy8bR9ohFjt4PLzKTBZWbS4DIzaX
CZmTS4zEwaXGYmDS4zkwaXmUmDy8ykwWVm0uAyM2lwmZk0uMxMGlxmJg0uM5MGl5lJg8vMpMFlZtLgMjNpcJmZNLjMTBpcZiYNLjOTBpeZSYPLzKTBZWbS4DIzaXCZmTS4zEwaXGYmDS4zk4Zmds632Wz48ccfkZ6ejry8POTl5eH06dOoqqqCyWRCUFAQfH190a5dO4SGhqJz586IjIxEhw4dREdnGiF053ybzYbNmzcjJiYG69evh9lsRqtWrdCpUyeEhISgVatW8PX1hY+PDyorK2GxWFBYWIjc3Fzs378fFosFQUFBeOGFFzBixAjcd999on4UJl4ySIBLly7R7NmzKTAwkHQ6HUVERNCcOXMoLy/vlseorq6mtLQ0mjhxIrVr144AUL9+/Wj79u3qBWdaluT0Mq9YsYLatm1LPj4+9Prrr9ORI0fqPabdbqdNmzZRVFQUAaD+/fvT4cOHFUjLXIjzynz27Fl6+umnSafT0ciRI6mkpESVeVJSUig8PJy8vb3piy++IIfDoco8THOcU+bMzExq27YttWvXzimHAVarlaZOnUpGo5EGDx5MFRUVqs/JhFO/zGvXriVfX18aNGgQnTt3Tu3p6sjIyKDWrVvT/fffT8XFxU6dmzldkqqvM69YsQLPPPMMhg0bhtWrV6Nx48ZqTneNXr16ISMjA7/++isiIiJQUlLi1PmZc6n20tz27dsxYMAAjBkzBrNmzYJOp1NjmltSVlaG3r17w9PTEykpKWjYsKGwLEw1yaqUubCwEF27dsWf/vQnxMfH3/TC7s5y4sQJ9OrVC126dMHatWuF/udiqlD+Cq1WqxUvvPAC2rZti8WLF2uiyADQrl07LFu2DJs3b8Znn30mOg5Tg9JH4VOnTiVfX186dOiQ0kMr4pNPPiEvLy/Kzc0VHYUpK0nRw4zCwkJ07NgRkyZNwrvvvqvUsIqy2+3o3r07mjRpgi1btoiOw5Sj7DHz6NGjsXPnTuzfvx+enp5KDau4nTt34pFHHsGmTZvwpz/9SXQcpgzlynzy5EkEBwdj/vz5GDlypBJDqqp///64dOkSUlJSREdhylDuCeCsWbPQokULDB8+XKkhVfXuu+8iNTUVu3fvFh2FKUSRMjscDsTHx2PkyJHw8PBQYkjV9e7dG6GhoYiNjRUdhSlEkTKnpKSgqKgII0aMUGI4pxk+fDi+++47KPi0gQmkSJm3bt2KkJAQ3HPPPUoM5zQDBw7E6dOncfDgQdFRmAIUKfOOHTsQFRWlxFBO9cADD6BRo0bYvn276ChMAfUuMxEhJycH3bt3VyKPUxkMBnTt2hU5OTmiozAF1PsDradOnYLZbEZoaKgSeeowm82IjY3FiRMncM8996BHjx649957YTAYFJsjNDQU+/fvV2w8Jk69H5lPnDgBAAgKCqrvUHWUl5ejW7du6NSpE9577z2sW7cO4eHhePjhh/HPf/5TsXnat2+P48ePKzYeE6feZa6qqgIA+Pv71zvM1T799FNUV1cjIiICJpMJ7733HoDLr0DMmDFDsXn8/f1RWVmp2HhMnHqX2WKxAAB8fX3rHeZqR48eRVlZGWpqagAA999/P0wmE06ePKnoPH5+fjCbzYqOycSod5mvvElitVrrHeZqUVFRuHjxItLT0wFcPuyoqalBv379FJ2npqZG0+eRsFtX7yeADRo0AHD5yZqPj0+9A13x6quv4siRI/jb3/6Gjz/+GNu3b8d///tf9O/fX7E5gMuHSVd+Buba6l3mpk2bAgDOnDmDu+66q96BrjAajWjZsiUWL16Mpk2b4qmnnoKXl5di41+hdG4mTr0PM4KDg2EwGJCfn69Enlpff/01li1bBqvVipqaGpw4caL2yaaS8vLyEBISovi4zPnqXWZvb28EBQXhwIEDSuSp1bJlS+zfvx9RUVHo3LkzQkJC4O/vj379+uH06dOKzXPw4EGEhYUpNh4TR5G3s3v16oXU1FQlhqplMpkwa9YsHD
58GGlpadi0aRNWrFiBZs2a4dtvv1VkjtLSUuTm5uLRRx9VZDwmmBIfvlq8eDF5e3uTxWJRYjjKzs6mVq1akc1mu+Z75eXlNG/ePEXmWbp0KXl4eFBVVZUi4zGhlNkEZuDAgbDZbFi9erUSwyEnJwclJSX45ptvcPToUdhsNhw5cgTx8fGYNm0aoqOjFZknMTERUVFR8PPzU2Q8JphS/y2efPJJ6t+/vyJjORwO+vzzzykyMpK8vLzIZDJRz549ad68eVRdXa3IHKWlpeTh4UGxsbGKjMeEU26vuVWrVpFer6cDBw4oNSQREdXU1Cg63hVTpkyhJk2akNlsVmV85nTK7TX31FNP4d5778V///tfpYYEAFU+hnXhwgXMnDkT48aNg8lkUnx8JoZiZdbpdJgyZQoSEhLw448/KjWsKj788EPo9Xq88cYboqMwBSm+11zfvn1RUVGBrKwsGI2auf5PrZycHHTr1g1z5szBa6+9JjoOU47yGyfm5uaia9euGD9+PKZOnark0PV28eJF9OjRAw0bNkRqaqpm9sFjilB+48SwsDDMnDkT06dPx/r165Ue/o4REf7+97+jpKREMzuTMoWp9dTylVdeIZPJRLt27VJritsyadIkMhqNtGHDBtFRmDrUuwyE1WqlJ598kpo0aUJZWVlqTXNLPvroI9LpdLRo0SKhOZiq1L2micVioUGDBpGfn5+QR0Sr1UpvvPEG6fV6mjNnjtPnZ06l/gV6rFYrjRo1ivR6PU2ePPm651uooaioiHr37k0+Pj6UnJzslDmZUM67DuDcuXPJ29ubevToQdnZ2arNY7fbad68edS4cWMKDQ2lffv2qTYX0xTnXqH1wIED1Lt3bzIYDDR69GjKz89XbGyHw0Fr1qyhbt26kdFopAkTJvDZcO7F+ZcbdjgcFBcXRyEhIWQwGOjPf/4zrVy58o5PICopKaEZM2ZQeHg46XQ6euqppygnJ0fh1MwFKHsZiNvhcDiQnJyMBQsWYPv27TCZTOjTpw/69OmD8PBwhISEoEWLFnU+JHvhwgUUFhYiPz8f2dnZ2LZtG/bs2QM/Pz8MGTIEb775Jjp37izix2HiqXPptNtVVFSEtWvXYtu2bUhPT6/zsSiDwQCTyVRnoxaDwYDQ0FBERUWhb9++GDBgALy9vUVEZ9qhjTL/Xnl5OfLz81FaWgqz2QyLxYKAgAD4+/ujbdu2CA4O5r0u2O9ps8yM3QHlz81gTBQuM5MGl5lJ4/8BubxAIaO6jIsAAAAASUVORK5CYII=\n",
"text/plain": [
"<IPython.core.display.Image object>"
]
},
"execution_count": 94,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"#model serving\n",
"#service oriented architecture\n",
"#key performance indicators"
"from dask import delayed\n",
"\n",
"def f(x):\n",
" print('foo' + str(x))\n",
" return x + 1\n",
"\n",
"def b(x,y):\n",
" print('bar')\n",
" return x + y\n",
"\n",
"def g():\n",
" print('goo')\n",
" return 1\n",
"\n",
"x = delayed(g)()\n",
"y = delayed(f)(x) + delayed(f)(3)\n",
"z = delayed(b)(g(), y)\n",
"\n",
"z.visualize()"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "c0df2b21",
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"id": "add91227",
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
......
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Using and Debugging Dask (and other Big Data systems)\n",
"\n",
"In this lecture, we're going to take a deep dive into using Dask and apply what we've learned in previous lectures to understand why the code works the way that it does.\n",
"\n",
"First, we are going to import dask. "
]
},
{
"cell_type": "code",
"execution_count": 19,
"metadata": {},
"outputs": [],
"source": [
"import dask.dataframe as dd"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Now, let's load some data into this notebook."
]
},
{
"cell_type": "code",
"execution_count": 20,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>hvfhs_license_num</th>\n",
" <th>dispatching_base_num</th>\n",
" <th>pickup_datetime</th>\n",
" <th>dropoff_datetime</th>\n",
" <th>PULocationID</th>\n",
" <th>DOLocationID</th>\n",
" <th>SR_Flag</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>HV0003</td>\n",
" <td>B02867</td>\n",
" <td>2019-02-01 00:05:18</td>\n",
" <td>2019-02-01 00:14:57</td>\n",
" <td>245</td>\n",
" <td>251</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>HV0003</td>\n",
" <td>B02879</td>\n",
" <td>2019-02-01 00:41:29</td>\n",
" <td>2019-02-01 00:49:39</td>\n",
" <td>216</td>\n",
" <td>197</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>HV0005</td>\n",
" <td>B02510</td>\n",
" <td>2019-02-01 00:51:34</td>\n",
" <td>2019-02-01 01:28:29</td>\n",
" <td>261</td>\n",
" <td>234</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>HV0005</td>\n",
" <td>B02510</td>\n",
" <td>2019-02-01 00:03:51</td>\n",
" <td>2019-02-01 00:07:16</td>\n",
" <td>87</td>\n",
" <td>87</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>HV0005</td>\n",
" <td>B02510</td>\n",
" <td>2019-02-01 00:09:44</td>\n",
" <td>2019-02-01 00:39:56</td>\n",
" <td>87</td>\n",
" <td>198</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" hvfhs_license_num dispatching_base_num pickup_datetime \\\n",
"0 HV0003 B02867 2019-02-01 00:05:18 \n",
"1 HV0003 B02879 2019-02-01 00:41:29 \n",
"2 HV0005 B02510 2019-02-01 00:51:34 \n",
"3 HV0005 B02510 2019-02-01 00:03:51 \n",
"4 HV0005 B02510 2019-02-01 00:09:44 \n",
"\n",
" dropoff_datetime PULocationID DOLocationID SR_Flag \n",
"0 2019-02-01 00:14:57 245 251 NaN \n",
"1 2019-02-01 00:49:39 216 197 NaN \n",
"2 2019-02-01 01:28:29 261 234 NaN \n",
"3 2019-02-01 00:07:16 87 87 NaN \n",
"4 2019-02-01 00:39:56 87 198 NaN "
]
},
"execution_count": 20,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df = dd.read_csv('fhvhv_tripdata_2019-02.csv')\n",
"df.head()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Let's do some basic analysis: how many rows are there in this dataset?"
]
},
{
"cell_type": "code",
"execution_count": 23,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"CPU times: user 39 s, sys: 6.21 s, total: 45.2 s\n",
"Wall time: 37.4 s\n"
]
},
{
"data": {
"text/plain": [
"20159102"
]
},
"execution_count": 23,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"%time len(df) #why does this take longer than loading"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"We can also do some more complicated analyses:"
]
},
{
"cell_type": "code",
"execution_count": 24,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"CPU times: user 44 s, sys: 7.48 s, total: 51.5 s\n",
"Wall time: 43.4 s\n"
]
},
{
"data": {
"text/plain": [
"hvfhs_license_num\n",
"HV0002 979266\n",
"HV0003 13504994\n",
"HV0004 983926\n",
"HV0005 4690916\n",
"Name: hvfhs_license_num, dtype: int64"
]
},
"execution_count": 24,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"%time df.groupby('hvfhs_license_num')['hvfhs_license_num'].count().compute() #i/o cost"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Now let's redo the above analysis with a slightly different approach."
]
},
{
"cell_type": "code",
"execution_count": 27,
"metadata": {},
"outputs": [],
"source": [
"df2 = dd.read_parquet('fhvhv_tripdata_2019-02.pqt', columns=['hvfhs_license_num'])"
]
},
{
"cell_type": "code",
"execution_count": 28,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"CPU times: user 6.14 s, sys: 896 ms, total: 7.04 s\n",
"Wall time: 6.44 s\n"
]
},
{
"data": {
"text/plain": [
"hvfhs_license_num\n",
"HV0002 979266\n",
"HV0003 13504994\n",
"HV0004 983926\n",
"HV0005 4690916\n",
"Name: hvfhs_license_num, dtype: int64"
]
},
"execution_count": 28,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"%time df2.groupby('hvfhs_license_num')['hvfhs_license_num'].count().compute() #why?"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Why is this so much faster? Now let's look into the implementation."
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"20"
]
},
"execution_count": 7,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df.npartitions"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"CPU times: user 1.05 s, sys: 216 ms, total: 1.26 s\n",
"Wall time: 1.29 s\n"
]
},
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>hvfhs_license_num</th>\n",
" <th>dispatching_base_num</th>\n",
" <th>pickup_datetime</th>\n",
" <th>dropoff_datetime</th>\n",
" <th>PULocationID</th>\n",
" <th>DOLocationID</th>\n",
" <th>SR_Flag</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>HV0005</td>\n",
" <td>B02510</td>\n",
" <td>2019-02-03 12:32:29</td>\n",
" <td>2019-02-03 12:46:41</td>\n",
" <td>33</td>\n",
" <td>97</td>\n",
" <td>1.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>HV0005</td>\n",
" <td>B02510</td>\n",
" <td>2019-02-03 12:48:40</td>\n",
" <td>2019-02-03 12:57:35</td>\n",
" <td>97</td>\n",
" <td>106</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>HV0003</td>\n",
" <td>B02764</td>\n",
" <td>2019-02-03 12:51:53</td>\n",
" <td>2019-02-03 13:01:57</td>\n",
" <td>262</td>\n",
" <td>229</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>HV0002</td>\n",
" <td>B02914</td>\n",
" <td>2019-02-03 12:57:56</td>\n",
" <td>2019-02-03 13:15:12</td>\n",
" <td>234</td>\n",
" <td>143</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>HV0003</td>\n",
" <td>B02888</td>\n",
" <td>2019-02-03 12:56:45</td>\n",
" <td>2019-02-03 13:09:11</td>\n",
" <td>234</td>\n",
" <td>230</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" hvfhs_license_num dispatching_base_num pickup_datetime \\\n",
"0 HV0005 B02510 2019-02-03 12:32:29 \n",
"1 HV0005 B02510 2019-02-03 12:48:40 \n",
"2 HV0003 B02764 2019-02-03 12:51:53 \n",
"3 HV0002 B02914 2019-02-03 12:57:56 \n",
"4 HV0003 B02888 2019-02-03 12:56:45 \n",
"\n",
" dropoff_datetime PULocationID DOLocationID SR_Flag \n",
"0 2019-02-03 12:46:41 33 97 1.0 \n",
"1 2019-02-03 12:57:35 97 106 NaN \n",
"2 2019-02-03 13:01:57 262 229 NaN \n",
"3 2019-02-03 13:15:12 234 143 NaN \n",
"4 2019-02-03 13:09:11 234 230 NaN "
]
},
"execution_count": 8,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"%time df.get_partition(2).head()"
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {},
"outputs": [
{
"ename": "ValueError",
"evalue": "Metadata inference failed in `apply`.\n\nYou have supplied a custom function and Dask is unable to \ndetermine the type of output that that function returns. \n\nTo resolve this please provide a meta= keyword.\nThe docstring of the Dask function you ran should have more information.\n\nOriginal error is below:\n------------------------\nValueError(\"time data 'foo' does not match format '%Y-%m-%d %H:%M:%S'\")\n\nTraceback:\n---------\n File \"/Users/sanjaykrishnan/Documents/cmsc21800/venv/lib/python3.7/site-packages/dask/dataframe/utils.py\", line 175, in raise_on_meta_error\n yield\n File \"/Users/sanjaykrishnan/Documents/cmsc21800/venv/lib/python3.7/site-packages/dask/dataframe/core.py\", line 5513, in _emulate\n return func(*_extract_meta(args, True), **_extract_meta(kwargs, True))\n File \"/Users/sanjaykrishnan/Documents/cmsc21800/venv/lib/python3.7/site-packages/dask/utils.py\", line 900, in __call__\n return getattr(obj, self.method)(*args, **kwargs)\n File \"/Users/sanjaykrishnan/Documents/cmsc21800/venv/lib/python3.7/site-packages/pandas/core/series.py\", line 4045, in apply\n mapped = lib.map_infer(values, f, convert=convert_dtype)\n File \"pandas/_libs/lib.pyx\", line 2228, in pandas._libs.lib.map_infer\n File \"<ipython-input-9-3a0da807679d>\", line 2, in <lambda>\n deltas = df['dropoff_datetime'].apply(lambda x: datetime.strptime(x, '%Y-%m-%d %H:%M:%S').timestamp()) - df['pickup_datetime'].apply(datetime.strptime(x, '%Y-%m-%d %H:%M:%S').timestamp())\n File \"/usr/local/Cellar/python/3.7.4/Frameworks/Python.framework/Versions/3.7/lib/python3.7/_strptime.py\", line 577, in _strptime_datetime\n tt, fraction, gmtoff_fraction = _strptime(data_string, format)\n File \"/usr/local/Cellar/python/3.7.4/Frameworks/Python.framework/Versions/3.7/lib/python3.7/_strptime.py\", line 359, in _strptime\n (data_string, format))\n",
"output_type": "error",
"traceback": [
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
"\u001b[0;31mValueError\u001b[0m Traceback (most recent call last)",
"\u001b[0;32m~/Documents/cmsc21800/venv/lib/python3.7/site-packages/dask/dataframe/utils.py\u001b[0m in \u001b[0;36mraise_on_meta_error\u001b[0;34m(funcname, udf)\u001b[0m\n\u001b[1;32m 174\u001b[0m \u001b[0;32mtry\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 175\u001b[0;31m \u001b[0;32myield\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 176\u001b[0m \u001b[0;32mexcept\u001b[0m \u001b[0mException\u001b[0m \u001b[0;32mas\u001b[0m \u001b[0me\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;32m~/Documents/cmsc21800/venv/lib/python3.7/site-packages/dask/dataframe/core.py\u001b[0m in \u001b[0;36m_emulate\u001b[0;34m(func, *args, **kwargs)\u001b[0m\n\u001b[1;32m 5512\u001b[0m \u001b[0;32mwith\u001b[0m \u001b[0mraise_on_meta_error\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mfuncname\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mfunc\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mudf\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mpop\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m\"udf\"\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;32mFalse\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 5513\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0mfunc\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m*\u001b[0m\u001b[0m_extract_meta\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0margs\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;32mTrue\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0m_extract_meta\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;32mTrue\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 5514\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;32m~/Documents/cmsc21800/venv/lib/python3.7/site-packages/dask/utils.py\u001b[0m in \u001b[0;36m__call__\u001b[0;34m(self, obj, *args, **kwargs)\u001b[0m\n\u001b[1;32m 899\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0m__call__\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mobj\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m*\u001b[0m\u001b[0margs\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 900\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0mgetattr\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mobj\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mmethod\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m*\u001b[0m\u001b[0margs\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 901\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;32m~/Documents/cmsc21800/venv/lib/python3.7/site-packages/pandas/core/series.py\u001b[0m in \u001b[0;36mapply\u001b[0;34m(self, func, convert_dtype, args, **kwds)\u001b[0m\n\u001b[1;32m 4044\u001b[0m \u001b[0mvalues\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mastype\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mobject\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mvalues\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 4045\u001b[0;31m \u001b[0mmapped\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mlib\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mmap_infer\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mvalues\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mf\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mconvert\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mconvert_dtype\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 4046\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;32mpandas/_libs/lib.pyx\u001b[0m in \u001b[0;36mpandas._libs.lib.map_infer\u001b[0;34m()\u001b[0m\n",
"\u001b[0;32m<ipython-input-9-3a0da807679d>\u001b[0m in \u001b[0;36m<lambda>\u001b[0;34m(x)\u001b[0m\n\u001b[1;32m 1\u001b[0m \u001b[0;32mfrom\u001b[0m \u001b[0mdatetime\u001b[0m \u001b[0;32mimport\u001b[0m \u001b[0mdatetime\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 2\u001b[0;31m \u001b[0mdeltas\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mdf\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m'dropoff_datetime'\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mapply\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;32mlambda\u001b[0m \u001b[0mx\u001b[0m\u001b[0;34m:\u001b[0m \u001b[0mdatetime\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mstrptime\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mx\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m'%Y-%m-%d %H:%M:%S'\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mtimestamp\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;34m-\u001b[0m \u001b[0mdf\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m'pickup_datetime'\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mapply\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mdatetime\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mstrptime\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mx\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m'%Y-%m-%d %H:%M:%S'\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mtimestamp\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 3\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;32m/usr/local/Cellar/python/3.7.4/Frameworks/Python.framework/Versions/3.7/lib/python3.7/_strptime.py\u001b[0m in \u001b[0;36m_strptime_datetime\u001b[0;34m(cls, data_string, format)\u001b[0m\n\u001b[1;32m 576\u001b[0m format string.\"\"\"\n\u001b[0;32m--> 577\u001b[0;31m \u001b[0mtt\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mfraction\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mgmtoff_fraction\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0m_strptime\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mdata_string\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mformat\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 578\u001b[0m \u001b[0mtzname\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mgmtoff\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mtt\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m-\u001b[0m\u001b[0;36m2\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;32m/usr/local/Cellar/python/3.7.4/Frameworks/Python.framework/Versions/3.7/lib/python3.7/_strptime.py\u001b[0m in \u001b[0;36m_strptime\u001b[0;34m(data_string, format)\u001b[0m\n\u001b[1;32m 358\u001b[0m raise ValueError(\"time data %r does not match format %r\" %\n\u001b[0;32m--> 359\u001b[0;31m (data_string, format))\n\u001b[0m\u001b[1;32m 360\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mlen\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mdata_string\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;34m!=\u001b[0m \u001b[0mfound\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mend\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;31mValueError\u001b[0m: time data 'foo' does not match format '%Y-%m-%d %H:%M:%S'",
"\nThe above exception was the direct cause of the following exception:\n",
"\u001b[0;31mValueError\u001b[0m Traceback (most recent call last)",
"\u001b[0;32m<ipython-input-9-3a0da807679d>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m\u001b[0m\n\u001b[1;32m 1\u001b[0m \u001b[0;32mfrom\u001b[0m \u001b[0mdatetime\u001b[0m \u001b[0;32mimport\u001b[0m \u001b[0mdatetime\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 2\u001b[0;31m \u001b[0mdeltas\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mdf\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m'dropoff_datetime'\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mapply\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;32mlambda\u001b[0m \u001b[0mx\u001b[0m\u001b[0;34m:\u001b[0m \u001b[0mdatetime\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mstrptime\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mx\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m'%Y-%m-%d %H:%M:%S'\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mtimestamp\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;34m-\u001b[0m \u001b[0mdf\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m'pickup_datetime'\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mapply\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mdatetime\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mstrptime\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mx\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m'%Y-%m-%d %H:%M:%S'\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mtimestamp\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 3\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 4\u001b[0m \u001b[0;31m#How would you fix this?\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;32m~/Documents/cmsc21800/venv/lib/python3.7/site-packages/dask/dataframe/core.py\u001b[0m in \u001b[0;36mapply\u001b[0;34m(self, func, convert_dtype, meta, args, **kwds)\u001b[0m\n\u001b[1;32m 3518\u001b[0m \u001b[0margs\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0margs\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 3519\u001b[0m \u001b[0mudf\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;32mTrue\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 3520\u001b[0;31m \u001b[0;34m**\u001b[0m\u001b[0mkwds\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 3521\u001b[0m )\n\u001b[1;32m 3522\u001b[0m \u001b[0mwarnings\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mwarn\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mmeta_warning\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mmeta\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;32m~/Documents/cmsc21800/venv/lib/python3.7/site-packages/dask/dataframe/core.py\u001b[0m in \u001b[0;36m_emulate\u001b[0;34m(func, *args, **kwargs)\u001b[0m\n\u001b[1;32m 5511\u001b[0m \"\"\"\n\u001b[1;32m 5512\u001b[0m \u001b[0;32mwith\u001b[0m \u001b[0mraise_on_meta_error\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mfuncname\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mfunc\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mudf\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mpop\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m\"udf\"\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;32mFalse\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 5513\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0mfunc\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m*\u001b[0m\u001b[0m_extract_meta\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0margs\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;32mTrue\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0m_extract_meta\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;32mTrue\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 5514\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 5515\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;32m/usr/local/Cellar/python/3.7.4/Frameworks/Python.framework/Versions/3.7/lib/python3.7/contextlib.py\u001b[0m in \u001b[0;36m__exit__\u001b[0;34m(self, type, value, traceback)\u001b[0m\n\u001b[1;32m 128\u001b[0m \u001b[0mvalue\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mtype\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 129\u001b[0m \u001b[0;32mtry\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 130\u001b[0;31m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mgen\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mthrow\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mtype\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mvalue\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mtraceback\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 131\u001b[0m \u001b[0;32mexcept\u001b[0m \u001b[0mStopIteration\u001b[0m \u001b[0;32mas\u001b[0m \u001b[0mexc\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 132\u001b[0m \u001b[0;31m# Suppress StopIteration *unless* it's the same exception that\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;32m~/Documents/cmsc21800/venv/lib/python3.7/site-packages/dask/dataframe/utils.py\u001b[0m in \u001b[0;36mraise_on_meta_error\u001b[0;34m(funcname, udf)\u001b[0m\n\u001b[1;32m 194\u001b[0m )\n\u001b[1;32m 195\u001b[0m \u001b[0mmsg\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mmsg\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mformat\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m\" in `{0}`\"\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mformat\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mfuncname\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mfuncname\u001b[0m \u001b[0;32melse\u001b[0m \u001b[0;34m\"\"\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mrepr\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0me\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mtb\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 196\u001b[0;31m \u001b[0;32mraise\u001b[0m \u001b[0mValueError\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mmsg\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;32mfrom\u001b[0m \u001b[0me\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 197\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 198\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;31mValueError\u001b[0m: Metadata inference failed in `apply`.\n\nYou have supplied a custom function and Dask is unable to \ndetermine the type of output that that function returns. \n\nTo resolve this please provide a meta= keyword.\nThe docstring of the Dask function you ran should have more information.\n\nOriginal error is below:\n------------------------\nValueError(\"time data 'foo' does not match format '%Y-%m-%d %H:%M:%S'\")\n\nTraceback:\n---------\n File \"/Users/sanjaykrishnan/Documents/cmsc21800/venv/lib/python3.7/site-packages/dask/dataframe/utils.py\", line 175, in raise_on_meta_error\n yield\n File \"/Users/sanjaykrishnan/Documents/cmsc21800/venv/lib/python3.7/site-packages/dask/dataframe/core.py\", line 5513, in _emulate\n return func(*_extract_meta(args, True), **_extract_meta(kwargs, True))\n File \"/Users/sanjaykrishnan/Documents/cmsc21800/venv/lib/python3.7/site-packages/dask/utils.py\", line 900, in __call__\n return getattr(obj, self.method)(*args, **kwargs)\n File \"/Users/sanjaykrishnan/Documents/cmsc21800/venv/lib/python3.7/site-packages/pandas/core/series.py\", line 4045, in apply\n mapped = lib.map_infer(values, f, convert=convert_dtype)\n File \"pandas/_libs/lib.pyx\", line 2228, in pandas._libs.lib.map_infer\n File \"<ipython-input-9-3a0da807679d>\", line 2, in <lambda>\n deltas = df['dropoff_datetime'].apply(lambda x: datetime.strptime(x, '%Y-%m-%d %H:%M:%S').timestamp()) - df['pickup_datetime'].apply(datetime.strptime(x, '%Y-%m-%d %H:%M:%S').timestamp())\n File \"/usr/local/Cellar/python/3.7.4/Frameworks/Python.framework/Versions/3.7/lib/python3.7/_strptime.py\", line 577, in _strptime_datetime\n tt, fraction, gmtoff_fraction = _strptime(data_string, format)\n File \"/usr/local/Cellar/python/3.7.4/Frameworks/Python.framework/Versions/3.7/lib/python3.7/_strptime.py\", line 359, in _strptime\n (data_string, format))\n"
]
}
],
"source": [
"from datetime import datetime\n",
"deltas = df['dropoff_datetime'].apply(lambda x: datetime.strptime(x, '%Y-%m-%d %H:%M:%S').timestamp()) - df['pickup_datetime'].apply(datetime.strptime(x, '%Y-%m-%d %H:%M:%S').timestamp())\n",
"\n",
"#How would you fix this?"
]
},
{
"cell_type": "code",
"execution_count": 29,
"metadata": {},
"outputs": [],
"source": [
"def safe_strptime(x):\n",
" try:\n",
" time = datetime.strptime(x, '%Y-%m-%d %H:%M:%S').timestamp()\n",
" return time\n",
" except:\n",
" return 0"
]
},
{
"cell_type": "code",
"execution_count": 30,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"/Users/sanjaykrishnan/Documents/cmsc21800/venv/lib/python3.7/site-packages/dask/dataframe/core.py:3522: UserWarning: \n",
"You did not provide metadata, so Dask is running your function on a small dataset to guess output types. It is possible that Dask will guess incorrectly.\n",
"To provide an explicit output types or to silence this message, please provide the `meta=` keyword, as described in the map or apply function that you are using.\n",
" Before: .apply(func)\n",
" After: .apply(func, meta=('dropoff_datetime', 'int64'))\n",
"\n",
" warnings.warn(meta_warning(meta))\n",
"/Users/sanjaykrishnan/Documents/cmsc21800/venv/lib/python3.7/site-packages/dask/dataframe/core.py:3522: UserWarning: \n",
"You did not provide metadata, so Dask is running your function on a small dataset to guess output types. It is possible that Dask will guess incorrectly.\n",
"To provide an explicit output types or to silence this message, please provide the `meta=` keyword, as described in the map or apply function that you are using.\n",
" Before: .apply(func)\n",
" After: .apply(func, meta=('pickup_datetime', 'int64'))\n",
"\n",
" warnings.warn(meta_warning(meta))\n"
]
}
],
"source": [
"deltas = df['dropoff_datetime'].apply(safe_strptime) - df['pickup_datetime'].apply(safe_strptime)"
]
},
{
"cell_type": "code",
"execution_count": 12,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"CPU times: user 31.1 s, sys: 381 ms, total: 31.5 s\n",
"Wall time: 33 s\n"
]
},
{
"data": {
"text/plain": [
"0 579.0\n",
"1 490.0\n",
"2 2215.0\n",
"3 205.0\n",
"4 1812.0\n",
"dtype: float64"
]
},
"execution_count": 12,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"%time deltas.head()"
]
},
{
"cell_type": "code",
"execution_count": 13,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"CPU times: user 33 s, sys: 575 ms, total: 33.5 s\n",
"Wall time: 36.3 s\n"
]
},
{
"data": {
"text/plain": [
"0 579.0\n",
"1 490.0\n",
"2 2215.0\n",
"3 205.0\n",
"4 1812.0\n",
" ... \n",
"95 635.0\n",
"96 2116.0\n",
"97 2886.0\n",
"98 1489.0\n",
"99 1503.0\n",
"Length: 100, dtype: float64"
]
},
"execution_count": 13,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"%time deltas.head(100)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Let's try to fix this!"
]
},
{
"cell_type": "code",
"execution_count": 31,
"metadata": {},
"outputs": [],
"source": [
"deltas = deltas.repartition(1000)"
]
},
{
"cell_type": "code",
"execution_count": 15,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"CPU times: user 29.2 s, sys: 504 ms, total: 29.7 s\n",
"Wall time: 30.5 s\n"
]
},
{
"data": {
"text/plain": [
"0 579.0\n",
"1 490.0\n",
"2 2215.0\n",
"3 205.0\n",
"4 1812.0\n",
"dtype: float64"
]
},
"execution_count": 15,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"%time deltas.head()"
]
},
{
"cell_type": "code",
"execution_count": 16,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"CPU times: user 34.9 s, sys: 740 ms, total: 35.6 s\n",
"Wall time: 43.8 s\n"
]
},
{
"data": {
"text/plain": [
"0 579.0\n",
"1 490.0\n",
"2 2215.0\n",
"3 205.0\n",
"4 1812.0\n",
" ... \n",
"1995 1698.0\n",
"1996 1672.0\n",
"1997 1948.0\n",
"1998 951.0\n",
"1999 685.0\n",
"Length: 2000, dtype: float64"
]
},
"execution_count": 16,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"%time deltas.head(2000)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Now let's try to get the total number of seconds across the whole dask frame"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],