{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "# 2.8 N-grams\n", "\n", "We can break our text down into n-grams to check our preprocessing, explore the contents of our data, or create new features for machine learning. An n-gram is simply a sequence of n neighbouring words (or tokens), where n can be any positive integer: unigrams (n=1), bigrams (n=2), trigrams (n=3), and so on." ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [], "source": [ "import nltk\n", "import pandas as pd\n", "import matplotlib.pyplot as plt" ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "['the', 'rise', 'of', 'artificial', 'intelligence', 'has', 'led', 'to', 'significant', 'advancements', 'in', 'natural', 'language', 'processing', 'computer', 'vision', 'and', 'other', 'fields', 'machine', 'learning', 'algorithms', 'are', 'becoming', 'more', 'sophisticated', 'enabling', 'computers', 'to', 'perform', 'complex', 'tasks', 'that', 'were', 'once', 'thought', 'to', 'be', 'the', 'exclusive', 'domain', 'of', 'humans', 'with', 'the', 'advent', 'of', 'deep', 'learning', 'neural', 'networks', 'have', 'become', 'even', 'more', 'powerful', 'capable', 'of', 'processing', 'vast', 'amounts', 'of', 'data', 'and', 'learning', 'from', 'it', 'in', 'ways', 'that', 'were', 'not', 'possible', 'before', 'as', 'a', 'result', 'ai', 'is', 'increasingly', 'being', 'used', 'in', 'a', 'wide', 'range', 'of', 'industries', 'from', 'healthcare', 'to', 'finance', 'to', 'transportation', 'and', 'its', 'impact', 'is', 'only', 'set', 'to', 'grow', 'in', 'the', 'years', 'to', 'come']\n" ] } ], "source": [ "tokens = ['the', 'rise', 'of', 'artificial', 'intelligence', 'has', 'led', 'to', 'significant', 'advancements', 'in', 'natural', 'language', 'processing', 'computer', 'vision', 'and', 'other', 'fields', 'machine', 'learning', 'algorithms', 'are', 'becoming', 'more', 'sophisticated', 'enabling', 'computers', 'to', 'perform', 'complex', 'tasks', 'that', 
'were', 'once', 'thought', 'to', 'be', 'the', 'exclusive', 'domain', 'of', 'humans', 'with', 'the', 'advent', 'of', 'deep', 'learning', 'neural', 'networks', 'have', 'become', 'even', 'more', 'powerful', 'capable', 'of', 'processing', 'vast', 'amounts', 'of', 'data', 'and', 'learning', 'from', 'it', 'in', 'ways', 'that', 'were', 'not', 'possible', 'before', 'as', 'a', 'result', 'ai', 'is', 'increasingly', 'being', 'used', 'in', 'a', 'wide', 'range', 'of', 'industries', 'from', 'healthcare', 'to', 'finance', 'to', 'transportation', 'and', 'its', 'impact', 'is', 'only', 'set', 'to', 'grow', 'in', 'the', 'years', 'to', 'come']\n", "print(tokens)" ] }, { "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "(to,) 7\n", "(of,) 6\n", "(in,) 4\n", "(the,) 4\n", "(and,) 3\n", " ..\n", "(result,) 1\n", "(domain,) 1\n", "(rise,) 1\n", "(only,) 1\n", "(be,) 1\n", "Length: 79, dtype: int64\n" ] } ], "source": [ "# Unigrams (n=1): nltk.ngrams yields every token as a 1-tuple, and\n", "# value_counts() tallies the tuples with the most frequent first.\n", "unigrams = pd.Series(list(nltk.ngrams(tokens, 1))).value_counts()\n", "print(unigrams)" ] }, { "cell_type": "code", "execution_count": 10, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "Text(0.5, 0, '# of Occurances')" ] }, "execution_count": 10, "metadata": {}, "output_type": "execute_result" }, { "data": { "image/png": 
"iVBORw0KGgoAAAANSUhEUgAAAwEAAAHwCAYAAAD+TmOAAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjMuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8vihELAAAACXBIWXMAAAsTAAALEwEAmpwYAAAy7klEQVR4nO3deZwlVX338c/XGXFABtGZicLIrmgEBXVQeQAVNUYTojEmohAESYJJNC64RMXHYJ5gjBo1EZegIkuM+yCoERFEUCPosINi3AZlkWGXXYHf80dVy6XtnumZ6du36fN5v1796rqnTlX9qnrE+t5z6t5UFZIkSZLacZ9RFyBJkiRpZhkCJEmSpMYYAiRJkqTGGAIkSZKkxhgCJEmSpMYYAiRJkqTGGAIkSSOR5KlJLh11HesryUVJnjrqOsZLsmWSm5LMG3UtkmYfQ4CkOSvJy5OsSHJ7kqMmWP/0JBcnuSXJqUm2Ws2+vp6kkuw0rv3zfftT17PWlUmesZr1T01yV39TN/bzhfU55kzrr9PDpnF/ByS5oP/7/SLJB5NsOl37n6qq2qGqvj7d+01yaJL/nKB9Stexqn5WVRtX1Z3TXZukez9DgKS57HLgn4Ajx69IshhYDvxf4EHACuBTa9jf/wIvHtjHIuBJwFXTVO+aXN7f1I39/NH4Dknmz1AtI5XkNcC/AK8DHkD3d9gK+GqSDWaohjl7refyuUnqGAIkzVlVtbyqPg9cM8HqPwEuqqrPVNVtwKHATkkeuZpdfhzYe2B6xYuA44BfjXVIcr8k701yef/z3iT369ctTvLFJNcnuTbJN5LcJ8mxwJbAF/p3+F8/1XPs3w3/VpL3JLkWOLSv4V1JfpbkyiQfSrLhwDavS3JFX9+Bg+8s9yMefzlu/98ceP3IJF/t6/9BkhcMrDsqyfuTfCnJjUnOTLJdv+70vtt5/TnuPe48Xpfkc+Pa3pfkvROc8ybAW4G/q6oTq+rXVbUSeAFdEPjzvt+8JG9K8uO+nrOSbNGv22HgPK5M8qaBc/ingWPdY8pSP2Lz90nOB25OMn9wFKd/9/7TSY7pj3lRkmUD2z8uyTn9us8k+dTg8dZW//f6f/2/gRuTnNQHXJJs3f9t5/evt0lyet/v5P5v9Z/j+v5Fkp8BX+vbP9OPstzQb7vDwLGPSvKBJF/u/6bfSvKQ/t/8delG2R470P/vk1zWH/8HSZ6+ructaf0ZAiS1agfgvLEXVXUz8OO+fTKXA98Dntm/fjFwzLg+h9C9K70zsBPwBODN/brXAJcCS4AHA2/qDl37AT8D/qh/h/8da3kuTwR+AvwOcBjdO+Tb9zU8DFgKvAUgybOA1wK/BzwcmHQK0nhJ7g98Ffiv/lgvAj4weGPYt70VeCDwo74equrJ/fqd+nMcP+ryn8Cz0k/n6W9c9waOnaCU/wMsoBvJ+Y2qugn4cn9uAAf39fwBsAlwIHBLkoXAycCJwOZ01+iUqV6Hfp9/CGxaVXdMsP45wCeBTYETgMP7c9qALjQeRTf69AngeWtx3MnsA7yE7m+yAd3fdyL/BXwHWEQXeveboM9TgN8Ffr9//WW6fye/A5xNF4QHvYDu3/di4Hbg232/xcBngXcDJHkE8HJgl6pa2O9/5dqcpKTpZQiQ1KqNgRvGtd0ALFzDdscAL+5vajatqm+PW78v8I9VtaqqrqK7IR672fo1sBmwVf/u9Teqqtai5s3TjSKM/Yy9C395Vb2vvyG9Dfgr4NVVdW1V3Qi8DXhh3/cFwMeq6sI++By6FsffC1hZVR+rqjuq6mzgc8CfDvRZXlXf6Wv5OF0QWaOqugI4HfizvulZwNVVddYE3Rf36ya6Ab+iXw/wl8Cbq+oH1Tmvqq7pz+MXVfWvVXVbVd1YVWdOpc7ev1fVz6vq1knWf7Oq/rufi38sXRiELhzO77f/dVUtp7spX18fq6r/7ev5NBNc8yRbArsAb6mqX1XVN+kCyniHVtXNY+dWVUf21+d
27h4te8BA/+Oq6qx+NO044LaqOqY/908BYyMBdwL3Ax6V5L5VtbKqfjwN5y5pHRkCJLXqJrp3hwdtAty4hu2WA08D/o6J36XeHLhk4PUlfRvAO+neHT8pyU+SvGEta768qjYd+Pl03/7zgT5LgI2As8bCAt073ksG6hvsP1jrmmwFPHEwiNCFnocM9PnFwPItdGFrqo6mn8rT/57o+gJcDSzOxPPWN+vXA2xBN7oz3mTtU/XzNawffw0W9LVuDlw2Lvitbl93APcdbEgy9vrXqzneRNd8c+DaqrplDcf+TVs/nert/XSqX3L3O/eLB/pfObB86wSvNwaoqh8Br6ILEquSfDLJ5kgaGUOApFZdxN3v0I5Nddmub59UfxP1ZeBvmPgm9XK6m+UxW/Zt9O+ovqaqtgX+CDh4YF702owI/FZZA8tX09187TAQFh5QVWM3hlfQ3QQP1jfoZroQMWbwBv/nwGnjgsjGVfU361H7oM8Dj0myI9279eOnnoz5Nt3Ukz8ZbOz/hs/m7qk9P6f7m443WTus/vzHrOvf6gpgaZIMtG0xWWe6KWJbj2vbhu5d9cvW4dgPSjJ4bhMde/Dc9gGeSzdl7AEDtYR1UFX/VVW70/3vo+imrUkaEUOApDmrf2hzATAPmJdkwcC7x8cBOyZ5ft/nLcD5VXXxFHb9JuAp/cOo430CeHOSJf0Dmm+hm+9Okr2SPKy/Cfwl3c3c2Mc3Xglsu25nerequgv4MPCeJL/TH3dpkrE53p8GDkjyqP6G8B/G7eJc4E+SbJTuYeG/GFj3RWD7JPsluW//s0uS351ieas9x35KyWfp565X1c8m6XcD3TSr9yV5Vl/H1sBn6J65GAtnHwH+X5KHp/OYdJ/o9EXgIUlele4h6oVJnjhw/n+Q5EFJHkL37vV0+Tbd3/vl/b/N59I9MzKZE4FHDFzvB9FN7frsJFOhJlVVl9B9AtahSTZIsitdEF2dhXRh6xq6YPS2tTnmoCSPSPK0dA/J30YXVP3oUmmEDAGS5rI3091svIFuesmtfRv9fP3n0z24eh3dw7UvnHg391RVl/dzqifyT3Q3W+cDF9A9JDn26S8Pp3sg9Sa6G8IPDHy+/D/ThYfrk0z2YOdU/T3dtKMz+mkcJwOP6Gv/MvBeuk9/+VH/e9B76D7t6Eq66Tm/eTe+f77gmXTX6XK6aSj/QjfXeyoOBY4e9zzDeEcDj2byqUBjtbyDLoy9iy5QnUn3Dv/T+/nr0D2U+mngpL7PR4EN+/P4Pbqb4F8APwT27Lc5lu6B8ZX9dmv62Ngpq6pf0Y1e/AVwPd2/yS/S3WhP1H8V3UPNLwVWARfSPbeyriMv+wK70t3U/xPduU147N4xdNPFLqN7IP6MdTwudP9G3k43UvULugeN37Qe+5O0nrJ2z6RJkuaaJAU8vJ+3PepatgQuBh5SVb8cdT3DluRM4ENV9bERHPtTwMVVNX40SFIDHAmQJM0KSe5D97Gen5yrASDJU/rP0p+fZH/gMXTTfmbi2Lsk2S7dd1M8i26+/+dn4tiSZh+/EVCSNHL9Q71X0k0/edaIyxmmR9BNUdqY7hOK/rT/eNSZ8BC6T7daRPfsxN9U1TkzdGxJs4zTgSRJkqTGOB1IkiRJaowhQJIkSWqMzwSMwOLFi2vrrbcedRmSJEma484666yrq2rJ+HZDwAhsvfXWrFixYtRlSJIkaY5LcslE7U4HkiRJkhpjCJAkSZIaYwiQJEmSGmMIkCRJkhpjCJAkSZIaYwiQJEmSGmMIkCRJkhpjCJAkSZIa45eFjcI1l8HRh4y6CkmSJA3b/oeNuoIJORIgSZIkNcYQIEmSJDXGECBJkiQ1xhAgSZIkNcYQIEmSJDXGECBJkiQ1xhAgSZIkNcYQIEmSJDXGECBJkiQ1xhAgSZIkNcYQIEmSJDXGECBJkiQ1xhAgSZIkNcYQIEmSJDXGECBJkiQ1xhAwIMmGSU5Lsl2SfdZy2yVJThxWbZIkSdJ
0MQTc04HAcmALYK1CQFVdBVyRZLdhFCZJkiRNF0PAPe0LHA+8HdgjyblJXp1kQZKPJbkgyTlJ9pxk+8/3+5AkSZJmLUNAL8kGwLZVtRJ4A/CNqtq5qt4DvAygqh4NvAg4OsmCCXazAthjkv0flGRFkhVX3XjLUM5BkiRJmgpDwN0WA9dPsm534FiAqroYuATYfoJ+q4DNJ9pBVR1RVcuqatmShRutf7WSJEnSOjIE3O1WYKJ39wEyxX0s6PcjSZIkzVqGgF5VXQfM66f53AgsHFh9Ov1c/yTbA1sCP0iyNMkpA/22By6coZIlSZKkdWIIuKeT6Kb+nA/ckeS8JK8GPkAXEC4APgUcUFW3A5sBdwxsvyfwpRmuWZIkSVor80ddwCxzOHBwVZ0MPH3cugMm6P8k4P0Dr58DPHc4pUmSJEnTwxAwoKrOSXJqknlVdecU+h8+tpxkCfDuflqRJEmSNGsZAsapqiPXcbur6L4nQJIkSZrVfCZAkiRJaowhQJIkSWqMIUCSJElqjCFAkiRJaowhQJIkSWqMIUCSJElqjCFAkiRJaowhQJIkSWqMIUCSJElqjCFAkiRJaowhQJIkSWrM/FEX0KRFS2H/w0ZdhSRJkhrlSIAkSZLUGEOAJEmS1BhDgCRJktQYnwkYhWsug6MPGXUVkiSpVT6b2DxHAiRJkqTGGAIkSZKkxhgCJEmSpMYYAiRJkqTGGAIkSZKkxhgCJEmSpMYYAiRJkqTGGAIkSZKkxhgCJEmSpMYYAiRJkqTGGAIkSZKkxhgCJEmSpMYYAiRJkqTGGAIkSZKkxhgCJEmSpMYYAgYk2TDJaUnmrabPO5NclOSd49r3SvLW4VcpSZIkrR9DwD0dCCyvqjtX0+elwOOq6nXj2r8EPCfJRkOrTpIkSZoGhoB72hc4Pp13JrkwyQVJ9gZIcgJwf+DMsbYxVVXA14G9ZrpoSZIkaW3MH3UBs0WSDYBtq2plkucDOwM7AYuB7yY5vaqek+Smqtp5kt2sAPYAPj3B/g8CDgLYctEmQzgDSZIkaWocCbjbYuD6fnl34BNVdWdVXQmcBuwyhX2sAjafaEVVHVFVy6pq2ZKFzhiSJEnS6BgC7nYrsKBfzjruY0G/H0mSJGnWMgT0quo6YF6SBcDpwN5J5iVZAjwZ+M74bZI8L8k/DzRtD1w4IwVLkiRJ68hnAu7pJLqpQMcBuwLnAQW8vqp+MUH/7YBfDrzeE3jjsIuUJEmS1ocjAfd0OLB/dV5XVTtW1aOr6lNjHapq44H+OwMfAUjyYGDDqrpgRiuWJEmS1pIjAQOq6pwkpyaZt4bvChjr/+cDL7cEXjO86iRJkqTpYQgYp6qOXMftvjvdtUiSJEnD4HQgSZIkqTGGAEmSJKkxhgBJkiSpMYYASZIkqTGGAEmSJKkxhgBJkiSpMYYASZIkqTGGAEmSJKkxhgBJkiSpMYYASZIkqTGGAEmSJKkx80ddQJMWLYX9Dxt1FZIkSWqUIwGSJElSYwwBkiRJUmMMAZIkSVJjfCZgFK65DI4+ZNRVSJLWxOe3JM1RjgRIkiRJjTEESJIkSY0xBEiSJEmNMQRIkiRJjTEESJIkSY0xBEiSJEmNMQRIkiRJjTEESJIkSY0xBEiSJEmNMQRIkiRJjTEESJIkSY0xBEiSJEmNMQRIkiRJjTEESJIkSY0xBEiSJEmNaTIEJNkwyWlJtkuyz0D7AUkOX4/9vivJ06anSkmSJGk4mgwBwIHAcmALYJ819F0b7wPeMI37kyRJkqZdqyFgX+B44O3AHknOTfLqft3mSU5M8sMk7xjbIMkzk3w7ydlJPpNk4/E7rapLgEVJHjIjZyFJkiStg+ZCQJINgG2raiXdu/bfqKqdq+o9fZedgb2BRwN7J9kiyWLgzcAzqupxwArg4EkOcTaw2wTHPSjJiiQrrrrxlmk9J0mSJGltzB91ASOwGLh+NetPqaobAJJ8D9gK2BR4FPCtJAAbAN+eZPtVwObjG6v
qCOAIgGXbbFbrVrokSZK0/loMAbcCC1az/vaB5TvprlGAr1bVi6aw/wX9MSRJkqRZqbnpQFV1HTAvyQLgRmDhFDY7A9gtycMAkmyUZPt++Z+TPG+g7/bAhdNctiRJkjRtmgsBvZOA3YHzgTuSnDfwYPBvqaqrgAOATyQ5ny4UPLJf/WjgFwBJ7gs8jO6ZAUmSJGlWanE6EMDhwMFVdTLw9HHrjhpbqKq9Bpa/Buwywb7uW1VjzwfsBXy2qu6Y3nIlSZKk6dPkSEBVnQOcmmTeNOzr9wdezgf+dX33KUmSJA1TqyMBVNWRQ9jnZ6Z7n5IkSdJ0a3IkQJIkSWqZIUCSJElqjCFAkiRJaowhQJIkSWqMIUCSJElqjCFAkiRJaowhQJIkSWqMIUCSJElqjCFAkiRJaowhQJIkSWqMIUCSJElqzPxRF9CkRUth/8NGXYUkSZIa5UiAJEmS1BhDgCRJktQYQ4AkSZLUGJ8JGIVrLoOjDxl1FZKkNfH5LUlzlCMBkiRJUmMMAZIkSVJjDAGSJElSYwwBkiRJUmMMAZIkSVJjDAGSJElSYwwBkiRJUmMMAZIkSVJjDAGSJElSYwwBkiRJUmMMAZIkSVJjDAGSJElSYwwBkiRJUmMMAZIkSVJjDAHjJNkwyWlJlib57Fpu+8kkDx9WbZIkSdJ0MAT8tgOB5VV1WVX96Vpu+0Hg9UOoSZIkSZo2hoDfti9wfJKtk1wIkOSAJMuTnJjkh0neMcm23wCekWT+jFUrSZIkrSVDwIAkGwDbVtXKCVbvDOwNPBrYO8kW4ztU1V3Aj4CdhlimJEmStF4MAfe0GLh+knWnVNUNVXUb8D1gq0n6rQI2H9+Y5KAkK5KsuOrGW6alWEmSJGldGALu6VZgwSTrbh9YvhOYbMrPgn4/91BVR1TVsqpatmThRutXpSRJkrQeDAEDquo6YF6SyYLAb0lyTJInDDRtD1w07cVJkiRJ08QQ8NtOAnZfi/6PAa4ASPJg4NaqumIYhUmSJEnTwU+x+W2HAwdX1X7AjgBVdRRw1FiHqtoLIMkmwA+r6uf9qn2A/5jJYiVJkqS15UjAOFV1DnBqknlT6PvLqvqzgabrgaOHVZskSZI0HRwJmEBVHbmO231sumuRJEmSppsjAZIkSVJjDAGSJElSYwwBkiRJUmMMAZIkSVJjDAGSJElSYwwBkiRJUmMMAZIkSVJjDAGSJElSYwwBkiRJUmMMAZIkSVJjDAGSJElSY+aPuoAmLVoK+x826iokSZLUKEcCJEmSpMYYAiRJkqTGGAIkSZKkxhgCJEmSpMb4YPAoXHMZHH3IqKuQdG/lBwtIktaTIwGSJElSYwwBkiRJUmMMAZIkSVJjDAGSJElSYwwBkiRJUmMMAZIkSVJjDAGSJElSYwwBkiRJUmMMAZIkSVJjDAGSJElSYwwBkiRJUmMMAZIkSVJjDAGSJElSYwwBkiRJUmNmXQhIsmGS05Jsl+TCGTjePyZ5xjTt6+QkD5yOfUmSJEnDMutCAHAgsBy4czp2ls6k51lVb6mqk6fjWMCxwN9O074kSZKkoZiNIWBf4PjBhiTzkrwzyXeTnJ/kpX37xklOSXJ2kguSPLdv3zrJ95N8ADgb2KN//eEkFyU5KcmGfd+jkvxpv7wyyVsH9vfIvn1Jkq/27f+R5JIkiyeo/QTgRUO7MpIkSdI0mFUhIMkGwLZVtXLcqr8AbqiqXYBdgL9Ksg1wG/C8qnocsCfwr0nSb/MI4JiqeixwCfBw4P1VtQNwPfD8Scq4ut/fB4HX9m3/AHytbz8O2HKiDavqOuB+SRZNcG4HJVmRZMVVN96ypkshSZIkDc2sCgHAYrob9PGeCbw4ybnAmcAiupv6AG9Lcj5wMrAUeHC/zSVVdcbAPn5aVef2y2cBW09Sw/IJ+uwOfBKgqk4ErlvNOawCNh/fWFVHVNWyqlq2ZOFGq9lckiRJGq75a9M5ySaD21TVtdNcz63AgokODfxdVX1lXD0
HAEuAx1fVr5OsHNj+5nH7uH1g+U5gw0lquH2gz9i5ZpK+E1lAdx6SJEnSrDSlkYAkL01yJXA+3TvkZwErpruYfjrNvCTjg8BXgL9Jct++nu2T3B94ALCqDwB7AltNd029bwIv6I/9TOA3nwDUP5OwtF8O8BBg5ZDqkCRJktbbVEcCXgvsUFVXD7OY3kl0029+NND2EbqpOWf3N9pXAX8MfBz4QpIVwLnAxUOq6a3AJ5LsDZwGXAHc2H/q0MOAsRGRxwNnVNUdQ6pDkiRJWm9TDQE/BmbqadbDgYOraj9gR4Cqugt4U/8z3q6T7GfHsYX+QePB1+8aWD5gYHnrgeUVwFP7lzcAv19VdyTZFdizqm5PsiPwuaoam/6zH/CBqZykJEmSNCpTDQFvBP4nyZkMzK2vqldMd0FVdU6SU5PMq6pp+a6AabAl8On+nf9fAX8FUFUXAgcP9Luwqk4ZQX2SJEnSlE01BPwH8DXgAuCu4ZXTqaojh32MtVFVPwQeO4V+H56BciRJkqT1MtUQcEdVHbzmbpIkSZJmu6l+T8Cp/ZddbZbkQWM/Q61MkiRJ0lBMdSRgn/73GwfaCth2esuRJEmSNGxTCgFVtc2wC5EkSZI0M6b8jcH9x2E+ioFv9K2qY4ZRlCRJkqThmVIISPIPdJ+Z/yjgv4Fn032LriFAkiRJupeZ6oPBfwo8HfhFVb0E2Am439CqkiRJkjQ0Uw0Bt/bf2ntHkk2AVfhQsCRJknSvNNVnAlYk2RT4MHAWcBPwnWEVJUmSJGl41hgCkgT456q6HvhQkhOBTarq/GEXJ0mSJGn6rXE6UFUV8PmB1ysNAJIkSdK911SnA52RZJeq+u5Qq2nFoqWw/2GjrkKSJEmNmmoI2BN4aZJLgJuB0A0SPGZolUmSJEkaiqmGgGcPtQpJkiRJM2aqIeDGKbZJkiRJmuWmGgLOBrYArqObCrQpcEWSVcBfVdVZwylvjrrmMjj6kFFXIeneymeKJEnraapfFnYi8AdVtbiqFtFND/o08LfAB4ZVnCRJkqTpN9UQsKyqvjL2oqpOAp5cVWcA9xtKZZIkSZKGYqrTga5N8vfAJ/vXewPXJZkH3DWUyiRJkiQNxVRHAvYBHkr3pWHHA1v2bfOAFwylMkmSJElDMaWRgKq6Gvi7SVb/aPrKkSRJkjRsqw0BSd5bVa9K8gWgxq+vqucMrTJJkiRJQ7GmkYBj+9/vGnYhkiRJkmbGakPA2Of/V9VpM1OOJEmSpGGb0jMBSXYDDgW26rcJUFW17fBKkyRJkjQMU/2I0I8CrwbOAu4cXjmSJEmShm2qIeCGqvryUCuRJEmSNCOmGgJOTfJOYDlw+1hjVZ09lKokSZIkDc1UQ8AT+9+P73+H7iNDnzbtFUmSJEkaqjV9T8DB/eIX+98FXAV8s6p+OszCJEmSJA3HfdawfmH/s3H/sxBYBnw5yQuHXJskSZKkIVjT9wS8daL2JA8CTgY+OYyihiHJhsCJwNOqap0/4SjJ1sAXq2rHce1LgGOr6lnrVagkSZI0ZGsaCZhQVV1L91zAvcmBwPL1CQCrU1VXAVf036kgSZIkzVrrFAKSPA24bpprGbZ9geOTbJzklCRnJ7kgyXOhe4c/yfeTfDjJRUlO6kcPSPL4JOcl+TbwstUc4/P9cSRJkqRZa7UhoL9JPn/cz6XA24G/nZkS11+SDYBtq2olcBvwvKp6HLAn8K9JxkY1Hg68v6p2AK4Hnt+3fwx4RVXtuoZDrQD2mKSGg5KsSLLiqhtvWa/zkSRJktbHmj4idK9xrwu4pqpuHlI9w7KY7qYeumlMb0vyZOAuYCnw4H7dT6vq3H75LGDrJA8ANq2q0/r2Y4FnT3KcVcDmE62oqiOAIwCWbbNZrfOZSJIkSetpTQ8GXzJThQzZrcCCfnlfYAnw+Kr6dZKVA+tuH9jmTmBD7v5OhKlY0B9LkiRJmrXW6ZmAe5uqug6Yl2QB8ABgVR8A9gS2WsO21wM3JNm
9b/rNnP8kS5OcMtB9e+DCaS1ekiRJmmZT/cbgueAkYHfg48AXkqwAzgUunsK2LwGOTHIL8JWB9s2AOwZe7wl8aVqqlSRJkoakpRBwOHBwVZ0MTPaA728++7+q3jWwfBaw00C/Q/vfTwLeP9D+HOC501GsJEmSNCzNhICqOifJqUnmTdd3BVTV4WPL/ZeFvbufeiRJkiTNWs2EAICqOnKI+76K7nsCJEmSpFmtiQeDJUmSJN3NECBJkiQ1xhAgSZIkNcYQIEmSJDXGECBJkiQ1xhAgSZIkNcYQIEmSJDXGECBJkiQ1xhAgSZIkNcYQIEmSJDXGECBJkiQ1Zv6oC2jSoqWw/2GjrkKSJEmNciRAkiRJaowhQJIkSWqMIUCSJElqjM8EjMI1l8HRh4y6Cml6+ZyLJEn3Go4ESJIkSY0xBEiSJEmNMQRIkiRJjTEESJIkSY0xBEiSJEmNMQRIkiRJjTEESJIkSY0xBEiSJEmNMQRIkiRJjTEESJIkSY0xBEiSJEmNMQRIkiRJjTEESJIkSY0xBEiSJEmNMQRIkiRJjWkiBCTZMMlpSbZLss9A+wFJDl/Lfb0qyUaTrPtkkoevb72SJEnSMDURAoADgeXAFsA+a+i7Jq8CJgwBwAeB16/n/iVJkqShaiUE7AscD7wd2CPJuUle3a/bPMmJSX6Y5B1jGyT5YJIVSS5K8ta+7RXA5sCpSU6d4DjfAJ6RZP5wT0eSJElad3M+BCTZANi2qlYCbwC+UVU7V9V7+i47A3sDjwb2TrJF335IVS0DHgM8JcljqurfgcuBPatqz/HHqqq7gB8BO01Qx0F9qFhx1Y23TO9JSpIkSWthzocAYDFw/WrWn1JVN1TVbcD3gK369hckORs4B9gBeNQUj7eKbrTgHqrqiKpaVlXLliycbDaRJEmSNHwtTFu5FViwmvW3DyzfCcxPsg3wWmCXqrouyVFr2MegBf0xJUmSpFlpzo8EVNV1wLwkC4AbgYVT2GwT4GbghiQPBp49sO4e+0hyTJInDKzfHrhovQuXJEmShqSFkQCAk4DdgdOAO5KcBxwFXDdR56o6L8k5dDfzPwG+NbD6CODLSa7onwt4DHAFQB8Ybq2qK4Z1IpIkSdL6aiUEHA4cXFUnA08ft+6osYWq2mtg+YCJdlRV7wPeB5BkE+CHVfXzfvU+wH9MW9WSJEnSEMz56UAAVXUO3cd6zpvm/f6yqv5soOl64OjpPIYkSZI03VoZCaCqjpyBY3xs2MeQJEmS1lcTIwGSJEmS7mYIkCRJkhpjCJAkSZIaYwiQJEmSGmMIkCRJkhpjCJAkSZIaYwiQJEmSGmMIkCRJkhpjCJAkSZIaYwiQJEmSGmMIkCRJkhozf9QFNGnRUtj/sFFXIUmSpEY5EiBJkiQ1xhAgSZIkNcYQIEmSJDXGZwJG4ZrL4OhDRl2FNL18zkWSpHsNRwIkSZKkxhgCJEmSpMYYAiRJkqTGGAIkSZKkxhgCJEmSpMYYAiRJkqTGGAIkSZKkxhgCJEmSpMYYAiRJkqTGGAIkSZKkxhgCJEmSpMYYAiRJkqTGGAIkSZKkxhgCJEmSpMYYAiRJkqTGGAJWI8mGSU5LMm+K/U9O8sBh1yVJkiStD0PA6h0ILK+qO6fY/1jgb4dYjyRJkrTeDAGrty9wfJKNk5yS5OwkFyR57iT9TwBeNIP1SZIkSWtt/qgLmK2SbABsW1Urk8wHnldVv0yyGDgjyQlVVYPbVNV1Se6XZFFVXTNufwcBBwFsuWiTmToNSZIk6bc4EjC5xcD1/XKAtyU5HzgZWAo8eJLtVgGbj2+sqiOqallVLVuycKMhlCtJkiRNjSMBk7sVWNAv7wssAR5fVb9OsnJg3XgL+m0lSZKkWcmRgElU1XXAvCQLgAcAq/oAsCew1Vi//lmBpf1ygIcAK0dQsiRJkjQljgSs3knA7sDHgS8kWQGcC1wMkOQ+wMOAa/v+jwfOqKo7Zr5USZIkaWoMAat3OHBwVZ0
M7Dp+ZZIdgc9V1dj0n/2AD8xgfZIkSdJaczrQalTVOcCpk31ZWFVdWFUHDzRdWFWnzEx1kiRJ0rpxJGANqurItej74WHWIkmSJE0HRwIkSZKkxhgCJEmSpMYYAiRJkqTGGAIkSZKkxhgCJEmSpMYYAiRJkqTGGAIkSZKkxhgCJEmSpMYYAiRJkqTGGAIkSZKkxhgCJEmSpMbMH3UBTVq0FPY/bNRVSJIkqVGOBEiSJEmNMQRIkiRJjTEESJIkSY3xmYBRuOYyOPqQUVchTS+fc5Ek6V7DkQBJkiSpMYYASZIkqTGGAEmSJKkxhgBJkiSpMYYASZIkqTGGAEmSJKkxhgBJkiSpMYYASZIkqTGGAEmSJKkxhgBJkiSpMYYASZIkqTGGAEmSJKkxhgBJkiSpMYYASZIkqTFzNgQk2TDJaUnmJXlnkouSvHPIxzw5yQOHeQxJkiRpfc0fdQFDdCCwvKruTPJSYElV3T7YIcn8qrpjGo95LPC3wGHTuE9JkiRpWs3ZkQBgX+D4JCcA9wfOTLJ3kqOSvDvJqcC/JNk5yRlJzk9y3Ng7+Um+nuQ9SU5P8v0kuyRZnuSHSf5pkmOeALxoZk5PkiRJWjdzMgQk2QDYtqpWVtVzgFuraueq+lTfZXvgGVX1GuAY4O+r6jHABcA/DOzqV1X1ZOBDwPHAy4AdgQOSLBp/3Kq6DrjfROskSZKk2WJOhgBgMXD9atZ/pp8m9ABg06o6rW8/GnjyQL8T+t8XABdV1RX9lKKfAFtMsu9VwObjG5MclGRFkhVX3XjLWpyKJEmSNL3magi4FViwmvU3T3E/Y88Q3DWwPPZ6sucpFvTHv4eqOqKqllXVsiULN5ri4SVJkqTpNydDQD8tZ16S1QUBquoG4Loke/RN+wGnrWaT35LklCRL++UADwFWrnXRkiRJ0gyZy58OdBKwO3DyGvrtD3woyUZ003xeMtUDJLkP8DDg2r7p8cAZ0/yJQ5IkSdK0mssh4HDgYODkqtp4rLGqDhjsVFXnAk8av3FVPXVg+evA18evS7Ij8LmqGpv+sx/wgWmpXpIkSRqSOTkdCKCqzgFOTTJviMe4sKoOHmi6sKpOGdbxJEmSpOkwl0cCqKojZ/h4H57J40mSJEnrYs6OBEiSJEmamCFAkiRJaowhQJIkSWqMIUCSJElqjCFAkiRJaowhQJIkSWqMIUCSJElqjCFAkiRJaowhQJIkSWqMIUCSJElqjCFAkiRJaowhQJIkSWrM/FEX0KRFS2H/w0ZdhSRJkhrlSIAkSZLUGEOAJEmS1BhDgCRJktQYnwkYhWsug6MPGXUV0vTyORdJku41HAmQJEmSGmMIkCRJkhpjCJAkSZIaYwiQJEmSGmMIkCRJkhpjCJAkSZIaYwiQJEmSGmMIkCRJkhpjCJAkSZIaYwiQJEmSGmMIkCRJkhpjCJAkSZIaYwiQJEmSGmMIkCRJkhoz1BCQZMMkpyWZN8zjrI8k/51k02nYz5IkJ05DSZIkSdJQDXsk4EBgeVXdOZXOowgLVfUHVXX9NOznKuCKJLutf1WSJEnS8Aw7BOwLHJ/kqUlOT3Jcku8l+VCS+wAkuSnJPyY5E9g1ycFJLux/XjW2oyQvTnJ+kvOSHNu3LUnyuSTf7X9269ufkuTc/uecJAuTbNbXcG6/7z36viuTLE6ydZLvJ/lwkouSnJRkw77PLv2xv53knUkunOR8P9+fsyRJkjRrDS0EJNkA2LaqVvZNTwBeAzwa2A74k779/sCFVfVE4FbgJcATgScBf5XksUl2AA4BnlZVOwGv7Lf9N+A9VbUL8HzgI337a4GXVdXOwB79fvcBvtK37QScO0HZDwfeX1U7ANf3+wT4GPDXVbUrsLpRjRX98SRJkqRZa/4Q972Y7kZ6zHeq6icAST4B7A58lu6m+nN9n92B46rq5r7fcrqb6gI+W1VXA1TVtX3/ZwCPSjJ2jE2SLAS+Bbw7ycfppiNdmuS7wJFJ7gt8vqrOnaD
mnw60nwVs3T8vsLCq/qdv/y9gr0nOeRWw+UQrkhwEHASw5aJNJtlckiRJGr5hTge6FVgw8LrGrR97fdvAMwNhYplge+jq37Wqdu5/llbVjVX1duAvgQ2BM5I8sqpOB54MXAYcm+TFE+zv9oHlO+lC0mQ1TWQB3Xn/lqo6oqqWVdWyJQs3WotdSpIkSdNraCGgqq4D5iUZCwJPSLJN/yzA3sA3J9jsdOCPk2yU5P7A84BvAKcAL0iyCCDJg/r+JwEvH9s4yc797+2q6oKq+he6KTqPTLIVsKqqPgx8FHjcWpzHjUme1De9cOB4S5OcMtB9e2Cy5wUkSZKkWWGY04Ggu0nfHbgD+DbwdrpnAk4HjhvfuarOTnIU8J2+6SNVdQ5AksOA05LcCZwDHAC8Anh/kvPpzuV04K+BVyXZk+7d/O8BX6a7eX9dkl8DNwETjQRM5i+ADye5Gfg6cEPfvll/bmP2BL60FvuVJEmSZlyqJpplM007Tx4LHEz3zvtrq2qyufSzWpKNq+qmfvkNwGZV9cokLwd+VlUn9OtOB57bjx5Matk2m9WKQw8cet3SjNr/sFFXIEmSxklyVlUtG98+1JGAqjonyanArP2ysCn6wyRvpLtel9CNQlBVh491SLIEePeaAoAkSZI0asOeDkRVHdkvnrLajrNYVX0K+NQa+lxF9z0BkiRJ0qw27C8LkyRJkjTLGAIkSZKkxhgCJEmSpMYYAiRJkqTGGAIkSZKkxhgCJEmSpMYYAiRJkqTGGAIkSZKkxhgCJEmSpMYYAiRJkqTGGAIkSZKkxswfdQFNWrQU9j9s1FVIkiSpUY4ESJIkSY0xBEiSJEmNMQRIkiRJjTEESJIkSY0xBEiSJEmNMQRIkiRJjTEESJIkSY0xBEiSJEmNMQRIkiRJjTEESJIkSY1JVY26huYkuRH4wajraMRi4OpRF9EIr/XM8VrPHK/1zPFazyyv98wZ9bXeqqqWjG+cP4pKxA+qatmoi2hBkhVe65nhtZ45XuuZ47WeOV7rmeX1njmz9Vo7HUiSJElqjCFAkiRJaowhYDSOGHUBDfFazxyv9czxWs8cr/XM8VrPLK/3zJmV19oHgyVJkqTGOBIgSZIkNcYQMIOSPCvJD5L8KMkbRl3PXJbkyCSrklw46lrmuiRbJDk1yfeTXJTklaOuaa5KsiDJd5Kc11/rt466prkuybwk5yT54qhrmcuSrExyQZJzk6wYdT1zWZJNk3w2ycX9f7d3HXVNc1GSR/T/nsd+fpnkVaOua5DTgWZIknnA/wK/B1wKfBd4UVV9b6SFzVFJngzcBBxTVTuOup65LMlmwGZVdXaShcBZwB/7b3v6JQlw/6q6Kcl9gW8Cr6yqM0Zc2pyV5GBgGbBJVe016nrmqiQrgWVV5efWD1mSo4FvVNVHkmwAbFRV14+4rDmtvwe8DHhiVV0y6nrGOBIwc54A/KiqflJVvwI+CTx3xDXNWVV1OnDtqOtoQVVdUVVn98s3At8Hlo62qrmpOjf1L+/b//hOzpAkeSjwh8BHRl2LNB2SbAI8GfgoQFX9ygAwI54O/Hg2BQAwBMykpcDPB15fijdKmmOSbA08FjhzxKXMWf30lHOBVcBXq8prPTzvBV4P3DXiOlpQwElJzkpy0KiLmcO2Ba4CPtZPc/tIkvuPuqgGvBD4xKiLGM8QMHMyQZvv4GnOSLIx8DngVVX1y1HXM1dV1Z1VtTPwUOAJSZzuNgRJ9gJWVdVZo66lEbtV1eOAZwMv66d0avrNBx4HfLCqHgvcDPiM4hD1U66eA3xm1LWMZwiYOZcCWwy8fihw+YhqkaZVPz/9c8DHq2r5qOtpQT+E/3XgWaOtZM7aDXhOP1f9k8DTkvznaEuau6rq8v73KuA4uim0mn6XApcOjCB+li4UaHieDZxdVVeOupDxDAEz57vAw5Ns06fCFwInjLgmab31D6t+FPh+Vb171PXMZUmWJNm0X94QeAZw8UiLmqOq6o1V9dC
q2pruv9dfq6o/H3FZc1KS+/cfKkA/NeWZgJ/sNgRV9Qvg50ke0Tc9HfBDHIbrRczCqUDQDQtpBlTVHUleDnwFmAccWVUXjbisOSvJJ4CnAouTXAr8Q1V9dLRVzVm7AfsBF/Rz1QHeVFX/PbqS5qzNgKP7T5q4D/DpqvKjK3Vv92DguO79BOYD/1VVJ462pDnt74CP929I/gR4yYjrmbOSbET3qZAvHXUtE/EjQiVJkqTGOB1IkiRJaowhQJIkSWqMIUCSJElqjCFAkiRJaowhQJIkSWqMIUCS9BtJ/jnJU5P8cZK1+ibR/nsMzkxyTpI9xq3bIMl7k/w4yQ+THJ/kodNbvSRpqgwBkqRBTwTOBJ4CfGMtt306cHFVPbaqxm/7NmAhsH1VPRz4PLC8/7K5aZfE78GRpNXwewIkSSR5J/D7wDbAj4HtgJ8Cn62qfxzXdyvgSGAJcBXdlw09iO5b0DcELgN2rapb+/4bAT8HtqmqXw7s5xvAoVV1SpIXA68FCji/qvZL8mDgQ8C2/SZ/A1wOfLGqduz38Vpg46o6NMnXgf+h+wK7E4D/Bd4MbABcA+xbVVcmORTYst/vlsB7q+rf+/1NVMeSvo4t+zpeVVXfSvIU4N/6tgKeXFU3rt2Vl6TR8J0SSRJV9bokn6H79ueDga9X1W6TdD8cOKaqjk5yIPDvVfXHSd4CLKuql4/r/zDgZ4MBoLcC2CHJL4BDgN2q6uokD+rX/ztwWlU9r/+W5I2BB67hVDatqqcAJHkg8KSqqiR/CbweeE3f75HAnnSjEz9I8kFg+0nq+DfgPVX1zSRb0n3z++/ShYWX9YFgY+C2NdQmSbOGIUCSNOaxwLl0N8jfW02/XYE/6ZePBd6xhv2G7p3yydqfRjficDVAVV3br38a8OK+7U7ghv7GfnU+NbD8UOBTSTajGw346cC6L1XV7cDtSVYBD15NHc8AHjUwc2mTJAuBbwHvTvJxYHlVXbqG2iRp1jAESFLjkuwMHEV303w1sFHXnHMZmNazGmuaV/ojYKskC8dNl3kc8AVghynsY8wd3PN5tgXj1t88sPw+4N1VdUKSpwKHDqy7fWD5Trr/P5wsrNyHia/D25N8CfgD4Iwkz6iqi6d4HpI0Uj4YLEmNq6pzq2pnujn0jwK+Bvx+Ve08SQD4H+CF/fK+wDfXsP+bgaPp3jWfB7+Ze79Rf6xTgBckWdSvG5uGcwrdcwAkmZdkE+BK4HeSLEpyP2Cv1Rz6AXTPJwDsv7oaB443UR0nAb+Z4tSHJpJsV1UXVNW/0E1teuQUjiFJs4IhQJJE//DrdVV1F/DIqlrddKBXAC9Jcj7dMwSvnMIh3kg3Z/5/k/wQ+DPgedW5CDgMOC3JecC7+21eCeyZ5ALgLGCHqvo18I90n2D0RWB177wfCnymfwD56jUVuJo6XgEsS3J+ku8Bf923vyrJhX3fW4EvT+E6SNKs4KcDSZIkSY1xJECSJElqjCFAkiRJaowhQJIkSWqMIUCSJElqjCFAkiRJaowhQJIkSWqMIUCSJElqjCFAkiRJasz/B1zYkFAHrfAuAAAAAElFTkSuQmCC\n", "text/plain": [ "
" ] }, "metadata": { "needs_background": "light" }, "output_type": "display_data" } ], "source": [ "# Bar chart of the ten most frequent unigrams. `unigrams` comes from\n", "# value_counts(), so it is already ordered by descending count and head(10)\n", "# takes the top ten; sort_values() then flips them so the largest bar is\n", "# drawn at the top of the horizontal chart.\n", "ax = unigrams.head(10).sort_values().plot.barh(color='lightsalmon', width=.9, figsize=(12, 8))\n", "ax.set_title('10 Most Frequently Occurring Unigrams')\n", "ax.set_ylabel('Unigram')\n", "ax.set_xlabel('# of Occurrences');  # trailing ';' hides the Text(...) repr" ] }, { "cell_type": "code", "execution_count": 6, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "(that, were) 2\n", "(domain, of) 1\n", "(with, the) 1\n", "(to, grow) 1\n", "(learning, from) 1\n", " ..\n", "(in, natural) 1\n", "(of, deep) 1\n", "(possible, before) 1\n", "(grow, in) 1\n", "(other, fields) 1\n", "Length: 105, dtype: int64\n" ] } ], "source": [ "# Bigrams (n=2): every pair of adjacent tokens as a 2-tuple, tallied with\n", "# value_counts() so the most frequent pairs come first.\n", "bigrams = pd.Series(list(nltk.ngrams(tokens, 2))).value_counts()\n", "print(bigrams)" ] }, { "cell_type": "code", "execution_count": 11, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "Text(0.5, 0, '# of Occurances')" ] }, "execution_count": 11, "metadata": {}, "output_type": "execute_result" }, { "data": { "image/png": 
"iVBORw0KGgoAAAANSUhEUgAAAw8AAAHwCAYAAADgh1MzAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjMuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8vihELAAAACXBIWXMAAAsTAAALEwEAmpwYAAA+dklEQVR4nO3de5xdZX3v8c/XRAjITQmKgBARkcYCCQYqCgrqseqxtdYLKgeL1HL0lFqk2uZY60HbKvXaKlVLFRGLVrGoqFUR5KqgBgIEFMULeEFElJsQ0MDv/LGekc04l5XMTPZM8nm/XvOatdd61rN+a2XY7O9ez7N3qgpJkiRJmsz9hl2AJEmSpLnB8CBJkiSpF8ODJEmSpF4MD5IkSZJ6MTxIkiRJ6sXwIEmSJKkXw4Mkac5KclCSHw27jqlKcmWSg4Zw3J2T/DLJvPV9bElzk+FBkiaQ5KgkK5LcleSkMbY/OclVSe5IcnaSXSbo65wklWTvUes/2dYfNMVar0nylAm2H5TknvZiceTn01M55vrWrtNu09jf4UlWtX+/65O8J8k209V/X1X16Ko6Z7r7bed398C/9/eSvHzguD+oqi2q6u7pPrakDZPhQZImdh3wD8CJozckWQicBvwd8CBgBfDRSfr7NvDigT62BR4L/Gya6p3Mde3F4sjPH4xukGT+eqplqJL8FfBPwKuBren+HXYBvphkk/VUw/q41heO/HsDzwXenGTpVDvdWP5OJN2X4UGSJlBVp1XVJ4Gfj7H5j4Erq+rUqroTOBbYO8keE3R5CnDIwDCRFwKfAH410iDJpkn+Ocl17eefk2zati1M8pkkNyf5RZLzk9wvyYeAnYFPt3eY/7rvObZ3p7+c5B1JfgEc22p4a5IfJPlpkvcm2Wxgn1cn+Umr74jBOwLtDstLR/V/wcDjPZJ8sdX/rSTPH9h2UpJ/TfLZJLcl+WqSR7Rt57Vml7VzPGTUebw6yX+NWveuJP88xjlvBbwe+Iuq+nxV/bqqrgGeTxcg/ldrNy/Ja5J8t9VzcZKHtW2PHjiPnyZ5zcA5/MPAse4ztKrdIfqbJJcDtyeZP3jXKMmxST6W5OR2zCuTLBvYf58kK9u2U5N8dPB4E6mqS4BvAr/T+lrU/u3mt8cPT3Je6/vM9m/xH6Pa/mmSHwBfautPbXdtbmn7PnrUv+e7k3yu/Zt9Ocn27W/6pnR37ZYOtP+bJD9ux/9Wkif3OS9J64/hQZLW3aOBy0YeVNXtwHfb+vFcB3wDeGp7/GLg5FFt/pbuXfAlwN7AfsBr27a/An4EbAc8BHhNd+g6DPgB8AftXeY3r+W5/B7wPeDBwD/SvSO/e6thN2BH4HUASZ4GvAr4H8AjgXGHSo2W5AHAF4EPt2O9EHj34AvOtu71wAOB77R6qKontO17t3McfZfnP4CnpQ07ai+IDwE+NEYpjwMW0N05+o2q+iXwuXZuAMe0ep4BbAUcAdyRZEvgTODzwA501+isvteh9fk/gW2qas0Y2/8Q+E9gG+B04Ph2TpvQhc2T6O52fQR4dt+DJtmX7t91xThNPgx8DdiWLgwfNkabJ9KFj99vjz9H93fwYOASuoA86Pl0f78LgbuAC1u7hcDHgbe32h4FHAXsW1Vbtv6v6XtuktYPw4MkrbstgFtGrbsF2HKS/U4GXtxeLG1TVReO2n4o8IaquqGqfkb3QnrkRdyvgYcCu7R3y8+vqlqLmndody1Gfkbe9b+uqt7VXsjeCfwZ8Mqq+kVV3Qa8EXhBa/t84ANVdUULTMeuxfGfCVxTVR+oqjXtnfD/ohtOM+K0qvpaq+UUugAzqar6CXAe8Ly26mnAjVV18RjNF7ZtY71w/0nbDvBS4LVV9a3qXFZVP2/ncX1Vva2q7qyq26rqq33qbN5ZVT+sqtXjbL+gqv67zUX4EF2IhC5Uzm/7/7qqTqN7sT+Rx7Z/61+2th8Crh7dKMnOwL7A66rqV1V1AV1wGe3Yqrp9pPaqOrG
d/13ce/dt64H2n6iqi9vduU8Ad1bVye3cPgqM3Hm4G9gUWJzk/lV1TVV9d5Jzk7SeGR4kad39ku7d6EFbAbdNst9pwJOAv2Dsd8V3AK4deHxtWwfwFrp3489IN/l1+VrWfF1VbTPw87G2/ocDbbYDNgcuHgkZdO+wbzdQ32D7wVonswvwe4MBhi4sbT/Q5vqB5TvoQlpfH6QNOWq/x7q+ADcCCzP2uP2Htu0AD6O7mzTaeOv7+uEk20dfgwWt1h2AH48KjJP1dVH7t96C7jo/mi4MjrYD8IuqumOSvn+zrg3rOq4N67qVe+8ULBxo/9OB5dVjPN4CoKq+AxxNF0BuSPKfSXZA0qxieJCkdXcl974jPDIk5xFt/bjai7PPAS9n7Be319G9yB6xc1tHe4f3r6pqV+APgGMGxoWvzR2I3yprYPlGuhd1jx4IGVu3F5/QvTP/sFH1DbqdLnyMGAwGPwTOHRVgtqiqlzM9PgnsleR36e4OjB5CM+JCuiE0fzy4sv0bPp17hyD9kO7fdLTx1sPE5z9iXf+tfgLsmCQD6x42XuPfOmjVT+nu9PzWRPnW94OSDNY+Vt+Dtb8IeBbd0LWtgUVtfVgHVfXhqjqA7u+/6IbPSZpFDA+SNIE2mXUBMA+Yl2TBwLvVnwB+N8lzWpvXAZdX1VU9un4N8MQ2SXe0jwCvTbJduk90eh3deH6SPDPJbu3F4610Qz1GPmbzp8Cu63am96qqe4B/B96R5MHtuDsmGRnj/jHg8CSL2wvN/zeqi0uBP06yebpJ1H86sO0zwO5JDkty//azb5Lf6VnehOfYhsZ8nDZ2v6p+ME67W+iGg70rydNaHYuAU+nmlIyEuvcBf5/kkensle4Tsj4DbJ/k6HSTy7dM8nsD5/+MJA9Ksj3du+nT5UK6f++j2t/ms+jmxPTSan82YwTcqrqWbi7EsUk2SbI/Y4eMQVvShbCf0wWmse5o9K3tUUmelO7DAe6kC7B+hKw0yxgeJGlir6V7EbOcbhjM6raONh/hOXQTem+im3T8grG7ua+quq6NKR/LP9C9iLscWEU3uXTk03QeSTdR95d0LyTfPfD9AG+iCx03J3lV/1Mc09/QDY+6qA1HORN4VKv9c8A/033aznfa70HvoPv0qJ/SDSP6zbv/bf7EU+mu03V0w3P+iW6sex/HAh8cNV9jtA8CezL+kKWRWt5MF+LeShfEvkp3R+HJbfw+dJN5Pwac0dq8H9isncf/oHtxfT3dHIKD2z4foptIf03bb7KP7+2tqn5Fd7fkT4Gb6f4mP0P3An48+7dPOvol3Sct/YxuyNxYDgX2pwsD/0BX+0R9n0w3bO3HdB8EcFHfcxnDpsBxdHe+rqebgP2aKfQnaQZk7ebZSZL025IU8Mg2bn3YtewMXAVsX1W3DruemZbkq8B7q+oDM9D3R4Grqmr03SVJGynvPEiSNhhJ7kf38ar/uaEGhyRPbN+VMD/JnwB70U1on46+903yiHTfHfI0uvkMn5yOviVtGPx2SEnSBqFNdv4p3TCapw25nJn0KLqhVFvQfeLTc9vH1E6H7ek+DWxburkfL6+qldPUt6QNgMOWJEmSJPXisCVJkiRJvRgeJEmSJPXinIc5YuHChbVo0aJhlyFJkqQN3MUXX3xjVW031jbDwxyxaNEiVqxYMewyJEmStIFLcu142xy2JEmSJKkXw4MkSZKkXgwPkiRJknoxPEiSJEnqxfAgSZIkqRfDgyRJkqReDA+SJEmSejE8SJIkSerFL4mbI66/Yw3Hrbxx2GVIkiRphi1funDYJYzLOw+SJEmSejE8SJIkSerF8CBJkiSpF8ODJEmSpF4MD5IkSZJ6MTxIkiRJ6sXwIEmSJKkXw4MkSZKkXgwPkiRJknoxPEiSJEnqxfAgSZIkqRfDgyRJkqReDA+SJEmSejE8SJIkSerF8CBJkiSplzkTHpJsluTcJPOSLEryooFthyc5fi37OzrJ5tNf6dpJ8swkrx9
2HZIkSdJk5kx4AI4ATququ4FFwIsmbj6po4H1Eh6SzJ9g82eBP5wNQUaSJEmayFwKD4cCn2rLxwEHJrk0ySvbuh2SfD7J1UnePLJTkvckWZHkypF3+JO8AtgBODvJ2eMdMMmDk1zclvdOUkl2bo+/m2TzJNsl+a8kX28/j2/bj01yQpIzgJPHa1dVBZwDPHP6LpUkSZI0/SZ6R3zWSLIJsGtVXdNWLQdeVVXPbNsPB5YAS4G7gG8leVdV/RD426r6RZJ5wFlJ9qqqdyY5Bji4qm4c77hVdUOSBUm2Ag4EVtCFlguAG6rqjiTvA95RVRe0YPEF4HdaF48BDqiq1Uk+PEG7Fa3/j4067yOBIwG22X6ndbp2kiRJ0nSZE+EBWAjcPEmbs6rqFoAk3wB2AX4IPL+9CJ8PPBRYDFy+Fsf+CvB44AnAG4GnAQHOb9ufAixOMtJ+qyRbtuXTq2r1RO2q6jbgBro7IfdRVScAJwDstHhJrUXNkiRJ0rSbK+FhNbBgkjZ3DSzfDcxP8nDgVcC+VXVTkpN69DPa+XR3BXahGzb1N0ABn2nb7wfsPxASAGgh4faBVWO2axbQnaMkSZI0a82JOQ9VdRMwL8nIC//bgC0n2GXEVnQv4G9J8hDg6QPb7tNHkpOT7DdGH+cB/wu4uqruAX4BPAP4ctt+BnDUQD9Lxqllona7A1f0OB9JkiRpaOZEeGjOAA5oy5cDa5JcNjBh+rdU1WXASuBK4ETufcEP3XCgzw1MmN4L+MkYfVzTFs9rvy8Abm6BBuAVwLIkl7fhUi8bp5yJ2h1M96lLkiRJ0qyV7sN+Zr8kS4FjquqwGeh7K+D9VfW86e67x7EfAny4qp48UbudFi+po045cz1VJUmSpGFZvnThUI+f5OKqWjbWtjlz56GqVtJ9tOq8Gej71mEEh2Zn4K+GdGxJkiSpt7kyYRqAqjpx2DVMt6r6+rBrkCRJkvqYM3ceJEmSJA2X4UGSJElSL4YHSZIkSb0YHiRJkiT1YniQJEmS1IvhQZIkSVIvhgdJkiRJvRgeJEmSJPVieJAkSZLUi+FBkiRJUi+GB0mSJEm9zB92Aepn+83ns3zpwmGXIUmSpI2Ydx4kSZIk9WJ4kCRJktSL4UGSJElSL855mCOuv2MNx628cdhlSNJ65VwvSZpdvPMgSZIkqRfDgyRJkqReDA+SJEmSejE8SJIkSerF8CBJkiSpF8ODJEmSpF4MD5IkSZJ6MTxIkiRJ6sXwIEmSJKkXw4MkSZKkXgwPkiRJknoxPEiSJEnqxfAgSZIkqRfDgyRJkqRe5nx4SLJZknOTzBt2LX0lOTDJlUkuTfKwJJ8fdk2SJEnSZOZ8eACOAE6rqrsHV87yMHEo8NaqWlJVPwR+kuTxwy5KkiRJmsiGEB4OBT4FkOSgJGcn+TCwqq37ZJKL2zv9R47slOSXSf4xyWVJLkrykLb+Ee3x15O8IckvB/Z5dVt/eZLXT1ZYkicnWZlkVZITk2ya5KXA84HXJTmlNf1kOw9JkiRp1prT4SHJJsCuVXXNwOr9gL+tqsXt8RFV9RhgGfCKJNu29Q8ALqqqvYHzgD9r6/8F+Jeq2he4buBYTwUe2fpfAjwmyRMmqG0BcBJwSFXtCcwHXl5V7wNOB15dVSOBYQVw4NpfAUmSJGn9mdPhAVgI3Dxq3deq6vsDj1+R5DLgIuBhdAEA4FfAZ9ryxcCitrw/cGpb/vBAP09tPyuBS4A9Bvoay6OA71fVt9vjDwLjhY0bgB1Gr0xyZJIVSVbcftPPJziUJEmSNPPmD7uAKVoNLBi17vaRhSQHAU8B9q+qO5KcM9D+11VVbfluJr8WAd5UVf/Ws7b0bEerafXolVV1AnACwE6Ll9To7ZIkSdL6NKfvPFTVTcC8NkRoLFsDN7XgsAfw2B7dXgQ8py2/YGD9F4AjkmwBkGTHJA9uy2cl2XFUP1cBi5Ls1h4fBpw7zjF3B67oUZskSZI0NHM6PDRnAAeMs+3
zwPwklwN/TxcMJnM0cEySrwEPBW4BqKoz6IYxXZhkFfBxYMsk9wN2A34x2ElV3Qm8BDi1tb8HeO84xzwY+GyP2iRJkqShmevDlgCOB44Bzqyqc4BzRjZU1V3A08faqaq2GFj+OF0YAPgx8NiqqiQvoJvMPNLuX+gmVP9Gkt8F/quqxhp2dBawdIz1h49a9YfAs8Y7QUmSJGk2mPPhoapWto9nnTf6ux7W0WOA45OEbjL2EZMc/wq68LJOkmwHvL0NwZIkSZJmrTkfHgCq6sRp7Ot8YO/p6q/H8X5G9z0PkiRJ0qy2Icx5kCRJkrQeGB4kSZIk9WJ4kCRJktSL4UGSJElSL4YHSZIkSb0YHiRJkiT1YniQJEmS1IvhQZIkSVIvhgdJkiRJvRgeJEmSJPVieJAkSZLUi+FBkiRJUi/zh12A+tl+8/ksX7pw2GVIkiRpI+adB0mSJEm9GB4kSZIk9WJ4kCRJktSLcx7miOvvWMNxK28cdhmStF4510uSZhfvPEiSJEnqxfAgSZIkqRfDgyRJkqReDA+SJEmSejE8SJIkSerF8CBJkiSpF8ODJEmSpF4MD5IkSZJ6MTxIkiRJ6sXwIEmSJKkXw4MkSZKkXgwPkiRJknoxPEiSJEnqxfAgSZIkqZcNLjwk2SzJuUnmJdkhyceHXdNEkuyZ5KRh1yFJkiRNZoMLD8ARwGlVdXdVXVdVzx12QROpqlXATkl2HnYtkiRJ0kQ2xPBwKPApgCSLklzRlg9PclqSzye5OsmbJ+okyYOTXNyW905SIy/wk3w3yeZJ/iDJV5OsTHJmkockuV/rf7vW9n5JvpNkYZLnJbkiyWVJzhs43KeBF8zAtZAkSZKmzQYVHpJsAuxaVdeM02QJcAiwJ3BIkoeN11dV3QAsSLIVcCCwAjgwyS7ADVV1B3AB8NiqWgr8J/DXVXUP8B90IQbgKcBlVXUj8Drg96tqb+APBw63oh1DkiRJmrU2qPAALARunmD7WVV1S1XdCXwD2GWS/r4CPB54AvDG9vtA4Py2fSfgC0lWAa8GHt3Wnwi8uC0fAXygLX8ZOCnJnwHzBo5zA7DD6IMnOTLJiiQrbr/p55OUKkmSJM2sDS08rAYWTLD9roHlu4H5k/R3Pl1Y2IVuKNTewAHAyJCjdwHHV9WewP8eOXZV/RD4aZInAb8HfK6tfxnwWuBhwKVJtm39LGi130dVnVBVy6pq2QMeuO3ozZIkSdJ6tUGFh6q6CZiXZKIA8VuSnJxkvzE2nQf8L+DqNhzpF8Az6O4gAGwN/Lgt/8mofd9HN3zpY1V1dzvOI6rqq1X1OuBGuhABsDtwxdrULEmSJK1vG1R4aM6guzuwNvYCfjJ65cDciZE7DRcAN7eQAnAscGqS8+nCwKDTgS24d8gSwFuSrGqTuM8DLmvrDwY+u5Y1S5IkSetVqmrYNUyrJEuBY6rqsJ7ttwLeX1XPm+Y6lgHvqKoJJ0In2RQ4FzigqtaM126nxUvqqFPOnM4SJWnWW7504bBLkKSNTpKLq2rZWNsmG/M/51TVyiRnJ5k3Mlxokva3AtMdHJYDL+feT1yayM7A8omCgyRJkjQbbHDhAaCqThzy8Y8DjuvZ9mrg6pmtSJIkSZq6DXHOgyRJkqQZYHiQJEmS1IvhQZIkSVIvhgdJkiRJvRgeJEmSJPVieJAkSZLUi+FBkiRJUi+GB0mSJEm9GB4kSZIk9WJ4kCRJktSL4UGSJElSL4YHSZIkSb3MH3YB6mf7zeezfOnCYZchSZKkjZh3HiRJkiT1YniQJEmS1IvhQZIkSVIvznmYI66/Yw3Hrbxx2GVI0nrlXC9Jml288yBJkiSpF8ODJEmSpF4MD5IkSZJ6MTxIkiRJ6sXwIEmSJKkXw4MkSZKkXgwPkiRJknoxPEiSJEnqxfAgSZIkqRfDgyRJkqReDA+SJEmSejE8SJIkSerF8CBJkiSpF8ODJEmSpF5mfXhIslmSc5PMS7IoyYum2N8rknwzySl
J/jDJ8umqdR3r2STJeUnmD7MOSZIkaTKzPjwARwCnVdXdwCJgSuEB+D/AM6rq0Ko6vaqOm2qBU1FVvwLOAg4ZZh2SJEnSZOZCeDgU+FRbPg44MMmlSV6ZZEGSDyRZlWRlkoMn6ijJe4FdgdPb/ocnOb5tOynJO5N8Jcn3kjy3rd8iyVlJLmnHeVZbv6jdwfj3JFcmOSPJZm3bbknOTHJZ2+8Rbf2rk3w9yeVJXj9Q2ifbeUqSJEmz1qwOD0k2AXatqmvaquXA+VW1pKreAfw5QFXtCbwQ+GCSBeP1V1UvA64DDm77j/ZQ4ADgmXRBBeBO4NlVtQ9wMPC2JGnbHgn8a1U9GrgZeE5bf0pbvzfwOOAnSZ7a2u8HLAEek+QJrf0VwL69LookSZI0JLM6PAAL6V6Uj+cA4EMAVXUVcC2w+xSO98mquqeqvgE8pK0L8MYklwNnAjsObPt+VV3ali8GFiXZEtixqj7R6rqzqu4Antp+VgKXAHvQhQnakKxftX1/I8mRSVYkWXH7TT+fwmlJkiRJUzfbJ+muBsa9k0D3wn463TVG34cC2wGPqapfJ7lmoKbB9ncDm01QU4A3VdW/jbN9U7q7HL9RVScAJwDstHhJ9TwHSZIkaUbM6jsPVXUTMG9gKNJtwOC78+fR5gok2R3YGfhWkh2TnDVNZWwN3NCCw8HALpPUfCvwoyR/1OraNMnmwBeAI5Js0dbvmOTBbXlb4GdV9etpqlmSJEmadrM6PDRn0A1PArgcWNMmIr8SeDdduFgFfBQ4vKruopu7sGaajn8KsCzJCrqgclWPfQ4DXtGGOn0F2L6qzgA+DFzY6v049wahg4H/nqZ6JUmSpBmRqtk9GibJUuCYqjpsLfY5CvhBVZ0+c5VNnySnAf+3qr41XpudFi+po045cz1WJUnDt3zpwmGXIEkbnSQXV9WysbbN9jkPVNXKJGcnmdcmFvfZ5/iZrmu6tE+U+uREwUGSJEmaDWZ9eACoqhOHXcNMaV8Sd/Kw65AkSZImMxfmPEiSJEmaBQwPkiRJknoxPEiSJEnqxfAgSZIkqRfDgyRJkqReDA+SJEmSejE8SJIkSerF8CBJkiSpF8ODJEmSpF4MD5IkSZJ6MTxIkiRJ6mX+sAtQP9tvPp/lSxcOuwxJkiRtxLzzIEmSJKkXw4MkSZKkXgwPkiRJknoxPEiSJEnqxQnTc8T1d6zhuJU3DrsMSVqv/KAISZpdvPMgSZIkqRfDgyRJkqReDA+SJEmSejE8SJIkSerF8CBJkiSpF8ODJEmSpF4MD5IkSZJ6MTxIkiRJ6sXwIEmSJKkXw4MkSZKkXgwPkiRJknoxPEiSJEnqxfAgSZIkqRfDgyRJkqRe5lR4SLJZknOTzEtyUJLPrOX+Bya5MsmlSTabqTrXRpKjkrxk2HVIkiRJk5lT4QE4Ajitqu5ex/0PBd5aVUuqavVkjZPMW8fjrI0TgVesh+NIkiRJUzLXwsOhwKcGHm+V5BNJvpHkvUnuB5DkqUkuTHJJklOTbJHkpcDzgdclOSWdtyS5IsmqJIe0fQ9KcnaSDwOr2l2OtyT5epLLk/zvyYpM8p4kK9pdjtcPrD+u1Xp5krcCVNUdwDVJ9pu+yyRJkiRNv/nDLqCvJJsAu1bVNQOr9wMWA9cCnwf+OMk5wGuBp1TV7Un+Bjimqt6Q5ADgM1X18STPAZYAewMLga8nOW+g39+tqu8nORK4par2TbIp8OUkZ1TV9yco92+r6hftzsVZSfYCfgQ8G9ijqirJNgPtVwAHAl8bdc5HAkcCbLP9TmtxtSRJkqTpN2fCA90L/JtHrftaVX0PIMlHgAOAO+kCxZeTAGwCXDhGfwcAH2lDoH6a5FxgX+DW1u9IOHgqsFeS57bHWwOPBCYKD89vL/znAw9t9Xyj1fa+JJ8FBudr3ADsMbqTqjoBOAFgp8VLaoLjSZIkSTNuLoWH1cCCUetGv6AuIMAXq+qFk/SXCbbdPqrdX1TVF/oUmeT
hwKuAfavqpiQnAQuqak0bmvRk4AXAUcCT2m4L6M5PkiRJmrXmzJyHqroJmJdkMEDsl+Thba7DIcAFwEXA45PsBpBk8yS7j9HlecAhbU7DdsATGDVsqPkC8PIk92/97Z7kAW35qjHab0UXPm5J8hDg6a3tFsDWVfXfwNF0Q6ZG7A5c0eMySJIkSUMzl+48AJxBN9zozPb4QuA4YE+6MPCJqronyeHAR9ocBejmQHx7VF+fAPYHLqO7Y/HXVXV9ktHDh94HLAIuSTcO6mfAHyVZyBh3L6rqsiQrgSuB7wFfbpu2BD7Vwk+AVw7s9njg9UiSJEmzWKrmzlD6JEvpJj8fNgtqeSbdBO53TrGfXue00+IlddQpZ07URJI2OMuXLhx2CZK00UlycVUtG2tb7zsPSR4IPGxwn6q6ZOrl9VdVK9vHqM6bwnc9TFcta/UFdRNYCPzdNPUlSZIkzZhe4SHJ3wOHA9/l3knKxb0TftebqjpxfR9zJlXVF4ddgyRJktRH3zsPzwceUVW/msliJEmSJM1efT9t6QpgmxmsQ5IkSdIs1/fOw5uAlUmuAO4aWVlVfzgjVUmSJEmadfqGhw8C/wSsAu6ZuXIkSZIkzVZ9w8ONU/1IUkmSJElzW9/wcHGSNwGnc99hS+v1o1olSZIkDU/f8LC0/X7swLqhfFSrJEmSpOHoFR6q6uCZLkSSJEnS7LY23zD9P4FHAwtG1lXVG2aiKEmSJEmzT6/veUjyXuAQ4C+AAM8DdpnBuiRJkiTNMn2/JO5xVfVi4Kaqej2wP/CwmStLkiRJ0mzTd9jSne33HUl2AH4OPHxmStJYtt98PsuXLhx2GZIkSdqI9Q0Pn06yDfAW4BK6T1r695kqSpIkSdLsM2l4SHI/4Kyquhn4rySfARZU1S0zXZwkSZKk2WPSOQ9VdQ/wtoHHdxkcJEmSpI1P32FLZyR5DnBaVdVMFqSxXX/HGo5beeOwy5Ck9cq5XpI0u/QND8cADwDWJLmT7uNaq6q2mrHKJEmSJM0qfb9hesuZLkSSJEnS7NYrPCTZZ4zVtwDXVtWa6S1JkiRJ0mzUd9jSu4F9gFXt8Z7AZcC2SV5WVWfMRHGSJEmSZo++3zB9DbC0qh5TVY8BlgBXAE8B3jwzpUmSJEmaTfqGhz2q6sqRB1X1Dbow8b2ZKUuSJEnSbNN32NK3krwH+M/2+BDg20k2BX49I5VJkiRJmlX63nk4HPgOcDTwSuB7bd2vgYNnoC5JkiRJs0zfj2pdTfct028bY/Mvp7UiSZIkSbPShOEhyceq6vlJVgG/9c3SVbXXjFUmSZIkaVaZ7M7DX7bfz5zpQiRJkiTNbhOGh6r6Sft97ci6JAuBn1fVb92JkCRJkrThmnDCdJLHJjknyWlJlia5gu77HX6a5Gnrp0RJkiRJs8Fkw5aOB14DbA18CXh6VV2UZA/gI8DnZ7g+SZIkSbPEZB/VOr+qzqiqU4Hrq+oigKq6auZLkyRJkjSbTBYe7hlYXj1q2zrPeUiyWZJzk8xbx/2PTrL5uh5/OiT57yTbJFnUhnON1eacJMsm6ec/kzxyZqqUJEmSps9k4WHvJLcmuQ3Yqy2PPN5zCsc9Ajitqu5ex/2PBoYaHqrqGVV18zR09R7gr6ehH0mSJGlGTRgeqmpeVW1VVVtW1fy2PPL4/lM47qHApwCSHNTeof94kquSnJIkbduTk6xMsirJiUk2TfIKYAfg7CRnT3SQ1u8/J/lKkiuS7NfWPyjJJ5NcnuSiJHu19U9Mcmn7WZlkyyQPTXJeW3dFkgNb22vaJ08BzE/ywdbfx8e6K5LkqUkuTHJJklOTbNE2nQ88JUmvL+yTJEmShmWyOw/TLskmwK5Vdc3A6qV0dxMWA7sCj0+yADgJOKSq9qSb3P3yqnoncB1wcFUd3OOQD6iqxwH/BzixrXs9sLJ9yd1rgJPb+lcBf15VS4AD6YZqvQj4Qlu3N3DpGMd4FHBC6+/WdqzBc14IvBZ4SlX
tA6wAjgGoqnuA77S+GbXfkUlWJFlx+00/73GqkiRJ0sxZ7+EBWAjcPGrd16rqR+2F9KXAIroX5N+vqm+3Nh8EnrAOx/sIQFWdB2yVZBvgAOBDbf2XgG2TbA18GXh7u7uxTVWtAb4OvCTJscCeVXXbGMf4YVV9uS3/R+t/0GPpgtGXk1wK/Amwy8D2G+juptxHVZ1QVcuqatkDHrjtWp+4JEmSNJ2GER5WAwtGrbtrYPluursMmabjjZ7YXeP0XVV1HPBSYDPgoiR7tNDxBODHwIeSvLjnMQYF+GJVLWk/i6vqTwe2L+C3J6RLkiRJs8p6Dw9VdRMwrw1LmshVwKIku7XHhwHntuXbgC1HGiY5eWQ+wxgOaW0OAG6pqluA8+jmXZDkIODGqro1ySOqalVV/RPd0KI9kuwC3FBV/w68H9hnjGPsnGT/tvxC4IJR2y+iG4q1Wzvm5kl2H9i+O3Dl+JdCkiRJGr5h3HkAOIPfHtpzH1V1J/AS4NQkq+g+Nva9bfMJwOcGJkzvBfxknK5uSvKVtu/Iu/3HAsuSXA4cRzeMCODoNin6Mro7AZ8DDgIuTbISeA7wL2Mc45vAn7T+HkT3CUqD5/Iz4HDgI63NRcAeAEkeAqyuqvHqlyRJkmaFVK3z1zWs+0GTpcAxVXXYNPS1FfD+qnreGNvOAV5VVSumepyZkuSVwK1V9f6J2u20eEkddcqZ66kqSZodli9dOHkjSdK0SnJxVY35XWVDufNQVSvpPmp1nb4kblRft44VHOaQm+kmg0uSJEmz2tC+W6CqTpy81ZSPcdBMH2OqquoDw65BkiRJ6mNYcx4kSZIkzTGGB0mSJEm9GB4kSZIk9WJ4kCRJktSL4UGSJElSL4YHSZIkSb0YHiRJkiT1YniQJEmS1IvhQZIkSVIvhgdJkiRJvRgeJEmSJPUyf9gFqJ/tN5/P8qULh12GJEmSNmLeeZAkSZLUi+FBkiRJUi+GB0mSJEm9OOdhjrj+jjUct/LGYZchSeuVc70kaXbxzoMkSZKkXgwPkiRJknoxPEiSJEnqxfAgSZIkqRfDgyRJkqReDA+SJEmSejE8SJIkSerF8CBJkiSpF8ODJEmSpF4MD5IkSZJ6MTxIkiRJ6sXwIEmSJKkXw4MkSZKkXgwPkiRJknqZsfCQZLMk5yaZl2RRkitm6lgDx3xDkqfMQL8HJrkyyaVJNpvmvt+a5EnT2ackSZI0E2byzsMRwGlVdfd0dZjOuDVX1euq6szpOt6AQ4G3VtWSqlo9UM+8aej7XcDyaehHkiRJmlEzGR4OBT41emW7E/GWJF9PcnmS/93Wb5HkrCSXJFmV5Flt/aIk30zybuAS4MD2+N/b3YAzRu4GJDkpyXPb8jVJXj/Q3x5t/XZJvtjW/1uSa5MsHO8kkrwUeD7wuiSnJDkoydlJPgysSrIgyQfaMVYmObjtd3iSTyb5dJLvJzkqyTGtzUVJHgRQVdcC2ybZftquvCRJkjQDZiQ8JNkE2LWqrhlj858Ct1TVvsC+wJ8leThwJ/DsqtoHOBh4W5K0fR4FnFxVS4FrgUcC/1pVjwZuBp4zTik3tv7eA7yqrft/wJfa+k8AO090LlX1PuB04NVVdWhbvR/wt1W1GPjz1m5P4IXAB5MsaO1+F3hRa/+PwB3tHC4EXjxwmEuAx09UhyRJkjRs82eo34V0L+rH8lRgr5E7BMDWdGHgR8AbkzwBuAfYEXhIa3NtVV000Mf3q+rStnwxsGicY5020OaP2/IBwLMBqurzSW7qd0r38bWq+v5Af+9q/V2V5Fpg97bt7Kq6DbgtyS3Ap9v6VcBeA/3dAOww+iBJjgSOBNhm+53WoUxJkiRp+sxUeFgNLBhnW4C/qKov3GdlcjiwHfCYqvp1kmsG+rh9VB93DSzfDYw3ifmugTYj55px2q6NwXom6m+wznsGHt/Dfa/9Arprdh9VdQJwAsBOi5fUOlUqSZIkTZMZGbZUVTcB8waG7wz6AvDyJPc
HSLJ7kgfQ3YG4oQWHg4FdZqI24AK6OQwkeSrwwJENbc7FjmvZ33l08ztIsjvdMKhvrWUfuwMz/mlUkiRJ0lTM5ITpM+iG9Iz2PuAbwCXt41v/je5d+FOAZUlW0L0Yv2qG6no98NQklwBPB35CN6zofsBuwC/Wsr930wWlVcBHgcOr6q5J9vmNFqJ2A1as5XElSZKk9SpVMzMaJslS4JiqOmxGDrCOkmwK3F1Va5LsD7ynqpYk+V3giKo6Zj3X82xgn6r6u4na7bR4SR11ykx8Cq0kzV7Ll477YXiSpBmS5OKqWjbWtpma80BVrWwfaTpvOr/rYRrsDHys3Wn4FfBnAFV1BbBeg0MzH3jbEI4rSZIkrZUZCw8AVXXiTPa/LqrqamDpsOsYUVWnDrsGSZIkqY+ZnPMgSZIkaQNieJAkSZLUi+FBkiRJUi+GB0mSJEm9GB4kSZIk9WJ4kCRJktSL4UGSJElSL4YHSZIkSb0YHiRJkiT1YniQJEmS1IvhQZIkSVIvhgdJkiRJvcwfdgHqZ/vN57N86cJhlyFJkqSNmHceJEmSJPVieJAkSZLUi+FBkiRJUi/OeZgjrr9jDcetvHHYZUjSeuVcL0maXbzzIEmSJKkXw4MkSZKkXgwPkiRJknoxPEiSJEnqxfAgSZIkqRfDgyRJkqReDA+SJEmSejE8SJIkSerF8CBJkiSpF8ODJEmSpF4MD5IkSZJ6MTxIkiRJ6sXwIEmSJKkXw4MkSZKkXja48JBksyTnJpmXZFGSFw27pokkeWuSJw27DkmSJGkyG1x4AI4ATququ4FFwIyHhyTzp7D7u4Dl01WLJEmSNFM2xPBwKPCptnwccGCSS5O8MsmCJB9IsirJyiQHT9ZZkr9LclWSLyb5SJJXtfXnJHljknOBv0zy5NbnqiQnJtk0yX5JTmvtn5VkdZJNWh3fA6iqa4Ftk2w/I1dDkiRJmiZTecd81kmyCbBrVV3TVi0HXlVVz2zb/wqgqvZMsgdwRpLdq+rOcfpbBjwHWEp3rS4BLh5osk1VPTHJAuBq4MlV9e0kJwMvB45v+wIcCFwB7Nv6+upAP5cAjwf+ayrnL0mSJM2kDe3Ow0Lg5gm2HwB8CKCqrgKuBXafpP2nqmp1Vd0GfHrU9o+2348Cvl9V326PPwg8oarWAN9J8jvAfsDbgSfQBYnzB/q5Adhh9MGTHJlkRZIVt9/08wnKlCRJkmbehhYeVgMLJtietexvsva392h3PvB04NfAmXSB5ADgvIE2C+hqv4+qOqGqllXVsgc8cNveRUuSJEkzYYMKD1V1EzCvDSMCuA3YcqDJeXRzIkiyO7Az8K0kOyY5a4wuLwD+oM1R2AL4n+Mc+ipgUZLd2uPDgHMHjnk0cGFV/QzYFtgDuHJg/93phjRJkiRJs9YGFR6aM+je2Qe4HFiT5LIkrwTeTRcuVtENOTq8qu4CHgqsGd1RVX0dOB24DDgNWAHcMka7O4GXAKe2vu8B3ts2fxV4CPfeabgcuLyqCiDJ/YHdWt+SJEnSrLVBTZhujgeOAc6sql8DTx61/fAx9nks8K/j9PfWqjo2yeZ0AeBtAFV10GCjqjqLeydHD65fDWw68PjIUU2eCXy8zY+QJEmSZq0NLjxU1cokZyeZ177roc8+x0+w+YQki+nmJXywqi6ZlkLvNZ8WSCRJkqTZbIMLDwBVdeI09jWjXzJXVafOZP+SJEnSdNkQ5zxIkiRJmgGGB0mSJEm9GB4kSZIk9WJ4kCRJktSL4UGSJElSL4YHSZIkSb0YHiRJkiT1YniQJEmS1IvhQZIkSVIvhgdJkiRJvRgeJEmSJPVieJAkSZLUy/xhF6B+tt98PsuXLhx2GZIkSdqIeedBkiRJUi+GB0mSJEm9GB4kSZIk9eKchzni+jvWcNzKG4ddhiStV871kqTZxTsPkiRJknoxPEiSJEnqxfAgSZIkqRfDgyRJkqReDA+SJEmSejE8SJIkSerF8CBJkiSpF8ODJEm
SpF4MD5IkSZJ6MTxIkiRJ6sXwIEmSJKkXw4MkSZKkXgwPkiRJknoxPEiSJEnqZYMID0k2S3Juknk923+l/V6U5EUD6w9PcvxaHvvwJDsMPL4mycK12H/PJCetzTElSZKkYdggwgNwBHBaVd3dp3FVPa4tLgJeNEHTPg4Hdpis0QS1rAJ2SrLzFOuQJEmSZtSGEh4OBT4FkOTdSf6wLX8iyYlt+U+T/ENb/mXb7zjgwCSXJnllW7dDks8nuTrJmyc6aJLnAsuAU1ofm7VNf5HkkiSrkuzR2j4gyYlJvp5kZZJnDXT1aeAFU74KkiRJ0gya8+EhySbArlV1TVt1HnBgW94RWNyWDwDOH7X7cuD8qlpSVe9o65YAhwB7Aockedh4x66qjwMrgENbH6vbphurah/gPcCr2rq/Bb5UVfsCBwNvSfKAtm3FQM2SJEnSrDTnwwOwELh54PH5dHcTFgPfAH6a5KHA/sBXevR3VlXdUlV3tv13WYeaTmu/L6YbGgXwVGB5kkuBc4AFwMhQpRsYY+hTkiOTrEiy4vabfr4OZUiSJEnTZ/6wC5gGq+leiANQVT9O8kDgaXR3IR4EPB/4ZVXd1qO/uwaW72bdrtFIH4P7B3hOVX1rjPYL6M7jPqrqBOAEgJ0WL6l1qEOSJEmaNnP+zkNV3QTMS7JgYPWFwNF04eF8uqFDo4csAdwGbNnnOElOTrLfFPr4At1ciLT+lg5s2x24ok8dkiRJ0rDM+fDQnEE3p2HE+cD8qvoOcAnd3YexwsPlwJoklw1MmB7PXsBPxlh/EvDeUROmx/L3wP2By5Nc0R6POBj47CTHlyRJkoYqVXN/NEx7F/+YqjpshvrfCnh/VT1vBvreFDgXOKCq1ozXbqfFS+qoU86c7sNL0qy2fGnvr82RJE2TJBdX1bKxtm0Qdx6qaiVwdt8viVuH/m+dieDQ7Awsnyg4SJIkSbPBhjBhGoCqOnHYNayLqroauHrYdUiSJEmT2SDuPEiSJEmaeYYHSZIkSb0YHiRJkiT1YniQJEmS1IvhQZIkSVIvhgdJkiRJvRgeJEmSJPVieJAkSZLUi+FBkiRJUi+GB0mSJEm9GB4kSZIk9WJ4kCRJktTL/GEXoH6233w+y5cuHHYZkiRJ2oh550GSJElSL4YHSZIkSb0YHiRJkiT14pyHOeL6O9Zw3Mobh12GJK1XzvWSpNnFOw+SJEmSejE8SJIkSerF8CBJkiSpF8ODJEmSpF4MD5IkSZJ6MTxIkiRJ6sXwIEmSJKkXw4MkSZKkXgwPkiRJknoxPEiSJEnqxfAgSZIkqRfDgyRJkqReDA+SJEmSejE8SJIkSeplVoeHJJslOTfJvDG2nZTkuTN8/K/MQJ/bJflqkpVJDkxyZpIHTvdxJEmSpOk2q8MDcARwWlXdPYyDV9XjZqDbJwNXVdXSqjof+BDwf2bgOJIkSdK0mu3h4VDgUwDpHJ/kG0k+Czx4pFGSJ7d38lclOTHJpm39NUnemOTCJCuS7JPkC0m+m+Rlrc0WSc5Kcknb/1kD/f6y/T4oyTlJPp7kqiSnJMlEhSfZpfV7efu9c5IlwJuBZyS5NMlmwOnAC6f1qkmSJEkzYNaGhySbALtW1TVt1bOBRwF7An8GPK61WwCcBBxSVXsC84GXD3T1w6raHzi/tXsu8FjgDW37ncCzq2of4GDgbeMEg6XA0cBiYFfg8ZOcwvHAyVW1F3AK8M6quhR4HfDRqlpSVaur6iZg0yTbTtKfJEmSNFSzNjwAC4GbBx4/AfhIVd1dVdcBX2rrHwV8v6q+3R5/sLUdcXr7vQr4alXdVlU/A+5Msg0Q4I1JLgfOBHYEHjJGPV+rqh9V1T3ApcCiSerfH/hwW/4QcMAEbW8Adhi9MsmR7Y7Jittv+vkkh5MkSZJm1mwOD6uBBaPW1RjtJhw+BNzVft8zsDzyeD7d0KjtgMdU1RLgp2Mcd7AfgLvbvmtjrNpHLKA73/vuUHVCVS2rqmUPeKA
3JiRJkjRcszY8tOE889qwJIDzgBckmZfkoXRDjACuAhYl2a09Pgw4dy0OtTVwQ1X9OsnBwC5rU2eSNyV59hibvgK8oC0fClwwzv4BtgeuWZvjSpIkSevbrA0PzRncO9znE8DVdMOP3kMLCFV1J/AS4NQkq+juKLx3LY5xCrAsyQq6F/lXrWWNewLXj7H+FcBL2nCow4C/HGf/xwAXVdWatTyuJEmStF6laqLRNMOVZClwTFUdNuxaxpPkC1X1+1PY/1+A06vqrIna7bR4SR11ypnrehhJmpOWL1047BIkaaOT5OKqWjbWtll956GqVgJnj/UlcbPFVIJDc8VkwUGSJEmaDdZ20u96V1UnDruGmVRV/z7sGiRJkqQ+ZvWdB0mSJEmzh+FBkiRJUi+GB0mSJEm9GB4kSZIk9WJ4kCRJktSL4UGSJElSL4YHSZIkSb0YHiRJkiT1YniQJEmS1IvhQZIkSVIvhgdJkiRJvcwfdgHqZ/vN57N86cJhlyFJkqSNmHceJEmSJPVieJAkSZLUi+FBkiRJUi+GB0mSJEm9GB4kSZIk9WJ4kCRJktSL4UGSJElSL4YHSZIkSb0YHiRJkiT1YniQJEmS1Euqatg1qIcktwHfGnYdc9hC4MZhFzGHef2mxuu37rx2U+P1mxqv37rz2k3NsK/fLlW13Vgb5q/vSrTOvlVVy4ZdxFyVZIXXb915/abG67fuvHZT4/WbGq/fuvPaTc1svn4OW5IkSZLUi+FBkiRJUi+Gh7njhGEXMMd5/abG6zc1Xr9157WbGq/f1Hj91p3Xbmpm7fVzwrQkSZKkXrzzIEmSJKkXw8MskORpSb6V5DtJlo+xPUne2bZfnmSfvvtuDHpcv0Pbdbs8yVeS7D2w7Zokq5JcmmTF+q18+Hpcu4OS3NKuz6VJXtd3341Bj+v36oFrd0WSu5M8qG3b2P/2TkxyQ5Irxtnu894Eelw/n/cm0OP6+dw3jh7Xzue9CSR5WJKzk3wzyZVJ/nKMNrP7+a+q/BniDzAP+C6wK7AJcBmweFSbZwCfAwI8Fvhq33039J+e1+9xwAPb8tNHrl97fA2wcNjnMYuv3UHAZ9Zl3w39Z22vAfAHwJcGHm+0f3vt/J8A7ANcMc52n/emdv183pva9fO5bx2v3ai2Pu/99jV5KLBPW94S+PZce93nnYfh2w/4TlV9r6p+Bfwn8KxRbZ4FnFydi4Btkjy0574bukmvQVV9papuag8vAnZazzXOVlP5+/Fvb+2vwQuBj6yXyuaAqjoP+MUETXzem8Bk18/nvYn1+Psbz0b/97eW187nvVGq6idVdUlbvg34JrDjqGaz+vnP8DB8OwI/HHj8I377j2i8Nn323dCt7TX4U7o0P6KAM5JcnOTIGahvNut77fZPclmSzyV59FruuyHrfQ2SbA48DfivgdUb899eHz7vTR+f99aNz31T4PPe5JIsApYCXx21aVY///kN08OXMdaN/gis8dr02XdD1/saJDmY7n+iBwysfnxVXZfkwcAXk1zV3lXZGPS5dpfQfUX9L5M8A/gk8Mie+27o1uYa/AHw5aoafLduY/7b68PnvWng894687lv6nzem0CSLeiC1dFVdevozWPsMmue/7zzMHw/Ah428Hgn4Lqebfrsu6HrdQ2S7AW8D3hWVf18ZH1VXdd+3wB8gu6W4MZi0mtXVbdW1S/b8n8D90+ysM++G4G1uQYvYNSt+438b68Pn/emyOe9dedz37TweW8cSe5PFxxOqarTxmgyq5//DA/D93XgkUkenmQTuv/YTh/V5nTgxW32/WOBW6rqJz333dBNeg2S7AycBhxWVd8eWP+AJFuOLANPBcb89IgNVJ9rt32StOX96J4zft5n341Ar2uQZGvgicCnBtZt7H97ffi8NwU+702Nz31T4/Pe+Nrf1fuBb1bV28dpNquf/xy2NGRVtSbJUcAX6GbRn1hVVyZ5Wdv+XuC/6Wbefwe4A3jJRPsO4TSGpuf
1ex2wLfDu9v+CNVW1DHgI8Im2bj7w4ar6/BBOYyh6XrvnAi9PsgZYDbyguo988G+v3/UDeDZwRlXdPrD7Rv23B5DkI3SfaLMwyY+A/wfcH3ze66PH9fN5bwI9rp/PfePoce3A572JPB44DFiV5NK27jXAzjA3nv/8hmlJkiRJvThsSZIkSVIvhgdJkiRJvRgeJEmSJPVieJAkSZLUi+FBkiRJUi+GB0nSlCV5U5KDkvxRkuVrue92Sb6aZGWSA0dt2yTJPyf5bpKrk3wqyU7TW70kqS/DgyRpOvwe8FW6L4Y6fy33fTJwVVUtrarR+74R2BLYvaoeCXwSOG3kC7ymWxK//0iSJuD3PEiS1lmStwC/Dzwc+C7wCOD7wMer6g2j2u4CnAhsB/yM7ouPHkT3DambAT8G9q+q1a395sAPgYdX1a0D/ZwPHFtVZyV5MfAqoIDLq+qwJA8B3gvs2nZ5OXAd8Jmq+t3Wx6uALarq2CTnAF+h+/Km04FvA68FNqH7VuFDq+qnSY6l+yKnXdvvf66qd7b+xqpju1bHzq2Oo6vqy0meCPxLW1fAE6rqtrW78pI0HL7DIklaZ1X16iSn0n1j6jHAOVX1+HGaHw+cXFUfTHIE8M6q+qMkrwOWVdVRo9rvBvxgMDg0K4BHJ7ke+Fvg8VV1Y5IHte3vBM6tqmcnmQdsATxwklPZpqqeCJDkgcBjq6qSvBT4a+CvWrs9gIPp7oZ8K8l7gN3HqeNfgHdU1QVJdqb7VtjfoQsZf96CxBbAnZPUJkmzhuFBkjRVS4FL6V5Yf2OCdvsDf9yWPwS8eZJ+Q/fO/Hjrn0R3h+NGgKr6Rdv+JODFbd3dwC0tEEzkowPLOwEfTfJQursP3x/Y9tmqugu4K8kNwEMmqOMpwOKBEVZbJdkS+DLw9iSnAKdV1Y8mqU2SZg3DgyRpnSRZApxE92L7RmDzbnUuZWD40QQmGzf7HWCXJFuOGtazD/Bp4NE9+hixhvvO81swavvtA8vvAt5eVacnOQg4dmDbXQPLd9P9f3S8kHM/xr4OxyX5LPAM4KIkT6mqq3qehyQNlROmJUnrpKouraoldHMEFgNfAn6/qpaMExy+ArygLR8KXDBJ/7cDH6R7l34e/GZuwebtWGcBz0+ybds2MlzoLLp5DiSZl2Qr4KfAg5Nsm2RT4JkTHHpruvkXAH8yUY0DxxurjjOA3wzFamGLJI+oqlVV9U90Q7D26HEMSZoVDA+SpHXWJgXfVFX3AHtU1UTDll4BvCTJ5XRzJP6yxyH+L92cgG8nuRp4HvDs6lwJ/CNwbpLLgLe3ff4SODjJKuBi4NFV9WvgDXSfCPUZYKJ3+o8FTm0Ts2+crMAJ6ngFsCzJ5Um+AbysrT86yRWt7Wrgcz2ugyTNCn7akiRJkqRevPMgSZIkqRfDgyRJkqReDA+SJEmSejE8SJIkSerF8CBJkiSpF8ODJEmSpF4MD5IkSZJ6MTxIkiRJ6uX/AwoG6naUQhLVAAAAAElFTkSuQmCC\n", "text/plain": [ "
" ] }, "metadata": { "needs_background": "light" }, "output_type": "display_data" } ], "source": [ "# bigrams: n=2 -- computed in this cell so the plot also works after Restart and Run All\n", "bigrams = pd.Series(nltk.ngrams(tokens, 2)).value_counts()\n", "\n", "# value_counts() orders by descending count, so head(10) keeps the ten most frequent bigrams;\n", "# sorting ascending afterwards makes barh draw the most frequent bigram at the top.\n", "bigrams.head(10).sort_values().plot.barh(color='skyblue', width=.9, figsize=(12, 8))\n", "plt.title('10 Most Frequently Occurring Bigrams')\n", "plt.ylabel('Bigram')\n", "plt.xlabel('# of Occurrences')" ] }, { "cell_type": "code", "execution_count": 8, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "(not, possible, before) 1\n", "(industries, from, healthcare) 1\n", "(networks, have, become) 1\n", "(to, transportation, and) 1\n", "(become, even, more) 1\n", " ..\n", "(advancements, in, natural) 1\n", "(fields, machine, learning) 1\n", "(even, more, powerful) 1\n", "(of, industries, from) 1\n", "(grow, in, the) 1\n", "Length: 105, dtype: int64\n" ] } ], "source": [ "# trigrams: n=3\n", "trigrams = pd.Series(nltk.ngrams(tokens, 3)).value_counts()\n", "print(trigrams)" ] } ], "metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.5" } }, "nbformat": 4, "nbformat_minor": 4 }