textnavi/python-test/analysis.ipynb

132 lines
600 KiB
Plaintext
Raw Permalink Normal View History

2019-05-14 20:08:35 +00:00
{
"cells": [
{
"cell_type": "code",
"execution_count": 15,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Match: 100% in URL: artikel/artikel3.txt\n"
]
},
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAlYAAAE5CAYAAABS724NAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDMuMC4zLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvnQurowAAIABJREFUeJzsvceTZEl+5/dxf/qFjozUqrRoUa1HNmZngMHsAuAuljTQljTeabYn/gk8kTdeeOSa4ca1NcMaBUCABGYGA4zu6e7prmpR1aVVZlZmRkZGhnzSnYcXEZVZuqqrS3TFx6y7MiPee+lPuX/9p1xorRkzZsyYMWPGjBnz5ZFPuwFjxowZM2bMmDFfF8bCasyYMWPGjBkz5jExFlZjxowZM2bMmDGPibGwGjNmzJgxY8aMeUyMhdWYMWPGjBkzZsxjYiysxowZM2bMmDFjHhNjYTVmzJgxY8aMGfOYGAurMWPGjBkzZsyYx8RYWI0ZM2bMmDFjxjwmzKfdgAHj8u9jxowZM2bMmOcFcbcvnhVh9ZWSpoprKw3CMMFzLaaniqRKo5RCpZr6VgfPs7Btk/pWh+mpIp5rkaQKyzQAEAJM00CIu17Lx9de1SdWO0TpFq45hxAGSkeARukYiYVllFE6RGCAkCgVIoUNQqB1jNLhYB+BJcsIIQGBRqN0iMQcHDdEChcprK/8vJ4ESZLS6UX846/O4HsOf/Tdo5imRAhBEMbUGx3e++gyE5Uc3//2kafd3BeCbi/i9Pk1fv7eOQCklDi2SbHgcmh5kuWFCWYmi0+sPWmqaHUCTp2+zoGlSRbnKk/sbz8qcZJybXWblRtN3nxlkZzvPO0mjRkz5i68EMJKKc3aWpNuLyLn20RxShBE2LZJueRzY2OHaiVHIe+yurpNmip83yZNFYbMvKXuQJA9GTSJ6tCLr2LJEhpNrLYRZAJLIIlVk1i1MISDIfOAwBAuSodE6SZSOCidIgAlA6S0B6IqBgToFKVDYt0iZx3AllWEMJ7Q+X11pErT7YX86v0LVMs5/sW3DmMYEiEgjlPqjQ6/fP88+xdrY2H1hAjDmHOXNvnrfzjF0YPT5HMOhpSjicz2To+3Xl2iXPJHE5mvkn4Ys7a+w+8+vkLOd54LYdXuBFy4ssnn59Z46fDMWFg9BGGY0O2GFAouhiFIU0WSKLQGrTVKaRzHRGuN1mDbBkmiADAMiVKaNFWkqUIphW2bCJEdx7JMQKO1RgiJ1gohBOYTeI7HPLu8EMJKa00cp6SJotMJuXj5Ao1Gl+WlCb737hGkFDiORc53EFJw4eIGWmtM06DXjwCYniwyWStgGF+9xUoKD1PmMjMZgkTt0I9XMGUBgUGsmnTji0TpFrZRxrOW8c0lUmHTT67RDD6i5L6RiTId00uuYoo8mphYtfDMJcJ0nW58gSjdYjb/bzHtAsbXQFg9CsOFyLN/NJqhjVcwNFAOLZVKZ52oFCLrmAdebJFtNLINCyHueNzBUdlt+Lx92+zWa317W+593Du3eXgc9PC4u9u9ty27//692rb7+A+K59n89//dH3BgqUaj2eWL8+v83c8+5eLVOmj41lsHKBXcm20Z/NF7Xbeb7db3vR7Dc9hu9rh4rc5Ws0MQxqRK7Tn+7ut287zv/Vzs2VZkjb/TNb71et5s7e3nt/vcVtd3uL62zU67T5IoUqX2PHO7j3vrse/0bL5ItFo9zp1b5+WX5/E8m14vpN0OUCobF6IoYXq6SJpmAqpazdFq9QHI512CMCboR/R6Ef1+zORkAcOQ9PsxlYo/2s+2TeI4wbIMcrlsQv6iXesxGS+EsBJCYNkGlUqOYsEjTlJKRY+Jap6dnT4bm20EgjhKWF9vYVoGUgqifkQQxJjmk43xT9QOQbJGkKzimfNEaYNYNfGtZaK0jtIhtlHDNScBA1Pk8a1lOtE5wmQDKVxso4rWKbFqolEE6SoAlizjWQ
sIIUl1D8+cxzFqiK+JK/BRiOKE62tNfv3hBVbWmvSCGN+1OXJgijdeXmR5oTrqIH/x3jn++bfn+MF3jnLxap2LV+uEUcLURJ43Xlni7RNL5DwHITKXU6cX8qsPLnDu0gZb2z0MKTiwVOPVY3OcOL6AlNlxe0HMxat1Ll2pkyrFRCXHh59cpd7o4Hs2J47P8/aJZWanSiSp4tpqg1OnV/jiwjrNVh8pBdVyjoPLNb73zcOUi/5okL50bYuLV+ts1tvMz5a5cGWTi1e3CKOEE8fmefPVRQ4uT3J9bZvffHgRKQX/zb95Z7S/UppOL+BvfvIJQZDw7/71W3iejfkIkwwpBJ5rMTtVopj3iJKUU6ev85NfneHY4RmKeRetNf0w4r3fX+bz82ts1NsYUrI4V+HlI3O8fWIJw9j7Tl5ZaXD67Bqnvlil0wlIlSLvu7z28jyvHV9gZrJEP4j48S9Oc+rMCtfXmtQbHVrtgL//58+RUnDi2AKvvTTPoX1TAIRRwpXrW/zm95dYudEkihLyOZcjB6Z485VF5mcqo2u0Xm9nz8OVOtO1Au1uwKnTKwRhwr6FKq+/vMjbJ5YwTYNmq8/la3U+OHWVtY0d+kGMYUgWZyu8/vICb51YxpCCKE65cn2LH//iDJevb3FjsO3/8h9+gmObTJRznDg+z2svLTJRyQGZVfbk6eucOr3C1dVttNJM1QocOzTNN1/fn00eX7CxPlWaIIxHE4MkUayt7WCacmTNBkGn02d9o8W16w163RDft6nVCpw+s4rvOVSrOZTKDtLvx1y7tsWVK3WEyN6RfhBjWwYzM2X275/EsowX7lqPyXghhJWUgrnZCjnfIefbhNE0UmTuPdexWFqoks+7+J7N/v2T2LaBlJIkyaxchiEpl7wnNvsQwsQyKhSsI1iyjBQ2pvBxzKnsZ1lCCJnFRWmNFDaWLOIYk2ArfL0Pz1xA6QhD+AghSFQXgcSSJWxZQZgSQ3iDv1UdxGC9mFy4UueTMytcW21SyLuUSz5Jojh7cQPTkCBg/2INgPXNNu+fvDJ4lhxmp4okiWJ1vcnn59bI+w4njs/j2CabjQ4ffnKFC1fqaA0LM2WU1mxstTl5egXbNtm/WMP3bNIkpdnq8enZVYIw5tjBaUoFD9exsC0T17FGbmk0pKnGkJJiwaVU8EhSRbPV5/2TV5ibLnPkwBTlog9Apxty8Wqdz8+u8Wowh2FIFmbLRFFKIe9iGgZSCFzHYmOrzU47YG1jh4lKDtexCKOES1e3uLrSwLLMbDB6xGsthMAwJK5j4Hs2rx6d48bmDqdOr1BvdJiaKBCEMe+fvMz5K3WiKGVhpoLSmmarz8nT13EdkwNLtdEkaaPe5vefXOX85U0816KQc9AaLFPi2BZSZpYgw5BMVPLUKnm2d3p0uxZTEwUW5yoIIahVs/MdcvbSOp+eWWVtY4dy0cMwJHGs+PzsGpZpoDUszVcB6AcxKzeavPfxJQ4tT1Ip+8zPlInilEo5h2UZI1Of1ppUaRzbZHIij9aQpIorK1tIKZieLDI3XRqIUJvZ6RLNVo9WOxPQC7MVcp5NMe9SKvqjiV8/iDl9bo1Pv1ij0ewxP11GqUzcnzq9Qs5zOHJgeiTCXhRUqgj6MTfWdyiXfIIgZmOjxeRkAWlIOp2Qfj+i2eyxsdHCdS3CIKbfjwnDbLI9O1tGSkGr3afXy6xX6+sthLxpZUxTRS7nEMcpcZwM7stYWb2IvBDCyjAky4sTo9+PHJpGDaYvUghqtcLou7nZMrDXDfOkzbmmzGPKPDlr/23f2UZ18NPA35A5EADI2fvJsZ/hy6zRYO3yTQBiUGHDkB6OOX3bMb5OdHrhQAxkM8deL2Jto0UYJsDNAe7U6RV+/+lVjh2c4QffOcp0rcB6vc1//tvf89m5NTQ3hZVGE8UJKzea/PmPTvDWq0sIKfhP//cHXF9r8vFn1zh2cBrTkFxf2+Zvf/opxw7N8N23D3Li+DxhlPK3P/
2E85c36Acx1XIO37NHbd7a7hJGMa5j8YffPUa56JGmmn4QUchnbjIpBeWix4njmbVpulakH8T802/O8tc/PsXZi+t
"text/plain": [
"<Figure size 720x720 with 1 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"Match: 100% in URL: artikel/artikel2.txt\n"
]
},
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAlYAAAE5CAYAAABS724NAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDMuMC4zLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvnQurowAAIABJREFUeJzsvWePJVeep/ec8Nf79KayvGEZsujaTk/39PTsSD2jnl3MYIQRVhIE6IVeSIAAQR9An2AB6Y2AHUCrWe1K0Gih0RhtU93sYZMssmmKxfJZJr25mde78Ecv4uatLFZmOSbZZPd9qoDKyhs34kTEiTi/83dHSCkZMmTIkCFDhgwZ8vlRft0NGDJkyJAhQ4YM+U1hKKyGDBkyZMiQIUMOiKGwGjJkyJAhQ4YMOSCGwmrIkCFDhgwZMuSAGAqrIUOGDBkyZMiQA2IorIYMGTJkyJAhQw6IobAaMmTIkCFDhgw5IIbCasiQIUOGDBky5IAYCqshQ4YMGTJkyJADQvt1N6DPsPz7kCFDhgwZMuTrgtjvg6+KsBryFSAIQurVDh9dusudG2uomoqiCFLpGONTOU6dnyaTS2AYv13dptt1WN9s0mr3KOSSTE/lD3T/65sN7i9ssbRcpVRMcWi2yJG50oEeA8DzAtY3GzQaXUxT4/jRsQM/xpAhXxaNVo+3PrjL9FiWQ1MFMqnYr7tJQ56BhtdipbvOx/VrOIHzyOd5I8fvj32Hle42duAyFsuz2NkgoyeZjBUxVJ1AhjiBhxO65PQUIZJAhsRVk0CGBIQgJbqi0Qtc7MBBEQoJLYahaAQypO62UYWCpRqYio4TenihTyBDJJKYamKpxjOd22/XCPklIWUI0kHiI1BAxIGd33kIkQBUIkOdRxhsAgqKkgGRQCCQuEjZQ2CA0Pvb+4ThNkgfoWQRIk7kzfWRsosQFmAghNJvh4OUHkLoPLjVPsgwOrZQABMhIuEdhpJWs8eVDxb4+L17nHhhEpCUVYXyeh2753Hq/DST03mEIgbf+6rieQGO7eG5PlbMIBZ/todjhyCUOI5Ht+uSTPgH3ErwPJ9arcPNW+tsV9qYpvaFCCspJa7r0+u5fBFrhAZBiOf6bG020XWVdDZOLG7QbPTwXB9VVWi3bJJpi3Qmju8HBH5It+PQbTuUxtKEocSxPQB6XRczplMopoDofvpegFAEhqGhqs8fyRAEIa22je146JpKIZ88kGvwLMev1jrUG108LyCVsijmk8Riz9ZHpZR0ey6qomBZ+p7b9GyPTsfBdX0ymRiJuHkQp/BrJwhD6s0ehWycIAh/3c0Z8ox0/R4L3VV+tvk2Lb/zyOeH4lN8t/Qay90y224DJ3S51ljgUGKMkpnFCT1qbpte4KApCkktRse3afs9JmJF6l6bUIYkNQs7cKm6Ldp+j7hmoSsaQRhQ99o0vDa+DImrFlPxEpt2lV7gIBC0/C4TsSKjVh6Fpx/zhsLqgIkGrEgAybCFECaKdgjwCMMyYVBF1Y/2xZVPGNZxnZ8iMNGMl1C1o0gUZNggDBYRSh6hFBEigZQtPOctZNhCN15B1Y6AMJFhg8C/h6JOo6glpNwRSlVkWEcoORQlC1IShnXARUo/aps6gZQ81GFMS+f0hWn+u//hT3Bsj+ufLPHLN67z13/1Lj/2AkpjGUzzq991uh2H9ZUqtUqb6dkiU4eKz7wPKSWJuMnJ42OAYPdztSNOHmiUnR92thOD30Xb7BYzD/Y1M1UglbBQFIVGo/fI8Xe+L0R0n6SUg/8/vO3Dbej/HXxH01QOHypy+FBp3/PYfYydvYnHHGNnP0IIPM+nstXijb/7hGwuwYVX5pg+VOTe7Q0qWy0SSZPb19c4eXaKcxcP0W72aLdsFu6UuXNznR/98Ys4tsfmeh0hBEv3txibzPHd3zuDEIJWo0ez0UXTVYql9HMLZQDH8blzr8zaep1sJs53v3X8uf
f1PLiuz+VPl3j/w/tUa13Onp7k+79zipnnsIaur9exLJ2pyb2/W622mb+7yeZWk4sXDnH08Mjnbf5XgnwmwV/88Su/7mYM+YKRQMvrcq+9jhv6WKqBoWis9ipcqd8hkCEvZOYAKDs11nrbJDSLW80lVKFwIjXNhl2jbNdwQo+xWJ6CkabiNbnWuEdGT1J2aggEOSPJ9eYC3cAmrSWYb63wUu44OSONpew9cdmLr/7o+DVDhlWCYJHQX0KIOIo2TTQ0KYRhlcC/gaJmEYoWWYykjwx7oGiAHm0rbaTsIkMHz/0Zmn4KzXgNZICUNhIXhEZkrRJI6eB719GEilASCBHDdy8jZQcpPWT4Xv/7Ib53GUUpRZYvoWFYf9C3fO3dFSxL5/jpSQxTZ3O9wfpKlXu3Njh1burLuqTPTb3a5va1VQBKo+nn2se9hS1u3tpg/u4mk+NZzpye5PTJCQCqtQ73F7a5cm0FRQhczycIQpJJi2+8eoQjcyNIKVnfaHDtxhp37pZRFIHvB8zNlfi9751C17VHBNJuGo0eSysVrlxb4dvfOMahmSK9nscbb14nl41z7Ogoo6U0n15bZf7uJuWtJlJCPG4wN1PkO986jhCwsdng+s115u9skkpZHDsyyqsvRy+jjc0G9xe3aXccTh4bY2a6gJTw/of36XZdisUkJ46NsbJaY/7OJvcXt5ASspk4h2YLXLwwi2nqGLpGrpDk5JnJh87B9wOEiFzKQhE4PQ/H9khn4sTjJq1mj/XVGlbMoFZp0+04vPTaEcIgRNVVKtstWo0elXIT2/YolFJks4nnup87mKbGsSOjzEwX0LUvP4fHMDReOj+LpqncW9j+XNbfDy4vUiwk9xVWhUJkCXNdn/TQXTbka4YiBGNWnhdzx7nauEsoQ7acOiEho1aOuGpxKDGGpRpMxIooKFxvLLDl1Bmz8qT1JIai44Quy90yy90yY1YeN3TpBD3OZo+Q1GPU3TYVt4ETeuT0FCfSMzihh6nquIH7myOspIz8pdeat1jqrn3u/QnEYAavoqIqKprQMBUdUzWwFIu4ZpHUkiS0GDHVeo42NwiDZVDiCKWEouSIhFXf9ScdkC4QAjpCiSOEgRDJ/rYKYVgnDDYIw21C/z6hUgIUELG+a1AilOIucaUAbn+/PhAQBEsIkUJR8gThMjJsIGWXMCijanMI2SKUbZBdECb7dQVVi1w604eKjE/l6bQdVpcqnDo3RbvZY32lxvLiNo7toSgK2Vyc2SMj5ApJTEvHsT1Wlyp9l1xAr+fSrHfxXJ9kOsaLr84RT1oEfkCz3uPurXWa9S6+HxKGklwxyeR0ntkj0SzbdTxq1Q73bm3Qadv4foimqUxM5xmfypHOxnFtj3u3N7h6eYnrnyyj6yq1Sod7tzcBOP/KHCNjmacazGKWTiym4/sBW9uthyxKruuzUW7w8eVFXrk4x9hIhmarx/zdMoemC4yPZdA1lQ8/XqBa65LPJ7BMnTAMyecSKMqTj+96PrV6l3sLW1w4NwOAHwQsr1bxPJ/pqTxSwq35DVZWaxTyCRIJk3jMIJ2ODcxNhqGRTJqEUlKrdahUWoNjaJoKEq58ukwuE2dqMk8Yhty8tY6mq0yMZ+j1XO7eK7O6XmN8LIvs7+fGrXVmpguUCslIXJka6Wwcz33gMi2NRtfBcbzIEiZAVRQMQ6NR74KE6UNFYjGDIAgJA0k6E8Ma/D9E11V8P6BR6+B7AaPjWVLsLRLqjS7rGw02NhsIIQjDkFwuwdnTk5FVsNllfbPBZrlJLGYwNpImt0uotTsO126sYpk6iiJotmxcz2dqIsfcbBFVVWi1bTbLTVZWa9ExZEgyYXH2zCSeG7BdbVOptDl1cpxkwqLZ6nH95jpjI2mKxSTJhEUhn2SklKZa69DpuI+cx/JqlbX1Or2eSxBKYpZOsZBidqZAGIZUqx2WVqp8cnWZbDqOaUYv/iNzJcZHMwCsrdfZKDdotmyymTiGoRHfZe
nbrrRYXa9Tr3d3HSPJ1EQOSTR5WF6pEoaSIAzx/QDL1JmZLjA5ngMetZx+kUgpqdQ7rG42WNus0+65HD9U4tDkwzF
"text/plain": [
"<Figure size 720x720 with 1 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"Match: 100% in URL: artikel/artikel1.txt\n"
]
},
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAlYAAAE5CAYAAABS724NAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDMuMC4zLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvnQurowAAIABJREFUeJzsvemPXted5/c559z92Zfaq1hcRYmURMuSd7fbbXe7p6e7M+lMI0EnmARBAgQIkAQIkH8gSN4kyMvkRYLGBJjJgrwYJ9PbtNFtu9urbMuiFpIiKa61Vz1Vz77c7Zy8uE+ViptEUqQlup8PQdRT9dznPPeee++53/PbjjDGMGHChAkTJkyYMOHjIz/pHZgwYcKECRMmTPh1YSKsJkyYMGHChAkTnhATYTVhwoQJEyZMmPCEmAirCRMmTJgwYcKEJ8REWE2YMGHChAkTJjwhJsJqwoQJEyZMmDDhCTERVhMmTJgwYcKECU+IibCaMGHChAkTJkx4QkyE1YQJEyZMmDBhwhPC+qR3YMyk/PuECRMmTJgw4VlBPOiNT4uwmnAftDZsrTcZ9ENsWzF/pIZlqTu22VjdY6/RZXq2TLEc4Hr2Y31Xu9mn2egxt1TBcW2EeOA1M+FXSLM3oDUYIYDd3pD2YEQYx+RcB9exKAc+y1NlbKVQ8ukaoHf6fd7d2eL7t248cBtHKf7o9BmOlSv49sNfi4nWbPa6/GRthXe3tx643VKxxB+dPkPRdbGVeuB2vy50o5Dr7SaX9rZZyBf5jYWjn/QufWyaoyFvNzZJtWYhX+R0deqebf7FxTcJbIffWjpOyXGf6rWdaM2N9h6Xmw1infKPj53GVQ//aDRmiDEJQuSBBEgxBoRwMKaDMSOEyCGEP34vQojgYFtwwIQYYjApBo2UeYTwMcag9Q6YEWAhZAkhXCBG6xbgImWAEB4AWrcx422lLAIGrXeJo3dQag6l5pGq9kT770lgTAIkYBIQCowGNAg7+xsCRNZPIMfbxCAswGR9ig3EGNKsf00y3tYG0wPs8TmQ8JSfbxNh9SnGGENzt0ez0cN1beozJZq7PYb9ECEEpWqOnc02N9/fotcZUqrkKJYCcgWPNNXEUUocJcRxSr7okct5WLaksdUhihIcx6JYCRAI1m/vcvndNaJoiWq9gJ9zcVyLvZ0e4ShCKkm5kiNNNVGYHLTruBaFok+Qc5Fq4ll+0ozihN1un34Ys9Xu0eoPCeOEnOeQ91zCcspMKY/yJE9bZnSjkPd2d/j25YskWpNoTWoM+tB6oznb5rOzCywUio8krLQxNEdDfrG+xl++fyVr32hSre8wZ786O883j54gsO1/EMJqkMRcb+/xdys3eHlq9tdCWCVa041CEq2pJsl9t/mb29eoej6fm1mgYDtP9drWxrDW6/D6xgrDJOZ3lk/hPsIX6nQXbXpYahmtdzAYhMhj9O5Y6PQAgWUdx5gRqW5g26fRuokxQ6Ssk6arY9GQiSNlHUfKGka3SfUWRu9mbdjnMiGn2+Pv0mPBNIcxA9JkHWOGCFkciwiDTneIo59h7JcRsojk0yesMCO03sXo7bHoFJm4EgrQYAQIhdHtTFzKPCbdBBGAcMnEUhFMB6PbGGGTqVsbIVyM3kPIOkKNt33KTITVM4LWmkE/5K3Xb3Dz/S0sW/Lql07R740Y9ELeeeMWURhTny5y6swCvd6I9l6f1m6PdrPP6ZcWOf7cHIWyz99/511auz0q9QKvfOE4Qkouv7vGj797keZul+PPzbGwXKM+U+SNH11lfWUPP+fw6pdOMhxG7Gy2aTayduszRV44d4Tjz83i/hoKK2MMBpCfkAUv52YCanWvg2MpluplXEsxihN8x6YUeMRpitaGp62sXMui5gccK1foRhG9KKQfx4yS5A5x9TgIAb5tM5PPc6RUohdFdKOIYRITJskDYwWyReQNH2KVZz/SQOxv8yHn8u
5F6SeW2yfPVJDjD44//0nvxhMjTVdI0zWkKBBFbyCEj20/TxS9QfaIjUniS+D/AUZ3iJMLWNYSaXIbrRtY9otE0c/AxAhRJI7fxnO/CVZEHL8NwiVNbmF0B8t+kTS5SZqsAoYkuYxln0W4DklyE52sAgJlHQE1BwgMMdr0x3v76ZyMGN1Gx5dIox8hZA1EAbCBAeACBqM7GL2LtE8j1RHS8PsIWUOqZaS1DCqP1m108h4m3QERjMWlAtNHOefAmvuVHM9EWD0j9Hsj3vjRVUrVPF/9nbO4rsW7b96i3wvxfIeZ+TL1mRK97pC337hBoehTKAU89+IC1XqB995Z5fqVTZ47O89oGLF4bIoz544wM19GCNhrdFlYrvOFrz3PzEKFYT/k9b+7TH22xIkX5hBCcOHNW6SpJpd3WViu8dJrR9nd7nDprRUWl2uP7Yb8NPOj1dt0w5DfO/ncJ/L9gWuzWCtRyfkgBFIIhMgmY0oKbKVwbYUln/6AWfMDvnbkKKeqdVKtSY1mbzjkx2u3+eHtW9xstx67bSUk8/kC//T5M3x9+VjWvjZcb+/xk9UV/v72TTpheO8HTZ80/B7SOoVQc+jkKggfTARmiFCz6OQWAJb7VbIHy4cLK6PNQV9/qF6bMAFQ1nEQFlH0Omm6jmUdR4g8WrewrFMoNYsxcebuMx0+uKgMoA9eSzWDbZ3BmAFIN7sG1TxJfBFMiLIWEMJH6yapXkOIAkIWxuLBwrJOEesuabqGSa6jrBNIWUHKKlKWxpat+ifSRx+F1ltovQE4GGMQhCDizN2HAeEg1RSomcxyld7OrFnCRagK0jqCMX2M3sboFkJWELIEyKw/UeP/v5rJ/zMrrPZnlr1RRGcwojsIidOUVBs822JpuoxnW0897uRpo3V2nGmi2VpvMTVXZm6piuNY/OT77zHoh+SLHvWZEovLNdZu77K308OyFLXpIlMzJRaWa1x86zbDQUiQ9zj23Cw61bRbfYK8S7maI8i5BHmX+myRSi1POIzYXGuysFxnfqlGkqQ0d3sA5Aoe1XqBhSM1Br2Q1l6PNH04i4UxhihN2er32Oj1iHXKfL5AznboxZmFYpQkSCFYKpYIk2RsFYkZJglV32cmyONZFrc7bVqjIUII5vMF8o5LlKbcbDeJ0pSi67JQKOIpi51Bn53BgERrSp5L1QsIbJsbrSbDJMa3bObzBTzLYm80YK3TwbUsLjV2PjFrFYClFDkpCVznnmf8r9qa4lkWs/kCs/nCwd92Bn22+j3Ob258rLalEAS2w3LJYblUOfh70XVZabdxHigcE3S6ipDTCFkHvQsijzEjjG5A8v54UF4ie6Dd22daa9IkJU00OtXEUUJnt0+pnsfPe6SJxnYUaaJJ4hTXs0mSFCEEzhOcTIRJwmqvzXvNBs3REEtIhBD044hYp3dsm2rNXjjk8l6D7UGPYZJgS8lcvsDJUo2ZXP7guk20ph2OuLC7xfagzyCJMcYQ2A7HSxVOlmsUHPdg+1RrVnsdbndabPS7DNMEAdS8gGOlCkeLFQLbZpTEbA56bPV7GCBvO7zf2qUbhbiWxbFi1nbF80m1ph2FXGk2uNlpMkoS5nIFTparnCjf3zXViULea+7w080VojTFUxYzuTxnqtNUPP+O/d0dDbjcbLA96DNKEhylmMvlOVmuMx3kDraN05S9cMi7jS0awz6J1rjKoh/HDNP7uyU/CikrGN0mSn48dj0VQLhIUcaYIWm6jjF9wEKIIlL4xNEF0vQmh4WVED5S1ZGyiBhbljI3VhtDhBA5QCDEfvxVD1AI4WXxXDokizcaYHQLTIQQFgIHUGi9h053UdbCYx3n00TICsp6DqMWMGkDRG7s7ltHyBpCVjMBKWwwMcb0EWo2c23KWRAFBAap5sf9UQThZzFuyU0yy53OBC7O0w6xejaFVZykDKOY9mDEWqPNaqPNxl6HYZQQJym1Yo4//MIL1Iu5O4RVqjVRnNIdhhhjcB2Lou
8hxKfL5B+FMVGYEIYJ3fYArQ1B3sPzbdIkpd8ZMrIUSkksSyKlwA8cbNdCSpmZMwDbUXiBg2UrhBAoJckXPE6cnuP
"text/plain": [
"<Figure size 720x720 with 1 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"# Read input and preprocess data\n",
"import csv\n",
"import numpy as np\n",
"import pandas as pd\n",
"from os import path\n",
"from PIL import Image\n",
"from wordcloud import WordCloud, STOPWORDS, ImageColorGenerator\n",
"from fuzzywuzzy import fuzz\n",
"from ipywidgets import widgets\n",
"from IPython.display import display, clear_output\n",
"\n",
"\n",
"import matplotlib.pyplot as plt\n",
"# IPython line magics are written without a space after '%'.\n",
"%matplotlib inline\n",
"\n",
"# Load the article table. Column 0 is the identifier that is printed as the\n",
"# result URL; column 1 is the article text that is searched and rendered.\n",
"# newline='' lets the csv module handle line endings itself, so quoted fields\n",
"# that contain line breaks are parsed correctly.\n",
"with open('articles.csv', 'r', newline='') as csvfile:\n",
"    # Rows with fewer than two fields (e.g. blank lines) are skipped; indexing\n",
"    # them below would raise IndexError.\n",
"    data = [row for row in csv.reader(csvfile) if len(row) >= 2]\n",
"\n",
"# Look-up from article URL (column 0) to article text (column 1).\n",
"mapping = {row[0]: row[1] for row in data}\n",
"\n",
"# Text input whose current value is read by the submit handler.\n",
"search_box = widgets.Text(description=\"Search words:\", value=\"South Kensington, London\")\n",
"display(search_box)\n",
"\n",
"def submit(sender, min_score=30, max_results=10):\n",
"    \"\"\"Show a word cloud for each article that fuzzily matches the search text.\n",
"\n",
"    Registered as the submit callback of ``search_box``. The query is read from\n",
"    ``search_box.value``; ``sender`` is accepted only because the widget passes\n",
"    an argument to its callback and is not used.\n",
"\n",
"    Args:\n",
"        sender: Argument supplied by the widget callback (unused).\n",
"        min_score: Lowest ``fuzz.token_set_ratio`` score that is still shown.\n",
"        max_results: Number of top-ranked articles that are considered.\n",
"    \"\"\"\n",
"    # Remove the output of the previous search before drawing new results.\n",
"    clear_output()\n",
"\n",
"    # Query currently typed into the search box.\n",
"    search = search_box.value\n",
"\n",
"    # Score every article text against the query. Sorting the (score, url)\n",
"    # tuples in descending order puts the best matches first.\n",
"    ratio = sorted(\n",
"        ((fuzz.token_set_ratio(row[1], search), row[0]) for row in data),\n",
"        reverse=True,\n",
"    )\n",
"    stopwords = set(STOPWORDS)\n",
"\n",
"    for score, url in ratio[:max_results]:\n",
"        if score < min_score:\n",
"            # Scores only decrease from here on, so no later entry can qualify.\n",
"            break\n",
"        print(\"Match: %d%% in URL: %s\" % (score, url))\n",
"        plt.figure(figsize=(10, 10))\n",
"        wordcloud = WordCloud(stopwords=stopwords, background_color=\"white\").generate(mapping[url])\n",
"        plt.imshow(wordcloud, interpolation='bilinear')\n",
"        plt.axis(\"off\")\n",
"        plt.show()\n",
"\n",
"# NOTE(review): Text.on_submit appears to be deprecated in newer ipywidgets\n",
"# releases in favour of observing `value` with continuous_update=False. It is\n",
"# kept here because that replacement only fires when the value changes;\n",
"# confirm against the installed ipywidgets version before migrating.\n",
"search_box.on_submit(submit)"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.7"
}
},
"nbformat": 4,
"nbformat_minor": 2
}