├── README.md ├── Untitled-checkpoint.ipynb └── fuzzywuzy (1)-checkpoint.ipynb /README.md: -------------------------------------------------------------------------------- 1 | # AI-models-for-community 2 | -------------------------------------------------------------------------------- /Untitled-checkpoint.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [], 3 | "metadata": {}, 4 | "nbformat": 4, 5 | "nbformat_minor": 5 6 | } 7 | -------------------------------------------------------------------------------- /fuzzywuzy (1)-checkpoint.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 87, 6 | "id": "78eda1a1", 7 | "metadata": {}, 8 | "outputs": [ 9 | { 10 | "data": { 11 | "text/html": [ 12 | "
\n", 13 | "\n", 26 | "\n", 27 | " \n", 28 | " \n", 29 | " \n", 30 | " \n", 31 | " \n", 32 | " \n", 33 | " \n", 34 | " \n", 35 | " \n", 36 | " \n", 37 | " \n", 38 | " \n", 39 | " \n", 40 | " \n", 41 | " \n", 42 | " \n", 43 | " \n", 44 | " \n", 45 | " \n", 46 | " \n", 47 | " \n", 48 | " \n", 49 | " \n", 50 | " \n", 51 | " \n", 52 | " \n", 53 | " \n", 54 | " \n", 55 | " \n", 56 | " \n", 57 | " \n", 58 | " \n", 59 | " \n", 60 | " \n", 61 | " \n", 62 | " \n", 63 | " \n", 64 | " \n", 65 | " \n", 66 | " \n", 67 | " \n", 68 | " \n", 69 | " \n", 70 | " \n", 71 | " \n", 72 | " \n", 73 | " \n", 74 | " \n", 75 | " \n", 76 | " \n", 77 | " \n", 78 | " \n", 79 | " \n", 80 | " \n", 81 | " \n", 82 | " \n", 83 | " \n", 84 | " \n", 85 | " \n", 86 | " \n", 87 | " \n", 88 | " \n", 89 | " \n", 90 | " \n", 91 | " \n", 92 | " \n", 93 | " \n", 94 | " \n", 95 | " \n", 96 | " \n", 97 | " \n", 98 | " \n", 99 | " \n", 100 | " \n", 101 | " \n", 102 | " \n", 103 | " \n", 104 | " \n", 105 | " \n", 106 | " \n", 107 | " \n", 108 | " \n", 109 | " \n", 110 | " \n", 111 | " \n", 112 | " \n", 113 | " \n", 114 | " \n", 115 | " \n", 116 | " \n", 117 | " \n", 118 | " \n", 119 | " \n", 120 | " \n", 121 | " \n", 122 | " \n", 123 | " \n", 124 | " \n", 125 | " \n", 126 | " \n", 127 | "
PostalAddressPostalCodeCityCountryBORedirectCode
016 Boulevard des Italiens75009.0ParisFR30.0
1Rue de la Terre Victoria35760.0Saint-GrégoireFR36.0
266 Avenue du Maine75014.0ParisFR32.0
313 Rue de Copenhague67300.0SchiltigheimFR40.0
460 Rue Lavoisier38330.0Montbonnot-Saint-MartinFR39.0
..................
31422 RUE DE PRESIDENT WILSON3200.0VICHYFR962.0
315600 Boulevard Albert Camus69400.0Villefranche-sur-SaôneFR962.0
31622 Avenue des Nations93420.0VillepinteFR1061.0
3178 Cours Louis Lumière94300.0VincennesFR900.0
3188 Cours Louis Lumière94300.0VincennesFR900.0
\n", 128 | "

319 rows × 5 columns

\n", 129 | "
" 130 | ], 131 | "text/plain": [ 132 | " PostalAddress PostalCode City Country \\\n", 133 | "0 16 Boulevard des Italiens 75009.0 Paris FR \n", 134 | "1 Rue de la Terre Victoria 35760.0 Saint-Grégoire FR \n", 135 | "2 66 Avenue du Maine 75014.0 Paris FR \n", 136 | "3 13 Rue de Copenhague 67300.0 Schiltigheim FR \n", 137 | "4 60 Rue Lavoisier 38330.0 Montbonnot-Saint-Martin FR \n", 138 | ".. ... ... ... ... \n", 139 | "314 22 RUE DE PRESIDENT WILSON 3200.0 VICHY FR \n", 140 | "315 600 Boulevard Albert Camus 69400.0 Villefranche-sur-Saône FR \n", 141 | "316 22 Avenue des Nations 93420.0 Villepinte FR \n", 142 | "317 8 Cours Louis Lumière 94300.0 Vincennes FR \n", 143 | "318 8 Cours Louis Lumière 94300.0 Vincennes FR \n", 144 | "\n", 145 | " BORedirectCode \n", 146 | "0 30.0 \n", 147 | "1 36.0 \n", 148 | "2 32.0 \n", 149 | "3 40.0 \n", 150 | "4 39.0 \n", 151 | ".. ... \n", 152 | "314 962.0 \n", 153 | "315 962.0 \n", 154 | "316 1061.0 \n", 155 | "317 900.0 \n", 156 | "318 900.0 \n", 157 | "\n", 158 | "[319 rows x 5 columns]" 159 | ] 160 | }, 161 | "execution_count": 87, 162 | "metadata": {}, 163 | "output_type": "execute_result" 164 | } 165 | ], 166 | "source": [ 167 | "from fuzzywuzzy import fuzz \n", 168 | "from fuzzywuzzy import process\n", 169 | "import spacy\n", 170 | "import pandas as pd\n", 171 | "import requests\n", 172 | "#test\n", 173 | "fuzz.ratio(\"Catherine M Gitau\",\"Catherine M Gitau\")\n", 174 | "import pandas as pd \n", 175 | "df=pd.read_excel(r'C:\\Users\\DELL\\Downloads\\rules_normalized.xlsx')\n", 176 | "df" 177 | ] 178 | }, 179 | { 180 | "cell_type": "code", 181 | "execution_count": 88, 182 | "id": "538ab7f2", 183 | "metadata": {}, 184 | "outputs": [], 185 | "source": [ 186 | "from serpapi import GoogleSearch\n", 187 | "def search(input):\n", 188 | "    \"\"\"Return Google's spelling fix for input, else the first highlighted snippet word, else input itself.\"\"\"\n", 189 | "    import os\n", 190 | "    params = {\n", 191 | "        \"q\": input,\n", 192 | "        #\"hl\": \"ar\",\n", 193 | "        #\"gl\": \"tn\",\n", 194 | "        #\"hl\": \"en\",\n", 
195 | "        #\"gl\": \"usa\", \n", 196 | "        \"hl\":\"fr\",\n", 197 | "        \"gl\":\"fr\", \n", 198 | "        # The SerpApi key is read from the environment so no secret is stored in the notebook.\n", 199 | "        \"api_key\": os.environ[\"SERPAPI_API_KEY\"] #link : https://serpapi.com/search\n", 200 | "    }\n", 201 | "    \n", 202 | "    client = GoogleSearch(params)\n", 203 | "    results = client.get_dict()\n", 204 | "    try:\n", 205 | "        output = results['search_information']['spelling_fix']\n", 206 | "    except (KeyError, IndexError, TypeError):\n", 207 | "        try:\n", 208 | "            output = results['organic_results'][0]['snippet_highlighted_words'][0]\n", 209 | "            for result in results['organic_results']:\n", 210 | "                # Prefer the input itself when a snippet highlights it verbatim.\n", 211 | "                if input in result.get('snippet_highlighted_words', []):\n", 212 | "                    output = input\n", 213 | "        except (KeyError, IndexError, TypeError):\n", 214 | "            output = input\n", 215 | "    return output" 216 | ] 217 | }, 218 | { 219 | "cell_type": "code", 220 | "execution_count": null, 221 | "id": "679bf99d", 222 | "metadata": {}, 223 | "outputs": [], 224 | "source": [] 225 | }, 226 | { 227 | "cell_type": "code", 228 | "execution_count": 89, 229 | "id": "f7afe68c", 230 | "metadata": {}, 231 | "outputs": [], 232 | "source": [ 233 | "import spacy\n", 234 | "import pandas as pd\n", 235 | "\n", 236 | "nlp = spacy.load(\"fr_core_news_md\")\n", 237 | "df['PostalAddress_'] = df['PostalAddress'].apply(nlp)\n", 238 | "df['City_'] = df['City'].apply(nlp)\n", 239 | "\n", 240 | "# Return the text of the PostalAddress_ entry scoring highest against input_word under fuzz.ratio (first one on ties).\n", 241 | "def match_addr(input_word, df):\n", 242 | "    input_word = nlp(input_word)\n", 243 | "    similarity = [fuzz.ratio(input_word,row) for row in df['PostalAddress_']]\n", 244 | "    return df.iloc[similarity.index(max(similarity))]['PostalAddress_'].text\n", 245 | "\n", 246 | "# Return the text of the City_ entry scoring highest against input_word under fuzz.ratio (first one on ties).\n", 247 | "def match_city(input_word, df):\n", 248 | "    input_word = nlp(input_word)\n", 249 | "    similarity = [fuzz.ratio(input_word,row) for row in df['City_']]\n", 
250 | "    return df.iloc[similarity.index(max(similarity))]['City_'].text" 251 | ] 252 | }, 253 | { 254 | "cell_type": "code", 255 | "execution_count": 90, 256 | "id": "41987635", 257 | "metadata": {}, 258 | "outputs": [ 259 | { 260 | "data": { 261 | "text/html": [ 262 | "
\n", 263 | "\n", 276 | "\n", 277 | " \n", 278 | " \n", 279 | " \n", 280 | " \n", 281 | " \n", 282 | " \n", 283 | " \n", 284 | " \n", 285 | " \n", 286 | " \n", 287 | " \n", 288 | " \n", 289 | " \n", 290 | " \n", 291 | " \n", 292 | " \n", 293 | " \n", 294 | " \n", 295 | " \n", 296 | " \n", 297 | " \n", 298 | " \n", 299 | " \n", 300 | " \n", 301 | " \n", 302 | " \n", 303 | " \n", 304 | " \n", 305 | " \n", 306 | " \n", 307 | " \n", 308 | " \n", 309 | " \n", 310 | " \n", 311 | " \n", 312 | " \n", 313 | " \n", 314 | " \n", 315 | " \n", 316 | " \n", 317 | " \n", 318 | " \n", 319 | " \n", 320 | " \n", 321 | " \n", 322 | " \n", 323 | " \n", 324 | " \n", 325 | " \n", 326 | " \n", 327 | " \n", 328 | " \n", 329 | " \n", 330 | " \n", 331 | " \n", 332 | " \n", 333 | " \n", 334 | " \n", 335 | " \n", 336 | " \n", 337 | " \n", 338 | " \n", 339 | " \n", 340 | " \n", 341 | " \n", 342 | " \n", 343 | " \n", 344 | " \n", 345 | " \n", 346 | " \n", 347 | " \n", 348 | " \n", 349 | " \n", 350 | " \n", 351 | " \n", 352 | " \n", 353 | " \n", 354 | " \n", 355 | " \n", 356 | " \n", 357 | " \n", 358 | " \n", 359 | " \n", 360 | " \n", 361 | " \n", 362 | " \n", 363 | " \n", 364 | " \n", 365 | " \n", 366 | " \n", 367 | " \n", 368 | " \n", 369 | " \n", 370 | " \n", 371 | " \n", 372 | " \n", 373 | " \n", 374 | " \n", 375 | " \n", 376 | " \n", 377 | " \n", 378 | " \n", 379 | " \n", 380 | " \n", 381 | " \n", 382 | " \n", 383 | " \n", 384 | " \n", 385 | " \n", 386 | " \n", 387 | " \n", 388 | " \n", 389 | " \n", 390 | " \n", 391 | " \n", 392 | " \n", 393 | " \n", 394 | " \n", 395 | " \n", 396 | " \n", 397 | " \n", 398 | " \n", 399 | " \n", 400 | " \n", 401 | "
PostalAddressPostalCodeCityCountryBORedirectCodePostalAddress_City_
016 Boulevard des Italiens75009.0ParisFR30.0(16, Boulevard, des, Italiens)(Paris)
1Rue de la Terre Victoria35760.0Saint-GrégoireFR36.0(Rue, de, la, Terre, Victoria)(Saint-Grégoire)
266 Avenue du Maine75014.0ParisFR32.0(66, Avenue, du, Maine)(Paris)
313 Rue de Copenhague67300.0SchiltigheimFR40.0(13, Rue, de, Copenhague)(Schiltigheim)
460 Rue Lavoisier38330.0Montbonnot-Saint-MartinFR39.0(60, Rue, Lavoisier)(Montbonnot-Saint-Martin)
........................
31422 RUE DE PRESIDENT WILSON3200.0VICHYFR962.0(22, RUE, DE, PRESIDENT, WILSON)(VICHY)
315600 Boulevard Albert Camus69400.0Villefranche-sur-SaôneFR962.0(600, Boulevard, Albert, Camus)(Villefranche-sur-Saône)
31622 Avenue des Nations93420.0VillepinteFR1061.0(22, Avenue, des, Nations)(Villepinte)
3178 Cours Louis Lumière94300.0VincennesFR900.0(8, Cours, Louis, Lumière)(Vincennes)
3188 Cours Louis Lumière94300.0VincennesFR900.0(8, Cours, Louis, Lumière)(Vincennes)
\n", 402 | "

319 rows × 7 columns

\n", 403 | "
" 404 | ], 405 | "text/plain": [ 406 | " PostalAddress PostalCode City Country \\\n", 407 | "0 16 Boulevard des Italiens 75009.0 Paris FR \n", 408 | "1 Rue de la Terre Victoria 35760.0 Saint-Grégoire FR \n", 409 | "2 66 Avenue du Maine 75014.0 Paris FR \n", 410 | "3 13 Rue de Copenhague 67300.0 Schiltigheim FR \n", 411 | "4 60 Rue Lavoisier 38330.0 Montbonnot-Saint-Martin FR \n", 412 | ".. ... ... ... ... \n", 413 | "314 22 RUE DE PRESIDENT WILSON 3200.0 VICHY FR \n", 414 | "315 600 Boulevard Albert Camus 69400.0 Villefranche-sur-Saône FR \n", 415 | "316 22 Avenue des Nations 93420.0 Villepinte FR \n", 416 | "317 8 Cours Louis Lumière 94300.0 Vincennes FR \n", 417 | "318 8 Cours Louis Lumière 94300.0 Vincennes FR \n", 418 | "\n", 419 | " BORedirectCode PostalAddress_ \\\n", 420 | "0 30.0 (16, Boulevard, des, Italiens) \n", 421 | "1 36.0 (Rue, de, la, Terre, Victoria) \n", 422 | "2 32.0 (66, Avenue, du, Maine) \n", 423 | "3 40.0 (13, Rue, de, Copenhague) \n", 424 | "4 39.0 (60, Rue, Lavoisier) \n", 425 | ".. ... ... \n", 426 | "314 962.0 (22, RUE, DE, PRESIDENT, WILSON) \n", 427 | "315 962.0 (600, Boulevard, Albert, Camus) \n", 428 | "316 1061.0 (22, Avenue, des, Nations) \n", 429 | "317 900.0 (8, Cours, Louis, Lumière) \n", 430 | "318 900.0 (8, Cours, Louis, Lumière) \n", 431 | "\n", 432 | " City_ \n", 433 | "0 (Paris) \n", 434 | "1 (Saint-Grégoire) \n", 435 | "2 (Paris) \n", 436 | "3 (Schiltigheim) \n", 437 | "4 (Montbonnot-Saint-Martin) \n", 438 | ".. ... 
\n", 439 | "314 (VICHY) \n", 440 | "315 (Villefranche-sur-Saône) \n", 441 | "316 (Villepinte) \n", 442 | "317 (Vincennes) \n", 443 | "318 (Vincennes) \n", 444 | "\n", 445 | "[319 rows x 7 columns]" 446 | ] 447 | }, 448 | "execution_count": 90, 449 | "metadata": {}, 450 | "output_type": "execute_result" 451 | } 452 | ], 453 | "source": [ 454 | "df\n" 455 | ] 456 | }, 457 | { 458 | "cell_type": "code", 459 | "execution_count": 91, 460 | "id": "52d86524", 461 | "metadata": {}, 462 | "outputs": [ 463 | { 464 | "name": "stdout", 465 | "output_type": "stream", 466 | "text": [ 467 | "https://serpapi.com/search\n", 468 | "https://serpapi.com/search\n" 469 | ] 470 | } 471 | ], 472 | "source": [ 473 | "input_adresse = search(\"PRESIDENT, WILS\")\n", 474 | "input_city=search(\"Viencenne\")\n", 475 | "addr=match_addr(input_adresse, df)\n", 476 | "city=match_city(input_city, df)" 477 | ] 478 | }, 479 | { 480 | "cell_type": "code", 481 | "execution_count": 92, 482 | "id": "f4a15b22", 483 | "metadata": {}, 484 | "outputs": [ 485 | { 486 | "data": { 487 | "text/plain": [ 488 | "('22 RUE DE PRESIDENT WILSON', 'Vincennes')" 489 | ] 490 | }, 491 | "execution_count": 92, 492 | "metadata": {}, 493 | "output_type": "execute_result" 494 | } 495 | ], 496 | "source": [ 497 | "addr,city" 498 | ] 499 | }, 500 | { 501 | "cell_type": "code", 502 | "execution_count": null, 503 | "id": "2d0eb22c", 504 | "metadata": {}, 505 | "outputs": [], 506 | "source": [] 507 | }, 508 | { 509 | "cell_type": "code", 510 | "execution_count": null, 511 | "id": "61b51de8", 512 | "metadata": {}, 513 | "outputs": [], 514 | "source": [ 515 | "\n" 516 | ] 517 | } 518 | ], 519 | "metadata": { 520 | "kernelspec": { 521 | "display_name": "Python 3 (ipykernel)", 522 | "language": "python", 523 | "name": "python3" 524 | }, 525 | "language_info": { 526 | "codemirror_mode": { 527 | "name": "ipython", 528 | "version": 3 529 | }, 530 | "file_extension": ".py", 531 | "mimetype": "text/x-python", 532 | "name": "python", 533 | 
"nbconvert_exporter": "python", 534 | "pygments_lexer": "ipython3", 535 | "version": "3.9.7" 536 | } 537 | }, 538 | "nbformat": 4, 539 | "nbformat_minor": 5 540 | } 541 | --------------------------------------------------------------------------------