├── CLI Model Building.ipynb
├── Data Preprocessing.ipynb
├── LICENSE
├── Model Building.ipynb
├── README.md
├── app.py
├── base_config.cfg
├── data.json
├── data
    ├── test
    │   ├── Alice Clark CV.docx
    │   ├── Alice Clark CV.pdf
    │   ├── Alice Clark CV.txt
    │   ├── Smith Resume.docx
    │   └── Smith Resume.pdf
    └── train
    │   └── train_data.json
├── ner_model
    ├── config.cfg
    ├── meta.json
    ├── ner
    │   ├── cfg
    │   ├── model
    │   └── moves
    ├── tokenizer
    └── vocab
    │   ├── key2row
    │   ├── lookups.bin
    │   ├── strings.json
    │   ├── vectors
    │   └── vectors.cfg
└── requirement.txt


/CLI Model Building.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "markdown",
  5 |    "metadata": {},
  6 |    "source": [
  7 |     "# *Required Libraries*"
  8 |    ]
  9 |   },
 10 |   {
 11 |    "cell_type": "code",
 12 |    "execution_count": null,
 13 |    "metadata": {},
 14 |    "outputs": [],
 15 |    "source": [
 16 |     "import spacy\n",
 17 |     "import json\n",
 18 |     "from spacy.util import filter_spans\n",
 19 |     "from spacy.tokens import DocBin\n",
 20 |     "from tqdm import tqdm"
 21 |    ]
 22 |   },
 23 |   {
 24 |    "cell_type": "markdown",
 25 |    "metadata": {},
 26 |    "source": [
 27 |     "# *Loading Data*"
 28 |    ]
 29 |   },
 30 |   {
 31 |    "cell_type": "code",
 32 |    "execution_count": null,
 33 |    "metadata": {},
 34 |    "outputs": [],
 35 |    "source": [
 36 |     "with open('data/train/train_data.json','rb') as f:\n",
 37 |     "    train_data=json.load(f)"
 38 |    ]
 39 |   },
 40 |   {
 41 |    "cell_type": "markdown",
 42 |    "metadata": {},
 43 |    "source": [
 44 |     "# *Model Building*"
 45 |    ]
 46 |   },
 47 |   {
 48 |    "cell_type": "code",
 49 |    "execution_count": null,
 50 |    "metadata": {},
 51 |    "outputs": [],
 52 |    "source": [
 53 |     "nlp = spacy.blank('en')\n",
 54 |     "doc_bin = DocBin()\n",
 55 |     "for training_example in tqdm(train_data):\n",
 56 |     "    text = training_example['text']\n",
 57 |     "    entities = training_example['entities']\n",
 58 |     "    doc = nlp.make_doc(text)\n",
 59 |     "    ents = []\n",
 60 |     "    for start, end, label in entities:\n",
 61 |     "        span = doc.char_span(start, end, label=label, alignment_mode=\"contract\")\n",
 62 |     "        if span is not None:  # char_span yields None when no token-aligned span exists\n",
 63 |     "            ents.append(span)\n",
 64 |     "    doc.ents = filter_spans(ents)  # drop overlapping spans before assigning\n",
 65 |     "    doc_bin.add(doc)\n",
 66 |     "doc_bin.to_disk(\"train.spacy\")"
 67 |    ]
 68 |   },
 69 |   {
 70 |    "cell_type": "code",
 71 |    "execution_count": null,
 72 |    "metadata": {},
 73 |    "outputs": [],
 74 |    "source": [
 75 |     "! python -m spacy init fill-config base_config.cfg config.cfg"
 76 |    ]
 77 |   },
 78 |   {
 79 |    "cell_type": "markdown",
 80 |    "metadata": {},
 81 |    "source": [
 82 |     "## *Model Training*"
 83 |    ]
 84 |   },
 85 |   {
 86 |    "cell_type": "code",
 87 |    "execution_count": null,
 88 |    "metadata": {},
 89 |    "outputs": [],
 90 |    "source": [
 91 |     "! python -m spacy train config.cfg --output ./output --paths.train ./train.spacy --paths.dev ./train.spacy"
 92 |    ]
 93 |   }
 94 |  ],
 95 |  "metadata": {
 96 |   "kernelspec": {
 97 |    "display_name": "Python 3",
 98 |    "language": "python",
 99 |    "name": "python3"
100 |   },
101 |   "language_info": {
102 |    "codemirror_mode": {
103 |     "name": "ipython",
104 |     "version": 3
105 |    },
106 |    "file_extension": ".py",
107 |    "mimetype": "text/x-python",
108 |    "name": "python",
109 |    "nbconvert_exporter": "python",
110 |    "pygments_lexer": "ipython3",
111 |    "version": "3.11.7"
112 |   }
113 |  },
114 |  "nbformat": 4,
115 |  "nbformat_minor": 2
116 | }
117 | 


--------------------------------------------------------------------------------
/Data Preprocessing.ipynb:
--------------------------------------------------------------------------------
 1 | {
 2 |  "cells": [
 3 |   {
 4 |    "cell_type": "code",
 5 |    "execution_count": 34,
 6 |    "metadata": {},
 7 |    "outputs": [],
 8 |    "source": [
 9 |     "import json"
10 |    ]
11 |   },
12 |   {
13 |    "cell_type": "code",
14 |    "execution_count": 35,
15 |    "metadata": {},
16 |    "outputs": [],
17 |    "source": [
18 |     "with open('data.json','rb') as f:\n",
19 |     "    data=json.load(f)"
20 |    ]
21 |   },
22 |   {
23 |    "cell_type": "code",
24 |    "execution_count": 38,
25 |    "metadata": {},
26 |    "outputs": [],
27 |    "source": [
28 |     "train_data=[]\n",
29 |     "for i in data:\n",
30 |     "    item={}\n",
31 |     "    entities=[]\n",
32 |     "    content=i['content']\n",
33 |     "    seen=set()\n",
34 |     "    indexes=[]\n",
35 |     "    for j in i['annotation']:\n",
36 |     "        label=j['label']\n",
37 |     "        label=label[0].replace(\" \",\"_\").upper()\n",
38 |     "        text=j['text'][0].strip()\n",
39 |     "        if text not in seen:\n",
40 |     "            start=content.index(text)\n",
41 |     "            end=start+len(text)\n",
42 |     "            flag=0\n",
43 |     "            for index in indexes:\n",
44 |     "                if (index[0]<=start<=index[1]) or (index[0]<=end<=index[1]) or (start<=index[0]<=end) or (start<=index[1]<=end):\n",
45 |     "                    flag=1\n",
46 |     "            if flag!=1:\n",
47 |     "                entities.append([start,end,label])\n",
48 |     "                \n",
49 |     "            seen.add(text)\n",
50 |     "            indexes.append([start,end])           \n",
51 |     "\n",
52 |     "    item['text']=content\n",
53 |     "    item['entities']=entities\n",
54 |     "    train_data.append(item)"
55 |    ]
56 |   },
57 |   {
58 |    "cell_type": "code",
59 |    "execution_count": 39,
60 |    "metadata": {},
61 |    "outputs": [],
62 |    "source": [
63 |     "with open('./data/train/train_data.json','w') as file:\n",
64 |     "    json.dump(train_data,file)"
65 |    ]
66 |   }
67 |  ],
68 |  "metadata": {
69 |   "kernelspec": {
70 |    "display_name": "Python 3",
71 |    "language": "python",
72 |    "name": "python3"
73 |   },
74 |   "language_info": {
75 |    "codemirror_mode": {
76 |     "name": "ipython",
77 |     "version": 3
78 |    },
79 |    "file_extension": ".py",
80 |    "mimetype": "text/x-python",
81 |    "name": "python",
82 |    "nbconvert_exporter": "python",
83 |    "pygments_lexer": "ipython3",
84 |    "version": "3.11.7"
85 |   }
86 |  },
87 |  "nbformat": 4,
88 |  "nbformat_minor": 2
89 | }
90 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
 1 | MIT License
 2 | 
 3 | Copyright (c) 2024 Mitesh Gupta
 4 | 
 5 | Permission is hereby granted, free of charge, to any person obtaining a copy
 6 | of this software and associated documentation files (the "Software"), to deal
 7 | in the Software without restriction, including without limitation the rights
 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions: 
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 | 


--------------------------------------------------------------------------------
/Model Building.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "code",
  5 |    "execution_count": 1,
  6 |    "metadata": {},
  7 |    "outputs": [],
  8 |    "source": [
  9 |     "import spacy\n",
 10 |     "from spacy.training import Example\n",
 11 |     "from spacy.util import minibatch, compounding\n",
 12 |     "import random\n",
 13 |     "import json\n",
 14 |     "from spacy.util import filter_spans"
 15 |    ]
 16 |   },
 17 |   {
 18 |    "cell_type": "code",
 19 |    "execution_count": 2,
 20 |    "metadata": {},
 21 |    "outputs": [],
 22 |    "source": [
 23 |     "with open('data/train/train_data.json','rb') as f:\n",
 24 |     "    train_data=json.load(f)"
 25 |    ]
 26 |   },
 27 |   {
 28 |    "cell_type": "code",
 29 |    "execution_count": null,
 30 |    "metadata": {},
 31 |    "outputs": [],
 32 |    "source": [
 33 |     "# create a blank English NLP model\n",
 34 |     "nlp = spacy.blank('en')\n",
 35 |     "\n",
 36 |     "# Create the NER component and add it to the pipeline\n",
 37 |     "if \"ner\" not in nlp.pipe_names:\n",
 38 |     "    ner = nlp.add_pipe(\"ner\", last=True)\n",
 39 |     "else:\n",
 40 |     "    ner = nlp.get_pipe(\"ner\")\n",
 41 |     "\n",
 42 |     "# Add labels to the NER component\n",
 43 |     "for item in train_data:\n",
 44 |     "    for _, _, label in item['entities']:\n",
 45 |     "        ner.add_label(label)\n",
 46 |     "\n",
 47 |     "# Prepare training data in the format required by spaCy 3.x\n",
 48 |     "train_examples = []\n",
 49 |     "for item in train_data:\n",
 50 |     "    doc = nlp.make_doc(item[\"text\"])\n",
 51 |     "    ents = []\n",
 52 |     "    for start, end, label in item['entities']:\n",
 53 |     "        span = doc.char_span(start, end, label=label, alignment_mode=\"contract\")\n",
 54 |     "        if span is not None:\n",
 55 |     "            ents.append(span)\n",
 56 |     "    \n",
 57 |     "    # Gold spans go only into the reference annotations: entities pre-set on the input doc\n",
 58 |     "    # would be visible to the NER component and leak the answers it is meant to predict.\n",
 59 |     "    gold = [(s.start_char, s.end_char, s.label_) for s in filter_spans(ents)]\n",
 60 |     "    example = Example.from_dict(doc, {\"entities\": gold})\n",
 61 |     "    train_examples.append(example)\n",
 62 |     "\n"
 63 |    ]
 64 |   },
 65 |   {
 66 |    "cell_type": "code",
 67 |    "execution_count": 4,
 68 |    "metadata": {},
 69 |    "outputs": [
 70 |     {
 71 |      "name": "stdout",
 72 |      "output_type": "stream",
 73 |      "text": [
 74 |       "Iteration 1: Losses: 26612.209, Precision: 0.997, Recall: 1.000, F1-score: 0.999\n",
 75 |       "Iteration 2: Losses: 5704.469, Precision: 0.934, Recall: 1.000, F1-score: 0.966\n",
 76 |       "Iteration 3: Losses: 3952.015, Precision: 0.823, Recall: 1.000, F1-score: 0.903\n",
 77 |       "Iteration 4: Losses: 3180.710, Precision: 0.900, Recall: 1.000, F1-score: 0.947\n",
 78 |       "Iteration 5: Losses: 2740.532, Precision: 0.851, Recall: 1.000, F1-score: 0.919\n",
 79 |       "Iteration 6: Losses: 2647.418, Precision: 0.884, Recall: 1.000, F1-score: 0.938\n",
 80 |       "Iteration 7: Losses: 2200.027, Precision: 0.885, Recall: 1.000, F1-score: 0.939\n",
 81 |       "Iteration 8: Losses: 2086.462, Precision: 0.881, Recall: 1.000, F1-score: 0.937\n",
 82 |       "Iteration 9: Losses: 1896.823, Precision: 0.893, Recall: 1.000, F1-score: 0.944\n",
 83 |       "Iteration 10: Losses: 1817.931, Precision: 0.931, Recall: 1.000, F1-score: 0.964\n",
 84 |       "Iteration 11: Losses: 1680.189, Precision: 0.936, Recall: 1.000, F1-score: 0.967\n",
 85 |       "Iteration 12: Losses: 1707.834, Precision: 0.907, Recall: 1.000, F1-score: 0.951\n",
 86 |       "Iteration 13: Losses: 1603.189, Precision: 0.928, Recall: 1.000, F1-score: 0.963\n",
 87 |       "Iteration 14: Losses: 1530.553, Precision: 0.931, Recall: 1.000, F1-score: 0.964\n",
 88 |       "Iteration 15: Losses: 1418.975, Precision: 0.943, Recall: 1.000, F1-score: 0.970\n",
 89 |       "Iteration 16: Losses: 1448.126, Precision: 0.904, Recall: 1.000, F1-score: 0.950\n",
 90 |       "Iteration 17: Losses: 1277.593, Precision: 0.947, Recall: 1.000, F1-score: 0.973\n",
 91 |       "Iteration 18: Losses: 1275.569, Precision: 0.966, Recall: 1.000, F1-score: 0.983\n",
 92 |       "Iteration 19: Losses: 1246.902, Precision: 0.954, Recall: 1.000, F1-score: 0.976\n",
 93 |       "Iteration 20: Losses: 1147.991, Precision: 0.890, Recall: 1.000, F1-score: 0.942\n",
 94 |       "Iteration 21: Losses: 1112.616, Precision: 0.918, Recall: 1.000, F1-score: 0.958\n",
 95 |       "Iteration 22: Losses: 980.910, Precision: 0.916, Recall: 1.000, F1-score: 0.956\n",
 96 |       "Iteration 23: Losses: 986.483, Precision: 0.909, Recall: 1.000, F1-score: 0.952\n",
 97 |       "Iteration 24: Losses: 1034.985, Precision: 0.943, Recall: 1.000, F1-score: 0.970\n",
 98 |       "Iteration 25: Losses: 987.256, Precision: 0.945, Recall: 1.000, F1-score: 0.972\n",
 99 |       "Iteration 26: Losses: 983.464, Precision: 0.912, Recall: 1.000, F1-score: 0.954\n",
100 |       "Iteration 27: Losses: 987.474, Precision: 0.955, Recall: 1.000, F1-score: 0.977\n",
101 |       "Iteration 28: Losses: 976.155, Precision: 0.957, Recall: 1.000, F1-score: 0.978\n",
102 |       "Iteration 29: Losses: 951.699, Precision: 0.947, Recall: 1.000, F1-score: 0.973\n",
103 |       "Iteration 30: Losses: 851.523, Precision: 0.927, Recall: 1.000, F1-score: 0.962\n",
104 |       "Iteration 31: Losses: 817.878, Precision: 0.969, Recall: 1.000, F1-score: 0.984\n",
105 |       "Iteration 32: Losses: 866.606, Precision: 0.940, Recall: 1.000, F1-score: 0.969\n",
106 |       "Iteration 33: Losses: 823.616, Precision: 0.914, Recall: 1.000, F1-score: 0.955\n",
107 |       "Iteration 34: Losses: 820.650, Precision: 0.950, Recall: 1.000, F1-score: 0.974\n",
108 |       "Iteration 35: Losses: 789.587, Precision: 0.946, Recall: 1.000, F1-score: 0.972\n",
109 |       "Iteration 36: Losses: 763.384, Precision: 0.934, Recall: 1.000, F1-score: 0.966\n",
110 |       "Iteration 37: Losses: 753.573, Precision: 0.962, Recall: 1.000, F1-score: 0.981\n",
111 |       "Iteration 38: Losses: 735.642, Precision: 0.953, Recall: 1.000, F1-score: 0.976\n",
112 |       "Iteration 39: Losses: 694.879, Precision: 0.979, Recall: 1.000, F1-score: 0.989\n",
113 |       "Iteration 40: Losses: 733.312, Precision: 0.970, Recall: 1.000, F1-score: 0.985\n",
114 |       "Iteration 41: Losses: 707.260, Precision: 0.963, Recall: 1.000, F1-score: 0.981\n",
115 |       "Iteration 42: Losses: 686.931, Precision: 0.946, Recall: 1.000, F1-score: 0.972\n",
116 |       "Iteration 43: Losses: 637.460, Precision: 0.956, Recall: 1.000, F1-score: 0.978\n",
117 |       "Iteration 44: Losses: 720.086, Precision: 0.972, Recall: 1.000, F1-score: 0.986\n",
118 |       "Iteration 45: Losses: 602.563, Precision: 0.978, Recall: 1.000, F1-score: 0.989\n",
119 |       "Iteration 46: Losses: 609.871, Precision: 0.955, Recall: 1.000, F1-score: 0.977\n",
120 |       "Iteration 47: Losses: 614.682, Precision: 0.986, Recall: 1.000, F1-score: 0.993\n",
121 |       "Iteration 48: Losses: 593.069, Precision: 0.973, Recall: 1.000, F1-score: 0.986\n",
122 |       "Iteration 49: Losses: 675.111, Precision: 0.960, Recall: 1.000, F1-score: 0.979\n",
123 |       "Iteration 50: Losses: 574.115, Precision: 0.969, Recall: 1.000, F1-score: 0.984\n",
124 |       "Iteration 51: Losses: 574.475, Precision: 0.965, Recall: 1.000, F1-score: 0.982\n",
125 |       "Iteration 52: Losses: 542.209, Precision: 0.964, Recall: 1.000, F1-score: 0.982\n",
126 |       "Iteration 53: Losses: 533.706, Precision: 0.955, Recall: 1.000, F1-score: 0.977\n",
127 |       "Iteration 54: Losses: 607.607, Precision: 0.961, Recall: 1.000, F1-score: 0.980\n",
128 |       "Iteration 55: Losses: 558.791, Precision: 0.982, Recall: 1.000, F1-score: 0.991\n",
129 |       "Iteration 56: Losses: 539.896, Precision: 0.959, Recall: 1.000, F1-score: 0.979\n",
130 |       "Iteration 57: Losses: 531.988, Precision: 0.974, Recall: 1.000, F1-score: 0.987\n",
131 |       "Iteration 58: Losses: 561.542, Precision: 0.978, Recall: 1.000, F1-score: 0.989\n",
132 |       "Iteration 59: Losses: 533.384, Precision: 0.962, Recall: 1.000, F1-score: 0.981\n",
133 |       "Iteration 60: Losses: 516.781, Precision: 0.979, Recall: 1.000, F1-score: 0.989\n",
134 |       "Iteration 61: Losses: 474.448, Precision: 0.961, Recall: 1.000, F1-score: 0.980\n",
135 |       "Iteration 62: Losses: 468.161, Precision: 0.990, Recall: 1.000, F1-score: 0.995\n",
136 |       "Iteration 63: Losses: 502.683, Precision: 0.970, Recall: 1.000, F1-score: 0.985\n",
137 |       "Iteration 64: Losses: 457.712, Precision: 0.974, Recall: 1.000, F1-score: 0.987\n",
138 |       "Iteration 65: Losses: 492.248, Precision: 0.973, Recall: 1.000, F1-score: 0.987\n",
139 |       "Iteration 66: Losses: 501.086, Precision: 0.972, Recall: 1.000, F1-score: 0.986\n",
140 |       "Iteration 67: Losses: 499.547, Precision: 0.976, Recall: 1.000, F1-score: 0.988\n",
141 |       "Iteration 68: Losses: 448.287, Precision: 0.986, Recall: 1.000, F1-score: 0.993\n",
142 |       "Iteration 69: Losses: 450.033, Precision: 0.973, Recall: 1.000, F1-score: 0.987\n",
143 |       "Iteration 70: Losses: 436.626, Precision: 0.974, Recall: 1.000, F1-score: 0.987\n",
144 |       "Iteration 71: Losses: 443.753, Precision: 0.962, Recall: 1.000, F1-score: 0.981\n",
145 |       "Iteration 72: Losses: 452.542, Precision: 0.947, Recall: 1.000, F1-score: 0.973\n",
146 |       "Iteration 73: Losses: 450.764, Precision: 0.984, Recall: 1.000, F1-score: 0.992\n",
147 |       "Iteration 74: Losses: 451.362, Precision: 0.975, Recall: 1.000, F1-score: 0.987\n",
148 |       "Iteration 75: Losses: 391.670, Precision: 0.980, Recall: 1.000, F1-score: 0.990\n",
149 |       "Iteration 76: Losses: 428.624, Precision: 0.971, Recall: 1.000, F1-score: 0.985\n",
150 |       "Iteration 77: Losses: 436.983, Precision: 0.986, Recall: 1.000, F1-score: 0.993\n",
151 |       "Iteration 78: Losses: 443.188, Precision: 0.984, Recall: 1.000, F1-score: 0.992\n",
152 |       "Iteration 79: Losses: 381.292, Precision: 0.985, Recall: 1.000, F1-score: 0.992\n",
153 |       "Iteration 80: Losses: 369.396, Precision: 0.976, Recall: 1.000, F1-score: 0.988\n",
154 |       "Iteration 81: Losses: 371.702, Precision: 0.982, Recall: 1.000, F1-score: 0.991\n",
155 |       "Iteration 82: Losses: 389.415, Precision: 0.983, Recall: 1.000, F1-score: 0.992\n",
156 |       "Iteration 83: Losses: 348.948, Precision: 0.973, Recall: 1.000, F1-score: 0.987\n",
157 |       "Iteration 84: Losses: 380.144, Precision: 0.969, Recall: 1.000, F1-score: 0.984\n",
158 |       "Iteration 85: Losses: 405.729, Precision: 0.965, Recall: 1.000, F1-score: 0.982\n",
159 |       "Iteration 86: Losses: 375.188, Precision: 0.968, Recall: 1.000, F1-score: 0.984\n",
160 |       "Iteration 87: Losses: 387.213, Precision: 0.972, Recall: 1.000, F1-score: 0.986\n",
161 |       "Iteration 88: Losses: 370.844, Precision: 0.985, Recall: 1.000, F1-score: 0.993\n",
162 |       "Iteration 89: Losses: 355.205, Precision: 0.979, Recall: 1.000, F1-score: 0.989\n",
163 |       "Iteration 90: Losses: 360.518, Precision: 0.979, Recall: 1.000, F1-score: 0.989\n",
164 |       "Iteration 91: Losses: 357.224, Precision: 0.957, Recall: 1.000, F1-score: 0.978\n",
165 |       "Iteration 92: Losses: 367.662, Precision: 0.984, Recall: 1.000, F1-score: 0.992\n",
166 |       "Iteration 93: Losses: 344.491, Precision: 0.985, Recall: 1.000, F1-score: 0.992\n",
167 |       "Iteration 94: Losses: 354.471, Precision: 0.983, Recall: 1.000, F1-score: 0.991\n",
168 |       "Iteration 95: Losses: 307.208, Precision: 0.980, Recall: 1.000, F1-score: 0.990\n",
169 |       "Iteration 96: Losses: 319.637, Precision: 0.970, Recall: 1.000, F1-score: 0.985\n",
170 |       "Iteration 97: Losses: 332.045, Precision: 0.991, Recall: 1.000, F1-score: 0.995\n",
171 |       "Iteration 98: Losses: 330.712, Precision: 0.982, Recall: 1.000, F1-score: 0.991\n",
172 |       "Iteration 99: Losses: 357.091, Precision: 0.967, Recall: 1.000, F1-score: 0.983\n",
173 |       "Iteration 100: Losses: 338.934, Precision: 0.964, Recall: 1.000, F1-score: 0.982\n",
174 |       "Iteration 101: Losses: 324.915, Precision: 0.983, Recall: 1.000, F1-score: 0.991\n",
175 |       "Iteration 102: Losses: 355.408, Precision: 0.977, Recall: 1.000, F1-score: 0.989\n",
176 |       "Iteration 103: Losses: 367.118, Precision: 0.981, Recall: 1.000, F1-score: 0.990\n",
177 |       "Iteration 104: Losses: 347.553, Precision: 0.987, Recall: 1.000, F1-score: 0.994\n",
178 |       "Iteration 105: Losses: 325.273, Precision: 0.969, Recall: 1.000, F1-score: 0.984\n",
179 |       "Iteration 106: Losses: 328.377, Precision: 0.986, Recall: 1.000, F1-score: 0.993\n",
180 |       "Iteration 107: Losses: 292.850, Precision: 0.980, Recall: 1.000, F1-score: 0.990\n",
181 |       "Iteration 108: Losses: 304.283, Precision: 0.970, Recall: 1.000, F1-score: 0.985\n",
182 |       "Iteration 109: Losses: 313.686, Precision: 0.970, Recall: 1.000, F1-score: 0.985\n",
183 |       "Iteration 110: Losses: 323.467, Precision: 0.979, Recall: 1.000, F1-score: 0.990\n",
184 |       "Iteration 111: Losses: 312.275, Precision: 0.984, Recall: 1.000, F1-score: 0.992\n",
185 |       "Iteration 112: Losses: 283.199, Precision: 0.983, Recall: 1.000, F1-score: 0.992\n",
186 |       "Iteration 113: Losses: 290.470, Precision: 0.982, Recall: 1.000, F1-score: 0.991\n",
187 |       "Iteration 114: Losses: 311.933, Precision: 0.979, Recall: 1.000, F1-score: 0.990\n",
188 |       "Iteration 115: Losses: 341.079, Precision: 0.982, Recall: 1.000, F1-score: 0.991\n",
189 |       "Iteration 116: Losses: 278.338, Precision: 0.984, Recall: 1.000, F1-score: 0.992\n",
190 |       "Iteration 117: Losses: 332.445, Precision: 0.975, Recall: 1.000, F1-score: 0.987\n",
191 |       "Iteration 118: Losses: 305.459, Precision: 0.977, Recall: 1.000, F1-score: 0.988\n",
192 |       "Iteration 119: Losses: 305.398, Precision: 0.984, Recall: 1.000, F1-score: 0.992\n",
193 |       "Iteration 120: Losses: 284.236, Precision: 0.987, Recall: 1.000, F1-score: 0.993\n",
194 |       "Iteration 121: Losses: 311.096, Precision: 0.985, Recall: 1.000, F1-score: 0.992\n",
195 |       "Iteration 122: Losses: 279.543, Precision: 0.978, Recall: 1.000, F1-score: 0.989\n",
196 |       "Iteration 123: Losses: 281.207, Precision: 0.987, Recall: 1.000, F1-score: 0.993\n",
197 |       "Iteration 124: Losses: 268.616, Precision: 0.970, Recall: 1.000, F1-score: 0.985\n",
198 |       "Iteration 125: Losses: 265.647, Precision: 0.980, Recall: 1.000, F1-score: 0.990\n",
199 |       "Iteration 126: Losses: 256.806, Precision: 0.970, Recall: 1.000, F1-score: 0.985\n",
200 |       "Iteration 127: Losses: 279.410, Precision: 0.975, Recall: 1.000, F1-score: 0.987\n",
201 |       "Iteration 128: Losses: 275.790, Precision: 0.981, Recall: 1.000, F1-score: 0.991\n",
202 |       "Iteration 129: Losses: 261.689, Precision: 0.985, Recall: 1.000, F1-score: 0.992\n",
203 |       "Iteration 130: Losses: 229.473, Precision: 0.977, Recall: 1.000, F1-score: 0.989\n",
204 |       "Iteration 131: Losses: 269.852, Precision: 0.981, Recall: 1.000, F1-score: 0.991\n",
205 |       "Iteration 132: Losses: 254.804, Precision: 0.982, Recall: 1.000, F1-score: 0.991\n",
206 |       "Iteration 133: Losses: 292.561, Precision: 0.986, Recall: 1.000, F1-score: 0.993\n",
207 |       "Iteration 134: Losses: 299.291, Precision: 0.988, Recall: 1.000, F1-score: 0.994\n",
208 |       "Iteration 135: Losses: 263.173, Precision: 0.987, Recall: 1.000, F1-score: 0.993\n",
209 |       "Iteration 136: Losses: 261.140, Precision: 0.985, Recall: 1.000, F1-score: 0.992\n",
210 |       "Iteration 137: Losses: 275.786, Precision: 0.987, Recall: 1.000, F1-score: 0.993\n",
211 |       "Iteration 138: Losses: 242.394, Precision: 0.985, Recall: 1.000, F1-score: 0.993\n",
212 |       "Iteration 139: Losses: 272.513, Precision: 0.983, Recall: 1.000, F1-score: 0.991\n",
213 |       "Iteration 140: Losses: 258.453, Precision: 0.976, Recall: 1.000, F1-score: 0.988\n",
214 |       "Iteration 141: Losses: 248.512, Precision: 0.972, Recall: 1.000, F1-score: 0.986\n",
215 |       "Iteration 142: Losses: 242.651, Precision: 0.991, Recall: 1.000, F1-score: 0.996\n",
216 |       "Iteration 143: Losses: 256.711, Precision: 0.977, Recall: 1.000, F1-score: 0.988\n",
217 |       "Iteration 144: Losses: 247.515, Precision: 0.980, Recall: 1.000, F1-score: 0.990\n",
218 |       "Iteration 145: Losses: 258.765, Precision: 0.991, Recall: 1.000, F1-score: 0.995\n",
219 |       "Iteration 146: Losses: 243.136, Precision: 0.985, Recall: 1.000, F1-score: 0.993\n",
220 |       "Iteration 147: Losses: 269.303, Precision: 0.984, Recall: 1.000, F1-score: 0.992\n",
221 |       "Iteration 148: Losses: 267.897, Precision: 0.987, Recall: 1.000, F1-score: 0.993\n",
222 |       "Iteration 149: Losses: 260.078, Precision: 0.990, Recall: 1.000, F1-score: 0.995\n",
223 |       "Iteration 150: Losses: 234.311, Precision: 0.985, Recall: 1.000, F1-score: 0.993\n",
224 |       "Iteration 151: Losses: 227.250, Precision: 0.992, Recall: 1.000, F1-score: 0.996\n",
225 |       "Iteration 152: Losses: 242.148, Precision: 0.986, Recall: 1.000, F1-score: 0.993\n",
226 |       "Iteration 153: Losses: 235.717, Precision: 0.973, Recall: 1.000, F1-score: 0.986\n",
227 |       "Iteration 154: Losses: 362.445, Precision: 0.988, Recall: 1.000, F1-score: 0.994\n",
228 |       "Iteration 155: Losses: 267.372, Precision: 0.985, Recall: 1.000, F1-score: 0.992\n",
229 |       "Iteration 156: Losses: 236.276, Precision: 0.985, Recall: 1.000, F1-score: 0.993\n",
230 |       "Iteration 157: Losses: 256.003, Precision: 0.985, Recall: 1.000, F1-score: 0.992\n",
231 |       "Iteration 158: Losses: 219.543, Precision: 0.983, Recall: 1.000, F1-score: 0.992\n",
232 |       "Iteration 159: Losses: 228.927, Precision: 0.991, Recall: 1.000, F1-score: 0.995\n",
233 |       "Iteration 160: Losses: 198.271, Precision: 0.978, Recall: 1.000, F1-score: 0.989\n",
234 |       "Iteration 161: Losses: 247.528, Precision: 0.988, Recall: 1.000, F1-score: 0.994\n",
235 |       "Iteration 162: Losses: 228.176, Precision: 0.988, Recall: 1.000, F1-score: 0.994\n",
236 |       "Iteration 163: Losses: 225.315, Precision: 0.991, Recall: 1.000, F1-score: 0.995\n",
237 |       "Iteration 164: Losses: 214.318, Precision: 0.981, Recall: 1.000, F1-score: 0.991\n",
238 |       "Iteration 165: Losses: 196.020, Precision: 0.987, Recall: 1.000, F1-score: 0.993\n",
239 |       "Iteration 166: Losses: 234.131, Precision: 0.988, Recall: 1.000, F1-score: 0.994\n",
240 |       "Iteration 167: Losses: 219.748, Precision: 0.985, Recall: 1.000, F1-score: 0.993\n",
241 |       "Iteration 168: Losses: 214.162, Precision: 0.989, Recall: 1.000, F1-score: 0.994\n",
242 |       "Iteration 169: Losses: 243.845, Precision: 0.991, Recall: 1.000, F1-score: 0.995\n",
243 |       "Iteration 170: Losses: 177.961, Precision: 0.984, Recall: 1.000, F1-score: 0.992\n",
244 |       "Iteration 171: Losses: 189.173, Precision: 0.980, Recall: 1.000, F1-score: 0.990\n",
245 |       "Iteration 172: Losses: 220.402, Precision: 0.987, Recall: 1.000, F1-score: 0.993\n",
246 |       "Iteration 173: Losses: 243.295, Precision: 0.988, Recall: 1.000, F1-score: 0.994\n",
247 |       "Iteration 174: Losses: 227.411, Precision: 0.986, Recall: 1.000, F1-score: 0.993\n",
248 |       "Iteration 175: Losses: 219.293, Precision: 0.987, Recall: 1.000, F1-score: 0.994\n",
249 |       "Iteration 176: Losses: 190.667, Precision: 0.989, Recall: 1.000, F1-score: 0.994\n",
250 |       "Iteration 177: Losses: 204.839, Precision: 0.991, Recall: 1.000, F1-score: 0.996\n",
251 |       "Iteration 178: Losses: 207.279, Precision: 0.975, Recall: 1.000, F1-score: 0.987\n",
252 |       "Iteration 179: Losses: 227.621, Precision: 0.982, Recall: 1.000, F1-score: 0.991\n",
253 |       "Iteration 180: Losses: 192.607, Precision: 0.989, Recall: 1.000, F1-score: 0.994\n",
254 |       "Iteration 181: Losses: 207.633, Precision: 0.991, Recall: 1.000, F1-score: 0.996\n",
255 |       "Iteration 182: Losses: 210.554, Precision: 0.989, Recall: 1.000, F1-score: 0.995\n",
256 |       "Iteration 183: Losses: 209.294, Precision: 0.987, Recall: 1.000, F1-score: 0.993\n",
257 |       "Iteration 184: Losses: 195.729, Precision: 0.983, Recall: 1.000, F1-score: 0.991\n",
258 |       "Iteration 185: Losses: 238.936, Precision: 0.995, Recall: 1.000, F1-score: 0.997\n",
259 |       "Iteration 186: Losses: 196.003, Precision: 0.989, Recall: 1.000, F1-score: 0.994\n",
260 |       "Iteration 187: Losses: 184.401, Precision: 0.985, Recall: 1.000, F1-score: 0.993\n",
261 |       "Iteration 188: Losses: 278.631, Precision: 0.991, Recall: 1.000, F1-score: 0.995\n",
262 |       "Iteration 189: Losses: 206.244, Precision: 0.988, Recall: 1.000, F1-score: 0.994\n",
263 |       "Iteration 190: Losses: 206.792, Precision: 0.988, Recall: 1.000, F1-score: 0.994\n",
264 |       "Iteration 191: Losses: 216.515, Precision: 0.983, Recall: 1.000, F1-score: 0.991\n",
265 |       "Iteration 192: Losses: 178.613, Precision: 0.991, Recall: 1.000, F1-score: 0.995\n",
266 |       "Iteration 193: Losses: 206.766, Precision: 0.985, Recall: 1.000, F1-score: 0.992\n",
267 |       "Iteration 194: Losses: 192.838, Precision: 0.994, Recall: 1.000, F1-score: 0.997\n",
268 |       "Iteration 195: Losses: 167.583, Precision: 0.988, Recall: 1.000, F1-score: 0.994\n",
269 |       "Iteration 196: Losses: 174.022, Precision: 0.992, Recall: 1.000, F1-score: 0.996\n",
270 |       "Iteration 197: Losses: 177.738, Precision: 0.991, Recall: 1.000, F1-score: 0.995\n",
271 |       "Iteration 198: Losses: 206.007, Precision: 0.986, Recall: 1.000, F1-score: 0.993\n",
272 |       "Iteration 199: Losses: 194.892, Precision: 0.990, Recall: 1.000, F1-score: 0.995\n",
273 |       "Iteration 200: Losses: 182.221, Precision: 0.990, Recall: 1.000, F1-score: 0.995\n",
274 |       "Iteration 201: Losses: 186.950, Precision: 0.992, Recall: 1.000, F1-score: 0.996\n",
275 |       "Iteration 202: Losses: 203.436, Precision: 0.990, Recall: 1.000, F1-score: 0.995\n",
276 |       "Iteration 203: Losses: 180.020, Precision: 0.990, Recall: 1.000, F1-score: 0.995\n",
277 |       "Iteration 204: Losses: 156.148, Precision: 0.987, Recall: 1.000, F1-score: 0.993\n",
278 |       "Iteration 205: Losses: 192.183, Precision: 0.989, Recall: 1.000, F1-score: 0.994\n",
279 |       "Iteration 206: Losses: 179.260, Precision: 0.990, Recall: 1.000, F1-score: 0.995\n",
280 |       "Iteration 207: Losses: 159.405, Precision: 0.985, Recall: 1.000, F1-score: 0.992\n",
281 |       "Iteration 208: Losses: 171.970, Precision: 0.995, Recall: 1.000, F1-score: 0.998\n",
282 |       "Iteration 209: Losses: 189.327, Precision: 0.986, Recall: 1.000, F1-score: 0.993\n",
283 |       "Iteration 210: Losses: 186.480, Precision: 0.991, Recall: 1.000, F1-score: 0.996\n",
284 |       "Iteration 211: Losses: 161.660, Precision: 0.991, Recall: 1.000, F1-score: 0.996\n",
285 |       "Iteration 212: Losses: 200.974, Precision: 0.987, Recall: 1.000, F1-score: 0.994\n",
286 |       "Iteration 213: Losses: 179.066, Precision: 0.988, Recall: 1.000, F1-score: 0.994\n",
287 |       "Iteration 214: Losses: 179.727, Precision: 0.992, Recall: 1.000, F1-score: 0.996\n",
288 |       "Iteration 215: Losses: 196.743, Precision: 0.990, Recall: 1.000, F1-score: 0.995\n",
289 |       "Iteration 216: Losses: 179.379, Precision: 0.991, Recall: 1.000, F1-score: 0.995\n",
290 |       "Iteration 217: Losses: 154.307, Precision: 0.984, Recall: 1.000, F1-score: 0.992\n",
291 |       "Iteration 218: Losses: 182.968, Precision: 0.987, Recall: 1.000, F1-score: 0.994\n",
292 |       "Iteration 219: Losses: 171.672, Precision: 0.988, Recall: 1.000, F1-score: 0.994\n",
293 |       "Iteration 220: Losses: 193.198, Precision: 0.986, Recall: 1.000, F1-score: 0.993\n",
294 |       "Iteration 221: Losses: 173.074, Precision: 0.992, Recall: 1.000, F1-score: 0.996\n",
295 |       "Iteration 222: Losses: 190.361, Precision: 0.990, Recall: 1.000, F1-score: 0.995\n",
296 |       "Iteration 223: Losses: 180.024, Precision: 0.985, Recall: 1.000, F1-score: 0.993\n",
297 |       "Iteration 224: Losses: 177.070, Precision: 0.991, Recall: 1.000, F1-score: 0.996\n",
298 |       "Iteration 225: Losses: 178.802, Precision: 0.992, Recall: 1.000, F1-score: 0.996\n",
299 |       "Iteration 226: Losses: 159.339, Precision: 0.989, Recall: 1.000, F1-score: 0.994\n",
300 |       "Iteration 227: Losses: 166.651, Precision: 0.993, Recall: 1.000, F1-score: 0.996\n",
301 |       "Iteration 228: Losses: 154.083, Precision: 0.991, Recall: 1.000, F1-score: 0.996\n",
302 |       "Iteration 229: Losses: 152.684, Precision: 0.990, Recall: 1.000, F1-score: 0.995\n",
303 |       "Iteration 230: Losses: 179.814, Precision: 0.988, Recall: 1.000, F1-score: 0.994\n",
304 |       "Iteration 231: Losses: 168.555, Precision: 0.985, Recall: 1.000, F1-score: 0.993\n",
305 |       "Iteration 232: Losses: 182.348, Precision: 0.988, Recall: 1.000, F1-score: 0.994\n",
306 |       "Iteration 233: Losses: 175.933, Precision: 0.985, Recall: 1.000, F1-score: 0.993\n",
307 |       "Iteration 234: Losses: 168.575, Precision: 0.992, Recall: 1.000, F1-score: 0.996\n",
308 |       "Iteration 235: Losses: 143.038, Precision: 0.991, Recall: 1.000, F1-score: 0.995\n",
309 |       "Iteration 236: Losses: 161.567, Precision: 0.990, Recall: 1.000, F1-score: 0.995\n",
310 |       "Iteration 237: Losses: 168.450, Precision: 0.994, Recall: 1.000, F1-score: 0.997\n",
311 |       "Iteration 238: Losses: 166.903, Precision: 0.993, Recall: 1.000, F1-score: 0.997\n",
312 |       "Iteration 239: Losses: 162.112, Precision: 0.988, Recall: 1.000, F1-score: 0.994\n",
313 |       "Iteration 240: Losses: 158.231, Precision: 0.990, Recall: 1.000, F1-score: 0.995\n",
314 |       "Iteration 241: Losses: 155.303, Precision: 0.991, Recall: 1.000, F1-score: 0.995\n",
315 |       "Iteration 242: Losses: 152.772, Precision: 0.989, Recall: 1.000, F1-score: 0.995\n",
316 |       "Iteration 243: Losses: 159.833, Precision: 0.991, Recall: 1.000, F1-score: 0.996\n",
317 |       "Iteration 244: Losses: 143.507, Precision: 0.991, Recall: 1.000, F1-score: 0.995\n",
318 |       "Iteration 245: Losses: 182.271, Precision: 0.991, Recall: 1.000, F1-score: 0.996\n",
319 |       "Iteration 246: Losses: 158.287, Precision: 0.989, Recall: 1.000, F1-score: 0.995\n",
320 |       "Iteration 247: Losses: 174.304, Precision: 0.991, Recall: 1.000, F1-score: 0.995\n",
321 |       "Iteration 248: Losses: 166.270, Precision: 0.991, Recall: 1.000, F1-score: 0.995\n",
322 |       "Iteration 249: Losses: 171.564, Precision: 0.993, Recall: 1.000, F1-score: 0.997\n",
323 |       "Iteration 250: Losses: 158.631, Precision: 0.991, Recall: 1.000, F1-score: 0.996\n",
324 |       "Iteration 251: Losses: 161.692, Precision: 0.992, Recall: 1.000, F1-score: 0.996\n",
325 |       "Iteration 252: Losses: 154.421, Precision: 0.991, Recall: 1.000, F1-score: 0.995\n",
326 |       "Iteration 253: Losses: 164.905, Precision: 0.992, Recall: 1.000, F1-score: 0.996\n",
327 |       "Iteration 254: Losses: 152.181, Precision: 0.990, Recall: 1.000, F1-score: 0.995\n",
328 |       "Iteration 255: Losses: 141.292, Precision: 0.983, Recall: 1.000, F1-score: 0.992\n",
329 |       "Iteration 256: Losses: 169.921, Precision: 0.995, Recall: 1.000, F1-score: 0.997\n",
330 |       "Iteration 257: Losses: 149.973, Precision: 0.988, Recall: 1.000, F1-score: 0.994\n",
331 |       "Iteration 258: Losses: 124.414, Precision: 0.991, Recall: 1.000, F1-score: 0.995\n",
332 |       "Iteration 259: Losses: 151.241, Precision: 0.993, Recall: 1.000, F1-score: 0.997\n",
333 |       "Iteration 260: Losses: 172.780, Precision: 0.992, Recall: 1.000, F1-score: 0.996\n",
334 |       "Iteration 261: Losses: 159.021, Precision: 0.993, Recall: 1.000, F1-score: 0.996\n",
335 |       "Iteration 262: Losses: 144.074, Precision: 0.986, Recall: 1.000, F1-score: 0.993\n",
336 |       "Iteration 263: Losses: 161.403, Precision: 0.987, Recall: 1.000, F1-score: 0.993\n",
337 |       "Iteration 264: Losses: 128.211, Precision: 0.991, Recall: 1.000, F1-score: 0.995\n",
338 |       "Iteration 265: Losses: 146.785, Precision: 0.993, Recall: 1.000, F1-score: 0.997\n",
339 |       "Iteration 266: Losses: 149.536, Precision: 0.990, Recall: 1.000, F1-score: 0.995\n",
340 |       "Iteration 267: Losses: 160.178, Precision: 0.991, Recall: 1.000, F1-score: 0.995\n",
341 |       "Iteration 268: Losses: 137.646, Precision: 0.992, Recall: 1.000, F1-score: 0.996\n",
342 |       "Iteration 269: Losses: 158.576, Precision: 0.989, Recall: 1.000, F1-score: 0.995\n",
343 |       "Iteration 270: Losses: 155.475, Precision: 0.990, Recall: 1.000, F1-score: 0.995\n",
344 |       "Iteration 271: Losses: 175.135, Precision: 0.996, Recall: 1.000, F1-score: 0.998\n",
345 |       "Iteration 272: Losses: 146.622, Precision: 0.991, Recall: 1.000, F1-score: 0.996\n",
346 |       "Iteration 273: Losses: 162.552, Precision: 0.985, Recall: 1.000, F1-score: 0.993\n",
347 |       "Iteration 274: Losses: 132.417, Precision: 0.987, Recall: 1.000, F1-score: 0.993\n",
348 |       "Iteration 275: Losses: 162.806, Precision: 0.993, Recall: 1.000, F1-score: 0.996\n",
349 |       "Iteration 276: Losses: 148.515, Precision: 0.990, Recall: 1.000, F1-score: 0.995\n",
350 |       "Iteration 277: Losses: 150.103, Precision: 0.991, Recall: 1.000, F1-score: 0.995\n",
351 |       "Iteration 278: Losses: 128.127, Precision: 0.991, Recall: 1.000, F1-score: 0.995\n",
352 |       "Iteration 279: Losses: 164.356, Precision: 0.988, Recall: 1.000, F1-score: 0.994\n",
353 |       "Iteration 280: Losses: 130.868, Precision: 0.989, Recall: 1.000, F1-score: 0.995\n",
354 |       "Iteration 281: Losses: 166.330, Precision: 0.991, Recall: 1.000, F1-score: 0.996\n",
355 |       "Iteration 282: Losses: 144.873, Precision: 0.995, Recall: 1.000, F1-score: 0.998\n",
356 |       "Iteration 283: Losses: 179.280, Precision: 0.993, Recall: 1.000, F1-score: 0.996\n",
357 |       "Iteration 284: Losses: 145.830, Precision: 0.989, Recall: 1.000, F1-score: 0.995\n",
358 |       "Iteration 285: Losses: 147.223, Precision: 0.993, Recall: 1.000, F1-score: 0.996\n",
359 |       "Iteration 286: Losses: 134.719, Precision: 0.995, Recall: 1.000, F1-score: 0.997\n",
360 |       "Iteration 287: Losses: 148.663, Precision: 0.989, Recall: 1.000, F1-score: 0.994\n",
361 |       "Iteration 288: Losses: 116.841, Precision: 0.996, Recall: 1.000, F1-score: 0.998\n",
362 |       "Iteration 289: Losses: 150.505, Precision: 0.991, Recall: 1.000, F1-score: 0.995\n",
363 |       "Iteration 290: Losses: 117.468, Precision: 0.990, Recall: 1.000, F1-score: 0.995\n",
364 |       "Iteration 291: Losses: 151.356, Precision: 0.990, Recall: 1.000, F1-score: 0.995\n",
365 |       "Iteration 292: Losses: 137.203, Precision: 0.995, Recall: 1.000, F1-score: 0.997\n",
366 |       "Iteration 293: Losses: 151.406, Precision: 0.990, Recall: 1.000, F1-score: 0.995\n",
367 |       "Iteration 294: Losses: 163.338, Precision: 0.995, Recall: 1.000, F1-score: 0.998\n",
368 |       "Iteration 295: Losses: 146.029, Precision: 0.987, Recall: 1.000, F1-score: 0.994\n",
369 |       "Iteration 296: Losses: 111.953, Precision: 0.995, Recall: 1.000, F1-score: 0.998\n",
370 |       "Iteration 297: Losses: 128.676, Precision: 0.996, Recall: 1.000, F1-score: 0.998\n",
371 |       "Iteration 298: Losses: 161.734, Precision: 0.988, Recall: 1.000, F1-score: 0.994\n",
372 |       "Iteration 299: Losses: 152.918, Precision: 0.990, Recall: 1.000, F1-score: 0.995\n",
373 |       "Iteration 300: Losses: 176.403, Precision: 0.991, Recall: 1.000, F1-score: 0.995\n"
374 |      ]
375 |     }
376 |    ],
377 |    "source": [
378 |     "# Initialize the optimizer\n",
379 |     "optimizer = nlp.begin_training()\n",
380 |     "\n",
381 |     "# Training loop\n",
382 |     "n_iter = 300\n",
383 |     "for itn in range(n_iter):\n",
384 |     "    random.shuffle(train_examples)\n",
385 |     "    losses = {}\n",
386 |     "    # Batch up the examples using spaCy's minibatch\n",
387 |     "    batches = minibatch(train_examples, size=compounding(4.0, 32.0, 1.001))\n",
388 |     "    for batch in batches:\n",
389 |     "        nlp.update(\n",
390 |     "            batch,  # batch of Example objects\n",
391 |     "            drop=0.2,  # dropout - make it harder to memorise data\n",
392 |     "            sgd=optimizer,  # callable to update weights\n",
393 |     "            losses=losses\n",
394 |     "        )\n",
395 |     "    scores = nlp.evaluate(train_examples)\n",
396 |     "    ents_p = scores[\"ents_p\"]\n",
397 |     "    ents_r = scores[\"ents_r\"]\n",
398 |     "    ents_f = scores[\"ents_f\"]\n",
399 |     "\n",
400 |     "    print(f\"Iteration {itn+1}: Losses: {losses['ner']:.3f}, Precision: {ents_p:.3f}, Recall: {ents_r:.3f}, F1-score: {ents_f:.3f}\")\n",
401 |     "\n",
402 |     "# Save the model\n",
403 |     "nlp.to_disk(\"ner_model\")"
404 |    ]
405 |   }
406 |  ],
407 |  "metadata": {
408 |   "kernelspec": {
409 |    "display_name": "Python 3",
410 |    "language": "python",
411 |    "name": "python3"
412 |   },
413 |   "language_info": {
414 |    "codemirror_mode": {
415 |     "name": "ipython",
416 |     "version": 3
417 |    },
418 |    "file_extension": ".py",
419 |    "mimetype": "text/x-python",
420 |    "name": "python",
421 |    "nbconvert_exporter": "python",
422 |    "pygments_lexer": "ipython3",
423 |    "version": "3.11.7"
424 |   }
425 |  },
426 |  "nbformat": 4,
427 |  "nbformat_minor": 2
428 | }
429 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | <div align='center'>
 2 |   <h1>📑ATS Scoring System📑</h1>
 3 |   </div>
 4 | 
 5 | ## 🌟 Project Overview
 6 | The ATS Scoring System is designed to parse resumes, extract entities and keywords, and score resumes based on the found keywords. The system utilizes a spaCy model trained on a [Kaggle dataset](https://www.kaggle.com/datasets/dataturks/resume-entities-for-ner) to identify and score important keywords in resumes. Additionally, it provides suggestions for improvement and displays extracted entities. The entire application is built using Streamlit, allowing users to interact with the system through a web interface.
 7 | 
 8 | ## ✨ Features
 9 | - 📄 **Resume Parsing**: Extract text from various formats (PDF, DOCX, TXT).
10 | - 🔍 **Entity Extraction**: Identify and extract entities from resumes using a spaCy model.
11 | - 💯 **Keyword Scoring**: Evaluate the resume based on the presence of relevant keywords and provide a score.
12 | - 💡 **Suggestions**: Offer suggestions to improve the resume based on the extracted entities and keywords.
13 | - 📊 **Visualization**: Display the extracted entities and keywords in a user-friendly format.
14 | 
15 | ## 🛠️ Technologies Used
16 | - 🧠 **spaCy**: For natural language processing and entity recognition.
17 | - 🐼 **pandas**: For handling and processing data.
18 | - 📚 **pdfplumber**: To extract text from PDF files.
- 📝 **python-docx**: To extract text from DOCX files.
20 | - 🌐 **Streamlit**: To create a web-based interface for interacting with the ATS scoring system.
21 | 
22 | ## 🚀 Installation
23 | 1. Clone the repository:
   ```bash
   git clone https://github.com/miteshgupta07/ATS-Scoring-System.git
   ```

27 | 2. Navigate to the project directory:
28 | 
   ```bash
   cd ATS-Scoring-System
   ```

32 | 3. Install the required packages:
   ```bash
   pip install -r requirement.txt
   ```

## 🖥️ Usage
37 | 1. Run the Streamlit app:
38 | 
39 |   ```bash
40 |   streamlit run app.py
41 | ```
42 | 
43 | 2. Upload your resume in PDF, DOCX, or TXT format.
44 | 
45 | 3. Enter the job description for comparison.
46 | 
47 | 4. View the ATS score, Suggestions, and extracted entities.
48 | 
## 📜 License
50 | This project is licensed under the MIT License - see the [LICENSE](https://github.com/miteshgupta07/ATS-Scoring-System/blob/main/LICENSE) file for details.
51 | 


--------------------------------------------------------------------------------
/app.py:
--------------------------------------------------------------------------------
  1 | import streamlit as st
  2 | import pdfplumber
  3 | from docx import Document
  4 | import spacy
  5 | import re
  6 | import pandas as pd
  7 | import time
  8 | 
  9 | st.write("# **Applicant Tracking System (ATS)📑**")
 10 | 
 11 | uploaded_resume=st.file_uploader("Upload CV/Resume",['pdf','docx','txt'])
 12 | job_description=st.text_area('Enter Job Description')
 13 | 
 14 | def extract_text_from_pdf(pdf_file):
 15 |     text = ""
 16 |     with pdfplumber.open(pdf_file) as pdf:
 17 | 
 18 |         for page in pdf.pages:
 19 |             text += page.extract_text()
 20 |     return text
 21 | 
 22 | def extract_text_from_txt(text_file):
 23 |     with open(text_file,'r') as txt_file:
 24 |         text=txt_file.read()
 25 |     return text
 26 | 
 27 | def extract_text_from_docx(word_file):
 28 |     doc = Document(word_file)
 29 |     text = ""
 30 |     for para in doc.paragraphs:
 31 |         text += para.text
 32 |     return text
 33 | 
 34 | def preprocess_text(text):
 35 |     replacements = {
 36 |         'â€“': '–',    # en dash
 37 |         'â€”': '—',    # em dash
 38 |         'â€˜': '‘',    # left single quotation mark
 39 |         'â€™': '’',    # right single quotation mark
 40 |         'â€œ': '“',    # left double quotation mark
 41 |         'â€�': '”',    # right double quotation mark
 42 |         'â€¢': '•',    # bullet point
 43 |         'â€¦': '…',    # ellipsis
 44 |         'Ã©': 'é',     # é
 45 |         'Ã¨': 'è',     # è
 46 |         'Ã¢': 'â',     # â
 47 |         'Ã´': 'ô',     # ô
 48 |         'Ã¼': 'ü',     # ü
 49 |         'Ã±': 'ñ',     # ñ
 50 |         'Ã‹': 'Ë',     # Ë
 51 |         'Ã¡': 'á',     # á
 52 |         'Ãº': 'ú',     # ú
 53 |         'Ã®': 'î',     # î
 54 |         'Ã€': 'À',     # À
 55 |         'Ã¬': 'ì',     # ì
 56 |         'Ã™': 'Ù',     # Ù
 57 |         'Ã': 'Í',     # Í
 58 |         'Ã–': 'Ö',     # Ö
 59 |         'Ã': 'Á',     # Á
 60 |         'ÃŒ': 'Ì',     # Ì
 61 |         'Ã‰': 'É',     # É
 62 |         'Ã': 'Ï',     # Ï
 63 |         'Ã«': 'ë',     # ë
 64 |         'Ã³': 'ó',     # ó
 65 |         'Ãž': 'Þ',     # Þ
 66 |         'Ãš': 'Ú',     # Ú
 67 |         'Ã¦': 'æ',     # æ
 68 |         'Ã˜': 'Ø',     # Ø
 69 |         'ÃŸ': 'ß',     # ß
 70 |         'Ã°': 'ð',     # ð
 71 |         'Ã­': 'í',     # í
 72 |         'Ãµ': 'õ',     # õ
 73 |         'Ã¥': 'å',     # å
 74 |         'Ã¯': 'ï',     # ï
 75 |         'Ã£': 'ã',     # ã
 76 |         'Ã¤': 'ä',     # ä
 77 |         'Ã¶': 'ö',     # ö
 78 |         'Ã¼': 'ü',     # ü
 79 |         'â‚¬': '€',    # Euro sign
 80 |         'â„¢': '™',    # Trademark sign
 81 |         'âˆ‚': '∂',    # Partial differential
 82 |         'âˆ€': '∀',    # For all
 83 |         'âˆˆ': '∈',    # Element of
 84 |         'âˆƒ': '∃',    # There exists
 85 |         'âˆ…': '∅',    # Empty set
 86 |         'âˆ†': '∆',    # Increment
 87 |         'âˆ‡': '∇',    # Nabla
 88 |         'âˆ‘': '∑',    # N-ary summation
 89 |         'âˆ—': '∗',    # Asterisk operator
 90 |         'âˆ˜': '∘',    # Ring operator
 91 |         'âˆ™': '∙',    # Bullet operator
 92 |         'âˆš': '√',    # Square root
 93 |         'âˆ›': '∧',    # Logical and
 94 |         'âˆ¥': '∥',    # Parallel to
 95 |         'âˆ¼': '∼',    # Tilde operator
 96 |         'âˆ¾': '≀',    # Wreath product
 97 |         'âˆ¿': '≁',    # Not tilde
 98 |         'âˆ‹': '⊂',    # Subset of
 99 |         'âˆ›': '⊃',    # Superset of
100 |         'â‰': '≠',     # Not equal to
101 |         'â‰¤': '≤',    # Less-than or equal to
102 |         'â‰¥': '≥',    # Greater-than or equal to
103 |         'â‰¤': '≤',    # Less-than or equal to
104 |         'â‰≥': '≥',    # Greater-than or equal to
105 |         'â‰²': '²',    # Superscript two
106 |         'â‰³': '³',    # Superscript three
107 |         'â‰®': '≡',    # Identical to
108 |         'â‰³': '≥',    # Greater-than or equal to
109 |         'â‰¯': '≣',    # Equivalent to
110 |         'â‰¤': '≤',    # Less-than or equal to
111 |         'â‰³': '≥',    # Greater-than or equal to
112 |         'â‰®': '≡',    # Identical to
113 |     }
114 |     
115 |     # Replace the characters in the text
116 |     for wrong_char, correct_char in replacements.items():
117 |         text = text.replace(wrong_char, correct_char)
118 |     
119 |     # Remove newline characters and any extraneous whitespace
120 |     text = re.sub(r'\n+', ' ', text)  # Replace multiple newlines with a single space
121 |     text = re.sub(r'\s+', ' ', text)  # Replace multiple spaces with a single space
122 |     processed_text = text.strip()  # Remove leading and trailing whitespace
123 |     
124 |     return processed_text
125 | 
# Loaded spaCy pipelines keyed by path, so the model is read from disk once
# instead of on every extract_entities() call.
_NER_MODEL_CACHE = {}


def _get_ner_model(model_path='./ner_model'):
    """Return the spaCy pipeline stored at ``model_path``, loading it on first use."""
    if model_path not in _NER_MODEL_CACHE:
        _NER_MODEL_CACHE[model_path] = spacy.load(model_path)
    return _NER_MODEL_CACHE[model_path]


def extract_entities(text):
    """Run the NER model in ``./ner_model`` over ``text``.

    Args:
        text (str): Text to analyse.

    Returns:
        list[list[str]]: One ``[label, entity_text]`` pair per entity found,
        in document order.
    """
    doc = _get_ner_model()(text)
    return [[ent.label_, ent.text] for ent in doc.ents]
134 | 
def find_not_found_keywords(resume_keywords,job_keywords):
    """Return the entity labels the job description has but the resume lacks.

    Both arguments are lists of ``[label, entity_text]`` pairs. Only the
    label (index 0) is compared.

    Args:
        resume_keywords: Entities extracted from the resume.
        job_keywords: Entities extracted from the job description.

    Returns:
        list[str]: Each missing label once, in the order it first appears in
        ``job_keywords``. Empty when nothing is missing.
    """
    resume_labels = {entry[0] for entry in resume_keywords}

    missing = []
    for entry in job_keywords:
        label = entry[0]
        # Compare label to label, and report every missing label only once.
        if label not in resume_labels and label not in missing:
            missing.append(label)
    return missing
142 | 
def show_ATS_score(text,job_description):
    """Score a resume against a job description.

    The score is the number of entities found in the resume relative to the
    number found in the job description, as a percentage capped at 100.

    Args:
        text (str): Resume text.
        job_description (str): Job description text.

    Returns:
        tuple: ``(score, not_found_keywords)``. ``score`` is a float in
        ``[0, 100]`` rounded to two decimals, or ``-1`` when the job
        description yields no entities. ``not_found_keywords`` is whatever
        ``find_not_found_keywords`` returns for the two entity lists.
    """
    resume_keywords = extract_entities(text)
    job_keywords = extract_entities(job_description)

    if job_keywords:
        ratio = len(resume_keywords) / len(job_keywords) * 100
        # The UI reports the value "out of 100", so a resume with more
        # entities than the job description must not exceed that.
        score = round(min(ratio, 100.0), 2)
    else:
        # Sentinel the caller checks to report "no keywords found".
        score = -1

    not_found_keywords = find_not_found_keywords(resume_keywords, job_keywords)
    return score, not_found_keywords
155 | 
def show_entities(text):
    """Render the entities extracted from ``text`` as a two-column table.

    Args:
        text (str): Text passed to ``extract_entities``.
    """
    rows = extract_entities(text)
    entity_table = pd.DataFrame(rows, columns=["Label", "Entity"])
    st.table(entity_table)
160 | 
def show_suggestion(keywords):
    """Display one improvement hint per entity label in ``keywords``.

    Labels without a hint are skipped and repeated labels are shown once.
    Nothing is written when no label has a hint.

    Args:
        keywords: Iterable of entity label strings.
    """
    improvement_suggestions = {
        "NAME": "• **Name:** Including your full name helps recruiters easily identify you.",
        "EMAIL_ADDRESS": "• **Email Address:** Providing an email address makes it easier for recruiters to contact you.",
        "LOCATION": "• **Location:** Including your location helps recruiters find candidates in specific geographic areas.",
        "DEGREE": "• **Degree:** Listing your degree demonstrates your educational qualifications.",
        "GRADUATION_YEAR": "• **Graduation Year:** Including your graduation year helps recruiters understand your experience level.",
        "COLLEGE_NAME": "• **College/University Name:** Mentioning your college or university name adds credibility and context to your educational background.",
        "SKILLS": "• **Skills:** Highlighting your skills shows your expertise and can make your resume stand out.",
        "WORK_EXPERIENCE": "• **Work Experience:** Detailing your previous job roles and responsibilities provides insight into your professional background.",
        "CERTIFICATIONS": "• **Certifications:** Adding relevant certifications can showcase your additional qualifications and specialized knowledge.",
        "PROJECTS": "• **Projects:** Describing significant projects you have worked on can demonstrate your practical experience and problem-solving abilities.",
        "LANGUAGES": "• **Languages:** Including languages you are proficient in can be advantageous, especially for roles requiring multilingual skills.",
        "LINKEDIN_PROFILE": "• **LinkedIn Profile:** Providing a link to your LinkedIn profile can give recruiters a more comprehensive view of your professional network and endorsements.",
        "PROFESSIONAL_SUMMARY": "• **Professional Summary:** Writing a concise professional summary at the top of your resume can quickly convey your key strengths and career objectives.",
        "DESIGNATION": "• **Designation:** Specifying your current or desired job title helps recruiters understand your career level and aspirations.",
        "COMPANIES_WORKED_AT": "• **Companies Worked At:** Naming your previous employers gives recruiters context for your work history.",
        "YEARS_OF_EXPERIENCE": "• **Years of Experience:** Stating your total years of experience helps recruiters gauge your seniority.",
        }

    # dict.fromkeys drops repeated labels while keeping first-seen order;
    # .get-style filtering avoids a KeyError for labels that have no hint.
    messages = [
        improvement_suggestions[label]
        for label in dict.fromkeys(keywords)
        if label in improvement_suggestions
    ]
    if not messages:
        return

    st.write("#### You can add the following details to improve your score:")
    for message in messages:
        st.write(message)
182 | 
def main(uploaded_resume,job_description):
    """Drive the page: read the resume, score it and render the results.

    Args:
        uploaded_resume: The object returned by the file-uploader widget, or
            ``None`` when nothing has been uploaded yet.
        job_description (str): Contents of the job-description text area.
    """
    if uploaded_resume is None:
        return

    doc_type = uploaded_resume.type
    if doc_type == 'invalid':
        st.error("CV/Resume should be in PDF/DOCX/TXT format")
        # Stop here: there is no text to work with for an unsupported file.
        return

    if doc_type == 'application/pdf':
        text = extract_text_from_pdf(uploaded_resume)
    elif doc_type == 'text/plain':
        # The upload is a file object rather than a path, so its bytes are
        # decoded here instead of being sent to the DOCX parser.
        text = uploaded_resume.read().decode('utf-8', errors='replace')
    else:
        text = extract_text_from_docx(uploaded_resume)

    if not text:
        st.warning("No text could be extracted from the uploaded file")
        return

    if not job_description:
        # Without a job description only the resume entities can be shown.
        if st.checkbox('Show Entities'):
            st.write("### Resume Keywords")
            show_entities(text)
        return

    text = preprocess_text(text)
    job_description = preprocess_text(job_description)

    progress_bar = st.progress(0)
    for i in range(100):
        time.sleep(0.01)
        progress_bar.progress(i + 1)
    time.sleep(1)

    ats_score, not_found_keywords = show_ATS_score(text, job_description)

    if ats_score == -1:
        st.warning("No Keywords Found in Job Description")
    elif ats_score <= 50:
        # Includes a score of exactly 0, which must not be reported as good.
        st.error(f"Your ATS Score is {ats_score} out of 100")
        st.write("Your score is Low")
    elif ats_score <= 75:
        st.warning(f"Your ATS Score is {ats_score} out of 100")
        st.write("Your score is Average")
    else:
        st.success(f"Your ATS Score is {ats_score} out of 100")
        st.write("Your score is Good")

    if not_found_keywords:
        st.write('## *Suggestion Based on your Resume*')
        show_suggestion(not_found_keywords)

    if st.checkbox('Show All Entities'):
        col1, col2 = st.columns(2)
        with col1:
            st.write("### Resume Keywords")
            show_entities(text)
        with col2:
            st.write("### Job Description Keywords")
            show_entities(job_description)
241 | 
242 | main(uploaded_resume,job_description) 


--------------------------------------------------------------------------------
/base_config.cfg:
--------------------------------------------------------------------------------
 1 | # This is an auto-generated partial config. To use it with 'spacy train'
 2 | # you can run spacy init fill-config to auto-fill all default settings:
 3 | # python -m spacy init fill-config ./base_config.cfg ./config.cfg
 4 | [paths]
 5 | train = null
 6 | dev = null
 7 | vectors = "en_core_web_lg"
 8 | [system]
 9 | gpu_allocator = null
10 | 
11 | [nlp]
12 | lang = "en"
13 | pipeline = ["tok2vec","ner"]
14 | batch_size = 64
15 | 
16 | [components]
17 | 
18 | [components.tok2vec]
19 | factory = "tok2vec"
20 | 
21 | [components.tok2vec.model]
22 | @architectures = "spacy.Tok2Vec.v2"
23 | 
24 | [components.tok2vec.model.embed]
25 | @architectures = "spacy.MultiHashEmbed.v2"
26 | width = ${components.tok2vec.model.encode.width}
27 | attrs = ["NORM", "PREFIX", "SUFFIX", "SHAPE"]
28 | rows = [5000, 1000, 2500, 2500]
29 | include_static_vectors = true
30 | 
31 | [components.tok2vec.model.encode]
32 | @architectures = "spacy.MaxoutWindowEncoder.v2"
33 | width = 256
34 | depth = 8
35 | window_size = 1
36 | maxout_pieces = 3
37 | 
38 | [components.ner]
39 | factory = "ner"
40 | 
41 | [components.ner.model]
42 | @architectures = "spacy.TransitionBasedParser.v2"
43 | state_type = "ner"
44 | extra_state_tokens = false
45 | hidden_width = 64
46 | maxout_pieces = 2
47 | use_upper = true
48 | nO = null
49 | 
50 | [components.ner.model.tok2vec]
51 | @architectures = "spacy.Tok2VecListener.v1"
52 | width = ${components.tok2vec.model.encode.width}
53 | 
54 | [corpora]
55 | 
56 | [corpora.train]
57 | @readers = "spacy.Corpus.v1"
58 | path = ${paths.train}
59 | max_length = 0
60 | 
61 | [corpora.dev]
62 | @readers = "spacy.Corpus.v1"
63 | path = ${paths.dev}
64 | max_length = 0
65 | 
66 | [training]
67 | dev_corpus = "corpora.dev"
68 | train_corpus = "corpora.train"
69 | 
70 | [training.optimizer]
71 | @optimizers = "Adam.v1"
72 | 
73 | [training.batcher]
74 | @batchers = "spacy.batch_by_words.v1"
75 | discard_oversize = false
76 | tolerance = 0.2
77 | 
78 | [training.batcher.size]
79 | @schedules = "compounding.v1"
80 | start = 100
81 | stop = 1000
82 | compound = 1.001
83 | 
84 | [initialize]
85 | vectors = ${paths.vectors}


--------------------------------------------------------------------------------
/data/test/Alice Clark CV.docx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/miteshgupta07/ATS-Scoring-System/49ec0e353cde20189c49cd4c6a4cb64a3b926f2d/data/test/Alice Clark CV.docx


--------------------------------------------------------------------------------
/data/test/Alice Clark CV.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/miteshgupta07/ATS-Scoring-System/49ec0e353cde20189c49cd4c6a4cb64a3b926f2d/data/test/Alice Clark CV.pdf


--------------------------------------------------------------------------------
/data/test/Alice Clark CV.txt:
--------------------------------------------------------------------------------
 1 | Alice Clark 
 2 | AI / Machine Learning 
 3 |  
 4 | Delhi, India Email me on Indeed 
 5 | • 
 6 | 20+ years of experience in data handling, design, and development 
 7 | • 
 8 | Data Warehouse: Data analysis, star/snow flake scema data modelling and design specific to 
 9 | data warehousing and business intelligence 
10 | • 
11 | Database: Experience in database designing, scalability, back-up and recovery, writing and 
12 | optimizing SQL code and Stored Procedures, creating functions, views, triggers and indexes. 
13 | Cloud platform: Worked on Microsoft Azure cloud services like Document DB, SQL Azure, 
14 | Stream Analytics, Event hub, Power BI, Web Job, Web App, Power BI, Azure data lake 
15 | analytics(U-SQL) 
16 | Willing to relocate anywhere 
17 |  
18 | WORK EXPERIENCE 
19 | Software Engineer 
20 | Microsoft – Bangalore, Karnataka 
21 | January 2000 to Present 
22 | 1. Microsoft Rewards Live dashboards: 
23 | Description: - Microsoft rewards is loyalty program that rewards Users for browsing and shopping 
24 | online. Microsoft Rewards members can earn points when searching with Bing, browsing with 
25 | Microsoft Edge and making purchases at the Xbox Store, the Windows Store and the Microsoft 
26 | Store. Plus, user can pick up bonus points for taking daily quizzes and tours on the Microsoft 
27 | rewards website. Rewards live dashboards gives a live picture of usage world-wide and by 
28 | markets like US, Canada, Australia, new user registration count, top/bottom performing rewards 
29 | offers, orders stats and weekly trends of user activities, orders and new user registrations. the 
30 | PBI tiles gets refreshed in different frequencies starting from 5 seconds to 30 minutes. 
31 | Technology/Tools used 
32 |  
33 | EDUCATION 
34 | Indian Institute of Technology – Mumbai 
35 | 2001 
36 |  
37 | SKILLS 
38 | Machine Learning, Natural Language Processing, and Big Data Handling 
39 |  
40 | ADDITIONAL INFORMATION 
41 | Professional Skills 
42 | • Excellent analytical, problem solving, communication, knowledge transfer and interpersonal 
43 | skills with ability to interact with individuals at all the levels 
44 | • Quick learner and maintains cordial relationship with project manager and team members and 
45 | good performer both in team and independent job environments 
46 | • Positive attitude towards superiors &amp; peers 
47 | • Supervised junior developers throughout project lifecycle and provided technical assistance 
48 | 


--------------------------------------------------------------------------------
/data/test/Smith Resume.docx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/miteshgupta07/ATS-Scoring-System/49ec0e353cde20189c49cd4c6a4cb64a3b926f2d/data/test/Smith Resume.docx


--------------------------------------------------------------------------------
/data/test/Smith Resume.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/miteshgupta07/ATS-Scoring-System/49ec0e353cde20189c49cd4c6a4cb64a3b926f2d/data/test/Smith Resume.pdf


--------------------------------------------------------------------------------
/ner_model/config.cfg:
--------------------------------------------------------------------------------
  1 | [paths]
  2 | train = null
  3 | dev = null
  4 | vectors = null
  5 | init_tok2vec = null
  6 | 
  7 | [system]
  8 | seed = 0
  9 | gpu_allocator = null
 10 | 
 11 | [nlp]
 12 | lang = "en"
 13 | pipeline = ["ner"]
 14 | disabled = []
 15 | before_creation = null
 16 | after_creation = null
 17 | after_pipeline_creation = null
 18 | batch_size = 1000
 19 | tokenizer = {"@tokenizers":"spacy.Tokenizer.v1"}
 20 | vectors = {"@vectors":"spacy.Vectors.v1"}
 21 | 
 22 | [components]
 23 | 
 24 | [components.ner]
 25 | factory = "ner"
 26 | incorrect_spans_key = null
 27 | moves = null
 28 | scorer = {"@scorers":"spacy.ner_scorer.v1"}
 29 | update_with_oracle_cut_size = 100
 30 | 
 31 | [components.ner.model]
 32 | @architectures = "spacy.TransitionBasedParser.v2"
 33 | state_type = "ner"
 34 | extra_state_tokens = false
 35 | hidden_width = 64
 36 | maxout_pieces = 2
 37 | use_upper = true
 38 | nO = null
 39 | 
 40 | [components.ner.model.tok2vec]
 41 | @architectures = "spacy.HashEmbedCNN.v2"
 42 | pretrained_vectors = null
 43 | width = 96
 44 | depth = 4
 45 | embed_size = 2000
 46 | window_size = 1
 47 | maxout_pieces = 3
 48 | subword_features = true
 49 | 
 50 | [corpora]
 51 | 
 52 | [corpora.dev]
 53 | @readers = "spacy.Corpus.v1"
 54 | path = ${paths.dev}
 55 | gold_preproc = false
 56 | max_length = 0
 57 | limit = 0
 58 | augmenter = null
 59 | 
 60 | [corpora.train]
 61 | @readers = "spacy.Corpus.v1"
 62 | path = ${paths.train}
 63 | gold_preproc = false
 64 | max_length = 0
 65 | limit = 0
 66 | augmenter = null
 67 | 
 68 | [training]
 69 | seed = ${system.seed}
 70 | gpu_allocator = ${system.gpu_allocator}
 71 | dropout = 0.1
 72 | accumulate_gradient = 1
 73 | patience = 1600
 74 | max_epochs = 0
 75 | max_steps = 20000
 76 | eval_frequency = 200
 77 | frozen_components = []
 78 | annotating_components = []
 79 | dev_corpus = "corpora.dev"
 80 | train_corpus = "corpora.train"
 81 | before_to_disk = null
 82 | before_update = null
 83 | 
 84 | [training.batcher]
 85 | @batchers = "spacy.batch_by_words.v1"
 86 | discard_oversize = false
 87 | tolerance = 0.2
 88 | get_length = null
 89 | 
 90 | [training.batcher.size]
 91 | @schedules = "compounding.v1"
 92 | start = 100
 93 | stop = 1000
 94 | compound = 1.001
 95 | t = 0.0
 96 | 
 97 | [training.logger]
 98 | @loggers = "spacy.ConsoleLogger.v1"
 99 | progress_bar = false
100 | 
101 | [training.optimizer]
102 | @optimizers = "Adam.v1"
103 | beta1 = 0.9
104 | beta2 = 0.999
105 | L2_is_weight_decay = true
106 | L2 = 0.01
107 | grad_clip = 1.0
108 | use_averages = false
109 | eps = 0.00000001
110 | learn_rate = 0.001
111 | 
112 | [training.score_weights]
113 | ents_f = 1.0
114 | ents_p = 0.0
115 | ents_r = 0.0
116 | ents_per_type = null
117 | 
118 | [pretraining]
119 | 
120 | [initialize]
121 | vectors = ${paths.vectors}
122 | init_tok2vec = ${paths.init_tok2vec}
123 | vocab_data = null
124 | lookups = null
125 | before_init = null
126 | after_init = null
127 | 
128 | [initialize.components]
129 | 
130 | [initialize.tokenizer]


--------------------------------------------------------------------------------
/ner_model/meta.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "lang":"en",
 3 |   "name":"pipeline",
 4 |   "version":"0.0.0",
 5 |   "spacy_version":">=3.7.5,<3.8.0",
 6 |   "description":"",
 7 |   "author":"",
 8 |   "email":"",
 9 |   "url":"",
10 |   "license":"",
11 |   "spacy_git_version":"a6d0fc360",
12 |   "vectors":{
13 |     "width":0,
14 |     "vectors":0,
15 |     "keys":0,
16 |     "name":null,
17 |     "mode":"default"
18 |   },
19 |   "labels":{
20 |     "ner":[
21 |       "COLLEGE_NAME",
22 |       "COMPANIES_WORKED_AT",
23 |       "DEGREE",
24 |       "DESIGNATION",
25 |       "EMAIL_ADDRESS",
26 |       "GRADUATION_YEAR",
27 |       "LOCATION",
28 |       "NAME",
29 |       "SKILLS",
30 |       "UNKNOWN",
31 |       "YEARS_OF_EXPERIENCE"
32 |     ]
33 |   },
34 |   "pipeline":[
35 |     "ner"
36 |   ],
37 |   "components":[
38 |     "ner"
39 |   ],
40 |   "disabled":[
41 | 
42 |   ]
43 | }


--------------------------------------------------------------------------------
/ner_model/ner/cfg:
--------------------------------------------------------------------------------
 1 | {
 2 |   "moves":null,
 3 |   "update_with_oracle_cut_size":100,
 4 |   "multitasks":[
 5 | 
 6 |   ],
 7 |   "min_action_freq":1,
 8 |   "learn_tokens":false,
 9 |   "beam_width":1,
10 |   "beam_density":0.0,
11 |   "beam_update_prob":0.0,
12 |   "incorrect_spans_key":null
13 | }


--------------------------------------------------------------------------------
/ner_model/ner/model:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/miteshgupta07/ATS-Scoring-System/49ec0e353cde20189c49cd4c6a4cb64a3b926f2d/ner_model/ner/model


--------------------------------------------------------------------------------
/ner_model/ner/moves:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/miteshgupta07/ATS-Scoring-System/49ec0e353cde20189c49cd4c6a4cb64a3b926f2d/ner_model/ner/moves


--------------------------------------------------------------------------------
/ner_model/tokenizer:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/miteshgupta07/ATS-Scoring-System/49ec0e353cde20189c49cd4c6a4cb64a3b926f2d/ner_model/tokenizer


--------------------------------------------------------------------------------
/ner_model/vocab/key2row:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/miteshgupta07/ATS-Scoring-System/49ec0e353cde20189c49cd4c6a4cb64a3b926f2d/ner_model/vocab/key2row


--------------------------------------------------------------------------------
/ner_model/vocab/lookups.bin:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/miteshgupta07/ATS-Scoring-System/49ec0e353cde20189c49cd4c6a4cb64a3b926f2d/ner_model/vocab/lookups.bin


--------------------------------------------------------------------------------
/ner_model/vocab/vectors:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/miteshgupta07/ATS-Scoring-System/49ec0e353cde20189c49cd4c6a4cb64a3b926f2d/ner_model/vocab/vectors


--------------------------------------------------------------------------------
/ner_model/vocab/vectors.cfg:
--------------------------------------------------------------------------------
1 | {
2 |   "mode":"default"
3 | }


--------------------------------------------------------------------------------
/requirement.txt:
--------------------------------------------------------------------------------
1 | streamlit==1.36.0
2 | pdfplumber==0.11.2
3 | python-docx==1.1.2
4 | spacy==3.7.5
5 | pandas==2.2.2
6 | 


--------------------------------------------------------------------------------