└── semi  supervised classifier.ipynb


/semi  supervised classifier.ipynb:
--------------------------------------------------------------------------------
 1 | {
 2 |  "cells": [
 3 |   {
 4 |    "cell_type": "code",
 5 |    "execution_count": 6,
 6 |    "metadata": {},
 7 |    "outputs": [
 8 |     {
 9 |      "name": "stdout",
10 |      "output_type": "stream",
11 |      "text": [
12 |       "Accuracy: 1.0000\n",
13 |       "Precision: 1.0000\n",
14 |       "Recall: 1.0000\n"
15 |      ]
16 |     }
17 |    ],
18 |    "source": [
19 |     "from sklearn.feature_extraction.text import TfidfVectorizer\n",
20 |     "from sklearn.semi_supervised import LabelPropagation\n",
21 |     "from sklearn.metrics import accuracy_score, precision_score, recall_score\n",
22 |     "import numpy as np\n",
23 |     "\n",
24 |     "labeled_data = [\n",
25 |     "    (\"This is a document about sports\", \"Sports\"),\n",
26 |     "    (\"This is a news article\", \"News\"),\n",
27 |     "    (\"Another document about sports\", \"Sports\"),\n",
28 |     "    (\"A text sample about politics\", \"Politics\"),\n",
29 |     "    (\"A document discussing music\", \"Music\")\n",
30 |     "]\n",
31 |     "unlabeled_data = [\n",
32 |     "    \"This document discusses machine learning\",\n",
33 |     "    \"Another document about music\",\n",
34 |     "    \"A short text sample\"\n",
35 |     "]\n",
36 |     "\n",
37 |     "all_data = [text for text, _ in labeled_data] + unlabeled_data\n",
38 |     "texts, labels = zip(*labeled_data)\n",
39 |     "vectorizer = TfidfVectorizer(max_features=500)\n",
40 |     "features = vectorizer.fit_transform(all_data)\n",
41 |     "features_dense = features.toarray()\n",
42 |     "all_labels = sorted(set(labels))\n",
43 |     "label_distributions = np.zeros((len(texts), len(all_labels)))\n",
44 |     "for i, label in enumerate(labels):\n",
45 |     "    label_distributions[i, all_labels.index(label)] = 1\n",
46 |     "\n",
47 |     "X_train = features_dense[:len(texts)]\n",
48 |     "y_train = labels\n",
49 |     "X_test = X_train\n",
50 |     "y_test = y_train\n",
51 |     "\n",
52 |     "y_train_indices = np.array([all_labels.index(label) for label in y_train])\n",
53 |     "semi_clf = LabelPropagation()\n",
54 |     "semi_clf.fit(X_train, y_train_indices)\n",
55 |     "predictions = semi_clf.predict(X_test)\n",
56 |     "accuracy = accuracy_score(np.array([all_labels.index(label) for label in y_test]), predictions)\n",
57 |     "precision = precision_score(np.array([all_labels.index(label) for label in y_test]), predictions, average='weighted')\n",
58 |     "recall = recall_score(np.array([all_labels.index(label) for label in y_test]), predictions, average='weighted')\n",
59 |     "print(f\"Accuracy: {accuracy:.4f}\")\n",
60 |     "print(f\"Precision: {precision:.4f}\")\n",
61 |     "print(f\"Recall: {recall:.4f}\")\n"
62 |    ]
63 |   }
64 |  ],
65 |  "metadata": {
66 |   "kernelspec": {
67 |    "display_name": "Python 3",
68 |    "language": "python",
69 |    "name": "python3"
70 |   },
71 |   "language_info": {
72 |    "codemirror_mode": {
73 |     "name": "ipython",
74 |     "version": 3
75 |    },
76 |    "file_extension": ".py",
77 |    "mimetype": "text/x-python",
78 |    "name": "python",
79 |    "nbconvert_exporter": "python",
80 |    "pygments_lexer": "ipython3",
81 |    "version": "3.12.0"
82 |   }
83 |  },
84 |  "nbformat": 4,
85 |  "nbformat_minor": 2
86 | }
87 | 


--------------------------------------------------------------------------------