├── README.md
├── bert_text_classification.ipynb
└── imdb-sample.pickle


/README.md:
--------------------------------------------------------------------------------
1 | # demo-text-binary-classification-with-bert


--------------------------------------------------------------------------------
/bert_text_classification.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |   "nbformat": 4,
  3 |   "nbformat_minor": 0,
  4 |   "metadata": {
  5 |     "colab": {
  6 |       "name": "bert_text_classification.ipynb",
  7 |       "version": "0.3.2",
  8 |       "provenance": [],
  9 |       "collapsed_sections": [],
 10 |       "include_colab_link": true
 11 |     },
 12 |     "kernelspec": {
 13 |       "name": "python3",
 14 |       "display_name": "Python 3"
 15 |     },
 16 |     "accelerator": "GPU"
 17 |   },
 18 |   "cells": [
 19 |     {
 20 |       "cell_type": "markdown",
 21 |       "metadata": {
 22 |         "id": "view-in-github",
 23 |         "colab_type": "text"
 24 |       },
 25 |       "source": [
 26 |         "<a href=\"https://colab.research.google.com/github/wshuyi/demo-text-binary-classification-with-bert/blob/master/bert_text_classification.ipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>"
 27 |       ]
 28 |     },
 29 |     {
 30 |       "metadata": {
 31 |         "id": "zwYoyLfLQI-t",
 32 |         "colab_type": "text"
 33 |       },
 34 |       "cell_type": "markdown",
 35 |       "source": [
 36 |         "The base code is borrowed from [this Google Colab Notebook](https://colab.research.google.com/github/google-research/bert/blob/master/predicting_movie_reviews_with_bert_on_tf_hub.ipynb).\n",
 37 |         "\n",
 38 |         "Refactored by [Shuyi Wang](https://www.linkedin.com/in/shuyi-wang-b3955026/) and [Yan Sun](https://github.com/SunYanCN).\n",
 39 |         "\n",
 40 |         "Please refer to [this Medium Article](https://medium.com/@wshuyi/how-to-do-text-binary-classification-with-bert-f1348a25d905) for detailed information.\n",
 41 |         "\n"
 42 |       ]
 43 |     },
 44 |     {
 45 |       "metadata": {
 46 |         "id": "EQHcJlAgHZ2o",
 47 |         "colab_type": "code",
 48 |         "colab": {}
 49 |       },
 50 |       "cell_type": "code",
 51 |       "source": [
 52 |         "!pip install bert-text"
 53 |       ],
 54 |       "execution_count": 0,
 55 |       "outputs": []
 56 |     },
 57 |     {
 58 |       "metadata": {
 59 |         "id": "oZFDseXWHiSz",
 60 |         "colab_type": "code",
 61 |         "colab": {}
 62 |       },
 63 |       "cell_type": "code",
 64 |       "source": [
 65 |         "from bert_text import run_on_dfs"
 66 |       ],
 67 |       "execution_count": 0,
 68 |       "outputs": []
 69 |     },
 70 |     {
 71 |       "metadata": {
 72 |         "id": "R4pBwSidH4CT",
 73 |         "colab_type": "code",
 74 |         "colab": {}
 75 |       },
 76 |       "cell_type": "code",
 77 |       "source": [
 78 |         "import pickle"
 79 |       ],
 80 |       "execution_count": 0,
 81 |       "outputs": []
 82 |     },
 83 |     {
 84 |       "metadata": {
 85 |         "id": "wiOyfxH6H5ry",
 86 |         "colab_type": "code",
 87 |         "colab": {}
 88 |       },
 89 |       "cell_type": "code",
 90 |       "source": [
 91 |         "!wget https://github.com/wshuyi/info-5731-public/raw/master/imdb-sample.pickle"
 92 |       ],
 93 |       "execution_count": 0,
 94 |       "outputs": []
 95 |     },
 96 |     {
 97 |       "metadata": {
 98 |         "id": "vEGtiE42IDyj",
 99 |         "colab_type": "code",
100 |         "colab": {}
101 |       },
102 |       "cell_type": "code",
103 |       "source": [
104 |         "with open(\"imdb-sample.pickle\", 'rb') as f:\n",
105 |         "  train, test = pickle.load(f)"
106 |       ],
107 |       "execution_count": 0,
108 |       "outputs": []
109 |     },
110 |     {
111 |       "metadata": {
112 |         "id": "P6wlSR8NIKHS",
113 |         "colab_type": "code",
114 |         "colab": {}
115 |       },
116 |       "cell_type": "code",
117 |       "source": [
118 |         "train = train.sample(len(train))"
119 |       ],
120 |       "execution_count": 0,
121 |       "outputs": []
122 |     },
123 |     {
124 |       "metadata": {
125 |         "id": "qCoK42AGIXsS",
126 |         "colab_type": "code",
127 |         "colab": {}
128 |       },
129 |       "cell_type": "code",
130 |       "source": [
131 |         "myparam = {\n",
132 |         "    \"DATA_COLUMN\": \"text\",\n",
133 |         "    \"LABEL_COLUMN\": \"sentiment\",\n",
134 |         "    \"LEARNING_RATE\": 2e-5,\n",
135 |         "    \"NUM_TRAIN_EPOCHS\": 3\n",
136 |         "}"
137 |       ],
138 |       "execution_count": 0,
139 |       "outputs": []
140 |     },
141 |     {
142 |       "metadata": {
143 |         "id": "SUwqGJuIfgap",
144 |         "colab_type": "code",
145 |         "colab": {}
146 |       },
147 |       "cell_type": "code",
148 |       "source": [
149 |         "import tensorflow as tf\n",
150 |         "tf.logging.set_verbosity(tf.logging.INFO)"
151 |       ],
152 |       "execution_count": 0,
153 |       "outputs": []
154 |     },
155 |     {
156 |       "metadata": {
157 |         "id": "YoOVNBr7IsjS",
158 |         "colab_type": "code",
159 |         "colab": {}
160 |       },
161 |       "cell_type": "code",
162 |       "source": [
163 |         "result, estimator = run_on_dfs(train, test, **myparam)"
164 |       ],
165 |       "execution_count": 0,
166 |       "outputs": []
167 |     },
168 |     {
169 |       "metadata": {
170 |         "id": "kaZMjQ0cIw9y",
171 |         "colab_type": "code",
172 |         "colab": {}
173 |       },
174 |       "cell_type": "code",
175 |       "source": [
176 |         "result"
177 |       ],
178 |       "execution_count": 0,
179 |       "outputs": []
180 |     }
181 |   ]
182 | }


--------------------------------------------------------------------------------
/imdb-sample.pickle:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/wshuyi/demo-text-binary-classification-with-bert/ba349e41923636261c0b36385887758dc9ae0833/imdb-sample.pickle


--------------------------------------------------------------------------------