├── How multilingual is Multilingual BERT.pdf
├── Multilingual is not enough BERT for Finnish.pdf
├── Is Multilingual BERT Fluent in Language Generation.pdf
├── Are All Languages Created Equal in Multilingual BERT.pdf
├── Cross-Lingual Ability of Multilingual BERT An Empirical Study.pdf
├── Finding Universal Grammatical Relations in Multilingual BERT.pdf
├── Cross-Linguistic Syntactic Evaluation of Word Prediction Models.pdf
├── Emerging Cross-lingual Structure in Pretrained Language Models.pdf
├── A Study of Cross-Lingual Ability and Language-specific Information in Multilingual BERT.pdf
├── BERT for sequence classification.ipynb
└── simpletransformers-tuto.ipynb
/How multilingual is Multilingual BERT.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/NeuroData-ltd/Transformers_Tuto/HEAD/How multilingual is Multilingual BERT.pdf
--------------------------------------------------------------------------------
/Multilingual is not enough BERT for Finnish.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/NeuroData-ltd/Transformers_Tuto/HEAD/Multilingual is not enough BERT for Finnish.pdf
--------------------------------------------------------------------------------
/Is Multilingual BERT Fluent in Language Generation.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/NeuroData-ltd/Transformers_Tuto/HEAD/Is Multilingual BERT Fluent in Language Generation.pdf
--------------------------------------------------------------------------------
/Are All Languages Created Equal in Multilingual BERT.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/NeuroData-ltd/Transformers_Tuto/HEAD/Are All Languages Created Equal in Multilingual BERT.pdf
--------------------------------------------------------------------------------
/Cross-Lingual Ability of Multilingual BERT An Empirical Study.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/NeuroData-ltd/Transformers_Tuto/HEAD/Cross-Lingual Ability of Multilingual BERT An Empirical Study.pdf
--------------------------------------------------------------------------------
/Finding Universal Grammatical Relations in Multilingual BERT.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/NeuroData-ltd/Transformers_Tuto/HEAD/Finding Universal Grammatical Relations in Multilingual BERT.pdf
--------------------------------------------------------------------------------
/Cross-Linguistic Syntactic Evaluation of Word Prediction Models.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/NeuroData-ltd/Transformers_Tuto/HEAD/Cross-Linguistic Syntactic Evaluation of Word Prediction Models.pdf
--------------------------------------------------------------------------------
/Emerging Cross-lingual Structure in Pretrained Language Models.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/NeuroData-ltd/Transformers_Tuto/HEAD/Emerging Cross-lingual Structure in Pretrained Language Models.pdf
--------------------------------------------------------------------------------
/A Study of Cross-Lingual Ability and Language-specific Information in Multilingual BERT.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/NeuroData-ltd/Transformers_Tuto/HEAD/A Study of Cross-Lingual Ability and Language-specific Information in Multilingual BERT.pdf
--------------------------------------------------------------------------------
/BERT for sequence classification.ipynb:
--------------------------------------------------------------------------------
1 | {"cells":[{"metadata":{"id":"EKOTlwcmxmej"},"cell_type":"markdown","source":"#Sentence Classification using BERT"},{"metadata":{"id":"DEfSbAA4QHas","outputId":"8431a054-3835-4c33-a465-3b63542a903f","trusted":true},"cell_type":"code","source":"import tensorflow as tf\n\n# Checking for the GPU\ndevice_name = tf.test.gpu_device_name()\nprint(device_name)\n","execution_count":1,"outputs":[{"output_type":"stream","text":"/device:GPU:0\n","name":"stdout"}]},{"metadata":{"id":"oYsV4H8fCpZ-","trusted":true},"cell_type":"code","source":"import torch\n\ndevice = torch.device(\"cuda\")\n\n","execution_count":2,"outputs":[]},{"metadata":{"id":"0NmMdkZO8R6q","outputId":"09e7d809-20ee-40d1-c45b-9c569992aed0","trusted":true},"cell_type":"code","source":"!pip install transformers","execution_count":3,"outputs":[{"output_type":"stream","text":"Requirement already satisfied: transformers in /opt/conda/lib/python3.7/site-packages (2.11.0)\nRequirement already satisfied: filelock in /opt/conda/lib/python3.7/site-packages (from transformers) (3.0.10)\nRequirement already satisfied: tokenizers==0.7.0 in /opt/conda/lib/python3.7/site-packages (from transformers) (0.7.0)\nRequirement already satisfied: numpy in /opt/conda/lib/python3.7/site-packages (from transformers) (1.18.5)\nRequirement already satisfied: tqdm>=4.27 in /opt/conda/lib/python3.7/site-packages (from transformers) (4.45.0)\nRequirement already satisfied: sentencepiece in /opt/conda/lib/python3.7/site-packages (from transformers) (0.1.91)\nRequirement already satisfied: sacremoses in /opt/conda/lib/python3.7/site-packages (from transformers) (0.0.43)\nRequirement already satisfied: requests in /opt/conda/lib/python3.7/site-packages (from transformers) (2.23.0)\nRequirement already satisfied: regex!=2019.12.17 in /opt/conda/lib/python3.7/site-packages (from transformers) (2020.4.4)\nRequirement already satisfied: packaging in /opt/conda/lib/python3.7/site-packages (from transformers) (20.1)\nRequirement already 
satisfied: joblib in /opt/conda/lib/python3.7/site-packages (from sacremoses->transformers) (0.14.1)\nRequirement already satisfied: six in /opt/conda/lib/python3.7/site-packages (from sacremoses->transformers) (1.14.0)\nRequirement already satisfied: click in /opt/conda/lib/python3.7/site-packages (from sacremoses->transformers) (7.1.1)\nRequirement already satisfied: urllib3!=1.25.0,!=1.25.1,<1.26,>=1.21.1 in /opt/conda/lib/python3.7/site-packages (from requests->transformers) (1.24.3)\nRequirement already satisfied: chardet<4,>=3.0.2 in /opt/conda/lib/python3.7/site-packages (from requests->transformers) (3.0.4)\nRequirement already satisfied: certifi>=2017.4.17 in /opt/conda/lib/python3.7/site-packages (from requests->transformers) (2020.6.20)\nRequirement already satisfied: idna<3,>=2.5 in /opt/conda/lib/python3.7/site-packages (from requests->transformers) (2.9)\nRequirement already satisfied: pyparsing>=2.0.2 in /opt/conda/lib/python3.7/site-packages (from packaging->transformers) (2.4.7)\n\u001b[33mWARNING: You are using pip version 20.1.1; however, version 20.2.1 is available.\nYou should consider upgrading via the '/opt/conda/bin/python3.7 -m pip install --upgrade pip' command.\u001b[0m\n","name":"stdout"}]},{"metadata":{"id":"_9ZKxKc04Btk"},"cell_type":"markdown","source":"We are using [The Corpus of Linguistic Acceptability (CoLA)](https://nyu-mll.github.io/CoLA/) dataset for single sentence classification. It's a set of sentences labeled as grammatically correct or incorrect. 
It was first published in May of 2018, and is one of the tests included in the \"GLUE Benchmark\" on which models like BERT are competing.\n"},{"metadata":{"id":"5m6AnuFv0QXQ","outputId":"a48c0cf6-700d-42d9-af56-727fa45e9570","trusted":true},"cell_type":"code","source":"!pip install wget","execution_count":4,"outputs":[{"output_type":"stream","text":"Collecting wget\n Downloading wget-3.2.zip (10 kB)\nBuilding wheels for collected packages: wget\n Building wheel for wget (setup.py) ... \u001b[?25ldone\n\u001b[?25h Created wheel for wget: filename=wget-3.2-py3-none-any.whl size=9681 sha256=1fee3da5a1b37e03d2e688ad969ec281950f9aac7b6677a760b3047869c06352\n Stored in directory: /root/.cache/pip/wheels/a1/b6/7c/0e63e34eb06634181c63adacca38b79ff8f35c37e3c13e3c02\nSuccessfully built wget\nInstalling collected packages: wget\nSuccessfully installed wget-3.2\n\u001b[33mWARNING: You are using pip version 20.1.1; however, version 20.2.1 is available.\nYou should consider upgrading via the '/opt/conda/bin/python3.7 -m pip install --upgrade pip' command.\u001b[0m\n","name":"stdout"}]},{"metadata":{"id":"08pO03Ff1BjI"},"cell_type":"markdown","source":"The original dataset is available on: https://nyu-mll.github.io/CoLA/"},{"metadata":{"id":"pMtmPMkBzrvs","outputId":"5be0de08-7b87-422d-e783-396f10af4fbf","trusted":true},"cell_type":"code","source":"import wget\nimport os\n\n\n\n# The URL for the dataset zip file.\nurl = 'https://nyu-mll.github.io/CoLA/cola_public_1.1.zip'\n\nwget.download(url, './cola_public_1.1.zip')","execution_count":5,"outputs":[{"output_type":"execute_result","execution_count":5,"data":{"text/plain":"'./cola_public_1.1.zip'"},"metadata":{}}]},{"metadata":{"id":"0Yv-tNv20dnH","outputId":"4014707a-94ee-40d9-ab32-66af403fe0bb","trusted":true},"cell_type":"code","source":"# Unzipping the dataset\nif not os.path.exists('./cola_public/'):\n !unzip cola_public_1.1.zip","execution_count":6,"outputs":[{"output_type":"stream","text":"Archive: cola_public_1.1.zip\r\n 
creating: cola_public/\r\n inflating: cola_public/README \r\n creating: cola_public/tokenized/\r\n inflating: cola_public/tokenized/in_domain_dev.tsv \r\n inflating: cola_public/tokenized/in_domain_train.tsv \r\n inflating: cola_public/tokenized/out_of_domain_dev.tsv \r\n creating: cola_public/raw/\r\n inflating: cola_public/raw/in_domain_dev.tsv \r\n inflating: cola_public/raw/in_domain_train.tsv \r\n inflating: cola_public/raw/out_of_domain_dev.tsv \r\n","name":"stdout"}]},{"metadata":{"id":"_UkeC7SG2krJ","outputId":"e20220fa-7ba5-4f59-d84d-772e95f18eff","trusted":true},"cell_type":"code","source":"import pandas as pd\n\n# Load the dataset into a pandas dataframe.\ndf = pd.read_csv(\"./cola_public/raw/in_domain_train.tsv\", delimiter='\\t', header=None, names=['sentence_source', 'label', 'label_notes', 'sentence'])\n\n# Report the number of sentences.\nprint('Number of training sentences: {:,}\\n'.format(df.shape[0]))\n\n# Display 10 random rows from the data.\ndf.sample(10)","execution_count":7,"outputs":[{"output_type":"stream","text":"Number of training sentences: 8,551\n\n","name":"stdout"},{"output_type":"execute_result","execution_count":7,"data":{"text/plain":" sentence_source label label_notes \\\n4835 ks08 1 NaN \n5096 ks08 0 * \n2598 l-93 1 NaN \n5035 ks08 1 NaN \n3572 ks08 1 NaN \n3022 l-93 1 NaN \n3913 ks08 1 NaN \n6624 m_02 1 NaN \n315 bc01 1 NaN \n4149 ks08 0 * \n\n sentence \n4835 When did he say that he was fired? \n5096 They caught her without a license is what happ... \n2598 Jessica squirted water at me. \n5035 It annoys people that dogs bark. \n3572 John will study syntax, and Mary, too. \n3022 I was hunting. \n3913 Kim relies on Sandy. \n6624 There are fewer trucks on the motorway this wi... \n315 Why did John leave? \n4149 The boys swim. ","text/html":"
\n\n
\n \n \n | \n sentence_source | \n label | \n label_notes | \n sentence | \n
\n \n \n \n | 4835 | \n ks08 | \n 1 | \n NaN | \n When did he say that he was fired? | \n
\n \n | 5096 | \n ks08 | \n 0 | \n * | \n They caught her without a license is what happ... | \n
\n \n | 2598 | \n l-93 | \n 1 | \n NaN | \n Jessica squirted water at me. | \n
\n \n | 5035 | \n ks08 | \n 1 | \n NaN | \n It annoys people that dogs bark. | \n
\n \n | 3572 | \n ks08 | \n 1 | \n NaN | \n John will study syntax, and Mary, too. | \n
\n \n | 3022 | \n l-93 | \n 1 | \n NaN | \n I was hunting. | \n
\n \n | 3913 | \n ks08 | \n 1 | \n NaN | \n Kim relies on Sandy. | \n
\n \n | 6624 | \n m_02 | \n 1 | \n NaN | \n There are fewer trucks on the motorway this wi... | \n
\n \n | 315 | \n bc01 | \n 1 | \n NaN | \n Why did John leave? | \n
\n \n | 4149 | \n ks08 | \n 0 | \n * | \n The boys swim. | \n
\n \n
\n
"},"metadata":{}}]},{"metadata":{"id":"blqIvQaQncdJ","outputId":"61439ea0-6fc6-4556-82d9-11e82b5aa619","trusted":true},"cell_type":"code","source":"df.loc[df.label == 0].sample(5)[['sentence', 'label']]","execution_count":8,"outputs":[{"output_type":"execute_result","execution_count":8,"data":{"text/plain":" sentence label\n5430 He's the bit of a gossip. 0\n4018 Monkeys are eager leaving the compound. 0\n8131 She kicked itself 0\n6766 The person who never had he been so offended w... 0\n1967 I deny that that Bob has any money is certain. 0","text/html":"\n\n
\n \n \n | \n sentence | \n label | \n
\n \n \n \n | 5430 | \n He's the bit of a gossip. | \n 0 | \n
\n \n | 4018 | \n Monkeys are eager leaving the compound. | \n 0 | \n
\n \n | 8131 | \n She kicked itself | \n 0 | \n
\n \n | 6766 | \n The person who never had he been so offended w... | \n 0 | \n
\n \n | 1967 | \n I deny that that Bob has any money is certain. | \n 0 | \n
\n \n
\n
"},"metadata":{}}]},{"metadata":{"id":"GuE5BqICAne2","trusted":true},"cell_type":"code","source":"# Get the lists of sentences and their labels.\nsentences = df.sentence.values\nlabels = df.label.values","execution_count":9,"outputs":[]},{"metadata":{"id":"Z474sSC6oe7A","outputId":"5cac4edc-a347-4d50-99d6-9dd1f4fd99d2","trusted":true},"cell_type":"code","source":"from transformers import BertTokenizer\n\n# Load the BERT tokenizer.\n\ntokenizer = BertTokenizer.from_pretrained('bert-base-uncased', do_lower_case=True)","execution_count":10,"outputs":[{"output_type":"stream","text":"\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[33mWARNING\u001b[0m W&B installed but not logged in. Run `wandb login` or set the WANDB_API_KEY env variable.\n","name":"stderr"},{"output_type":"display_data","data":{"text/plain":"HBox(children=(FloatProgress(value=0.0, description='Downloading', max=231508.0, style=ProgressStyle(descripti…","application/vnd.jupyter.widget-view+json":{"version_major":2,"version_minor":0,"model_id":"61e88f31d117465db61a1ed27b67e2d5"}},"metadata":{}},{"output_type":"stream","text":"\n","name":"stdout"}]},{"metadata":{"id":"dLIbudgfh6F0","outputId":"73326d52-d489-4e06-b706-99e6a5f7d57a","trusted":true},"cell_type":"code","source":"# Print the original sentence.\nprint(' Original: ', sentences[0])\n\n# Print the sentence split into tokens.\nprint('Tokenized: ', tokenizer.tokenize(sentences[0]))\n\n# Print the sentence mapped to token ids.\nprint('Token IDs: ', tokenizer.convert_tokens_to_ids(tokenizer.tokenize(sentences[0])))","execution_count":11,"outputs":[{"output_type":"stream","text":" Original: Our friends won't buy this analysis, let alone the next one we propose.\nTokenized: ['our', 'friends', 'won', \"'\", 't', 'buy', 'this', 'analysis', ',', 'let', 'alone', 'the', 'next', 'one', 'we', 'propose', '.']\nToken IDs: [2256, 2814, 2180, 1005, 1056, 4965, 2023, 4106, 1010, 2292, 2894, 1996, 2279, 2028, 2057, 16599, 
1012]\n","name":"stdout"}]},{"metadata":{"id":"2bBdb3pt8LuQ","outputId":"043462be-49c9-4a91-92c7-268acf61a6d3","trusted":true},"cell_type":"code","source":"# Tokenize all of the sentences and map the tokens to their word IDs.\ninput_ids = []\n\n# For every sentence...\nfor sent in sentences:\n # `encode` will:\n # (1) Tokenize the sentence.\n # (2) Prepend the `[CLS]` token to the start.\n # (3) Append the `[SEP]` token to the end.\n # (4) Map tokens to their IDs.\n encoded_sent = tokenizer.encode(\n sent\n )\n \n # Add the encoded sentence to the list.\n input_ids.append(encoded_sent)\n\n# Print sentence 0, now as a list of IDs.\nprint('Original: ', sentences[0])\nprint('Token IDs:', input_ids[0])","execution_count":12,"outputs":[{"output_type":"stream","text":"Original: Our friends won't buy this analysis, let alone the next one we propose.\nToken IDs: [101, 2256, 2814, 2180, 1005, 1056, 4965, 2023, 4106, 1010, 2292, 2894, 1996, 2279, 2028, 2057, 16599, 1012, 102]\n","name":"stdout"}]},{"metadata":{"id":"JhUZO9vc_l6T","outputId":"20d43d20-5b2c-4c51-f3b1-dd59f20646da","trusted":true},"cell_type":"code","source":"print('Max sentence length: ', max([len(sen) for sen in input_ids]))","execution_count":13,"outputs":[{"output_type":"stream","text":"Max sentence length: 47\n","name":"stdout"}]},{"metadata":{"id":"Cp9BPRd1tMIo","trusted":true},"cell_type":"code","source":"# We will use some utility function from tensorflow(Tensorflow was my first crush)\nfrom keras.preprocessing.sequence import pad_sequences\n\n\nMAX_LEN = 64\n\n#Padding the input to the max length that is 64\ninput_ids = pad_sequences(input_ids, maxlen=MAX_LEN, dtype=\"long\", \n value=0, truncating=\"post\", padding=\"post\")\n\n","execution_count":15,"outputs":[]},{"metadata":{"id":"cDoC24LeEv3N","trusted":true},"cell_type":"code","source":"# Creating the attention masks\nattention_masks = []\n\n# For each sentence...\nfor sent in input_ids:\n \n # Create the attention mask.\n # - If a token ID is 0, 
then it's padding, set the mask to 0.\n # - If a token ID is > 0, then it's a real token, set the mask to 1.\n att_mask = [int(token_id > 0) for token_id in sent]\n \n # Store the attention mask for this sentence.\n attention_masks.append(att_mask)","execution_count":16,"outputs":[]},{"metadata":{"id":"aFbE-UHvsb7-","trusted":true},"cell_type":"code","source":"# We will call the train_test_split() function from sklearn\nfrom sklearn.model_selection import train_test_split\n\n\ntrain_inputs, validation_inputs, train_labels, validation_labels = train_test_split(input_ids, labels, \n random_state=2018, test_size=0.1)\n# Performing same steps on the attention masks\ntrain_masks, validation_masks, _, _ = train_test_split(attention_masks, labels,\n random_state=2018, test_size=0.1)","execution_count":17,"outputs":[]},{"metadata":{"id":"jw5K2A5Ko1RF","trusted":true},"cell_type":"code","source":"#Converting the input data to tensors, which can be fed to the model\ntrain_inputs = torch.tensor(train_inputs)\nvalidation_inputs = torch.tensor(validation_inputs)\n\ntrain_labels = torch.tensor(train_labels)\nvalidation_labels = torch.tensor(validation_labels)\n\ntrain_masks = torch.tensor(train_masks)\nvalidation_masks = torch.tensor(validation_masks)","execution_count":18,"outputs":[]},{"metadata":{"id":"GEgLpFVlo1Z-","trusted":true},"cell_type":"code","source":"from torch.utils.data import TensorDataset, DataLoader, RandomSampler, SequentialSampler\n\n#Creating the DataLoader which will help us to load data into the GPU/CPU\nbatch_size = 32\n\n# Create the DataLoader for our training set.\ntrain_data = TensorDataset(train_inputs, train_masks, train_labels)\ntrain_sampler = RandomSampler(train_data)\ntrain_dataloader = DataLoader(train_data, sampler=train_sampler, batch_size=batch_size)\n\n# Create the DataLoader for our validation set.\nvalidation_data = TensorDataset(validation_inputs, validation_masks, validation_labels)\nvalidation_sampler = 
SequentialSampler(validation_data)\nvalidation_dataloader = DataLoader(validation_data, sampler=validation_sampler, batch_size=batch_size)\n","execution_count":19,"outputs":[]},{"metadata":{"id":"gFsCTp_mporB","outputId":"60b3ee08-16e0-464a-f784-684df32628c3","trusted":true},"cell_type":"code","source":"#Loading the pre-trained BERT model from huggingface library\n\nfrom transformers import BertForSequenceClassification, AdamW, BertConfig\n\n# Load BertForSequenceClassification, the pretrained BERT model with a single \n# linear classification layer on top. \nmodel = BertForSequenceClassification.from_pretrained(\n \"bert-base-uncased\", \n num_labels = 2, \n output_attentions = False, \n output_hidden_states = False, )\n\n# Telling the model to run on GPU\nmodel.cuda()","execution_count":20,"outputs":[{"output_type":"display_data","data":{"text/plain":"HBox(children=(FloatProgress(value=0.0, description='Downloading', max=433.0, style=ProgressStyle(description_…","application/vnd.jupyter.widget-view+json":{"version_major":2,"version_minor":0,"model_id":"9387e2b8395f4887a5fd32129de8f43a"}},"metadata":{}},{"output_type":"stream","text":"\n","name":"stdout"},{"output_type":"display_data","data":{"text/plain":"HBox(children=(FloatProgress(value=0.0, description='Downloading', max=440473133.0, style=ProgressStyle(descri…","application/vnd.jupyter.widget-view+json":{"version_major":2,"version_minor":0,"model_id":"e324df85b8524457a1e3423a0a62d728"}},"metadata":{}},{"output_type":"stream","text":"\n","name":"stdout"},{"output_type":"execute_result","execution_count":20,"data":{"text/plain":"BertForSequenceClassification(\n (bert): BertModel(\n (embeddings): BertEmbeddings(\n (word_embeddings): Embedding(30522, 768, padding_idx=0)\n (position_embeddings): Embedding(512, 768)\n (token_type_embeddings): Embedding(2, 768)\n (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)\n (dropout): Dropout(p=0.1, inplace=False)\n )\n (encoder): BertEncoder(\n (layer): 
ModuleList(\n (0): BertLayer(\n (attention): BertAttention(\n (self): BertSelfAttention(\n (query): Linear(in_features=768, out_features=768, bias=True)\n (key): Linear(in_features=768, out_features=768, bias=True)\n (value): Linear(in_features=768, out_features=768, bias=True)\n (dropout): Dropout(p=0.1, inplace=False)\n )\n (output): BertSelfOutput(\n (dense): Linear(in_features=768, out_features=768, bias=True)\n (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)\n (dropout): Dropout(p=0.1, inplace=False)\n )\n )\n (intermediate): BertIntermediate(\n (dense): Linear(in_features=768, out_features=3072, bias=True)\n )\n (output): BertOutput(\n (dense): Linear(in_features=3072, out_features=768, bias=True)\n (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)\n (dropout): Dropout(p=0.1, inplace=False)\n )\n )\n (1): BertLayer(\n (attention): BertAttention(\n (self): BertSelfAttention(\n (query): Linear(in_features=768, out_features=768, bias=True)\n (key): Linear(in_features=768, out_features=768, bias=True)\n (value): Linear(in_features=768, out_features=768, bias=True)\n (dropout): Dropout(p=0.1, inplace=False)\n )\n (output): BertSelfOutput(\n (dense): Linear(in_features=768, out_features=768, bias=True)\n (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)\n (dropout): Dropout(p=0.1, inplace=False)\n )\n )\n (intermediate): BertIntermediate(\n (dense): Linear(in_features=768, out_features=3072, bias=True)\n )\n (output): BertOutput(\n (dense): Linear(in_features=3072, out_features=768, bias=True)\n (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)\n (dropout): Dropout(p=0.1, inplace=False)\n )\n )\n (2): BertLayer(\n (attention): BertAttention(\n (self): BertSelfAttention(\n (query): Linear(in_features=768, out_features=768, bias=True)\n (key): Linear(in_features=768, out_features=768, bias=True)\n (value): Linear(in_features=768, out_features=768, bias=True)\n (dropout): Dropout(p=0.1, 
inplace=False)\n )\n (output): BertSelfOutput(\n (dense): Linear(in_features=768, out_features=768, bias=True)\n (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)\n (dropout): Dropout(p=0.1, inplace=False)\n )\n )\n (intermediate): BertIntermediate(\n (dense): Linear(in_features=768, out_features=3072, bias=True)\n )\n (output): BertOutput(\n (dense): Linear(in_features=3072, out_features=768, bias=True)\n (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)\n (dropout): Dropout(p=0.1, inplace=False)\n )\n )\n (3): BertLayer(\n (attention): BertAttention(\n (self): BertSelfAttention(\n (query): Linear(in_features=768, out_features=768, bias=True)\n (key): Linear(in_features=768, out_features=768, bias=True)\n (value): Linear(in_features=768, out_features=768, bias=True)\n (dropout): Dropout(p=0.1, inplace=False)\n )\n (output): BertSelfOutput(\n (dense): Linear(in_features=768, out_features=768, bias=True)\n (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)\n (dropout): Dropout(p=0.1, inplace=False)\n )\n )\n (intermediate): BertIntermediate(\n (dense): Linear(in_features=768, out_features=3072, bias=True)\n )\n (output): BertOutput(\n (dense): Linear(in_features=3072, out_features=768, bias=True)\n (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)\n (dropout): Dropout(p=0.1, inplace=False)\n )\n )\n (4): BertLayer(\n (attention): BertAttention(\n (self): BertSelfAttention(\n (query): Linear(in_features=768, out_features=768, bias=True)\n (key): Linear(in_features=768, out_features=768, bias=True)\n (value): Linear(in_features=768, out_features=768, bias=True)\n (dropout): Dropout(p=0.1, inplace=False)\n )\n (output): BertSelfOutput(\n (dense): Linear(in_features=768, out_features=768, bias=True)\n (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)\n (dropout): Dropout(p=0.1, inplace=False)\n )\n )\n (intermediate): BertIntermediate(\n (dense): Linear(in_features=768, 
out_features=3072, bias=True)\n )\n (output): BertOutput(\n (dense): Linear(in_features=3072, out_features=768, bias=True)\n (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)\n (dropout): Dropout(p=0.1, inplace=False)\n )\n )\n (5): BertLayer(\n (attention): BertAttention(\n (self): BertSelfAttention(\n (query): Linear(in_features=768, out_features=768, bias=True)\n (key): Linear(in_features=768, out_features=768, bias=True)\n (value): Linear(in_features=768, out_features=768, bias=True)\n (dropout): Dropout(p=0.1, inplace=False)\n )\n (output): BertSelfOutput(\n (dense): Linear(in_features=768, out_features=768, bias=True)\n (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)\n (dropout): Dropout(p=0.1, inplace=False)\n )\n )\n (intermediate): BertIntermediate(\n (dense): Linear(in_features=768, out_features=3072, bias=True)\n )\n (output): BertOutput(\n (dense): Linear(in_features=3072, out_features=768, bias=True)\n (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)\n (dropout): Dropout(p=0.1, inplace=False)\n )\n )\n (6): BertLayer(\n (attention): BertAttention(\n (self): BertSelfAttention(\n (query): Linear(in_features=768, out_features=768, bias=True)\n (key): Linear(in_features=768, out_features=768, bias=True)\n (value): Linear(in_features=768, out_features=768, bias=True)\n (dropout): Dropout(p=0.1, inplace=False)\n )\n (output): BertSelfOutput(\n (dense): Linear(in_features=768, out_features=768, bias=True)\n (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)\n (dropout): Dropout(p=0.1, inplace=False)\n )\n )\n (intermediate): BertIntermediate(\n (dense): Linear(in_features=768, out_features=3072, bias=True)\n )\n (output): BertOutput(\n (dense): Linear(in_features=3072, out_features=768, bias=True)\n (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)\n (dropout): Dropout(p=0.1, inplace=False)\n )\n )\n (7): BertLayer(\n (attention): BertAttention(\n (self): 
BertSelfAttention(\n (query): Linear(in_features=768, out_features=768, bias=True)\n (key): Linear(in_features=768, out_features=768, bias=True)\n (value): Linear(in_features=768, out_features=768, bias=True)\n (dropout): Dropout(p=0.1, inplace=False)\n )\n (output): BertSelfOutput(\n (dense): Linear(in_features=768, out_features=768, bias=True)\n (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)\n (dropout): Dropout(p=0.1, inplace=False)\n )\n )\n (intermediate): BertIntermediate(\n (dense): Linear(in_features=768, out_features=3072, bias=True)\n )\n (output): BertOutput(\n (dense): Linear(in_features=3072, out_features=768, bias=True)\n (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)\n (dropout): Dropout(p=0.1, inplace=False)\n )\n )\n (8): BertLayer(\n (attention): BertAttention(\n (self): BertSelfAttention(\n (query): Linear(in_features=768, out_features=768, bias=True)\n (key): Linear(in_features=768, out_features=768, bias=True)\n (value): Linear(in_features=768, out_features=768, bias=True)\n (dropout): Dropout(p=0.1, inplace=False)\n )\n (output): BertSelfOutput(\n (dense): Linear(in_features=768, out_features=768, bias=True)\n (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)\n (dropout): Dropout(p=0.1, inplace=False)\n )\n )\n (intermediate): BertIntermediate(\n (dense): Linear(in_features=768, out_features=3072, bias=True)\n )\n (output): BertOutput(\n (dense): Linear(in_features=3072, out_features=768, bias=True)\n (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)\n (dropout): Dropout(p=0.1, inplace=False)\n )\n )\n (9): BertLayer(\n (attention): BertAttention(\n (self): BertSelfAttention(\n (query): Linear(in_features=768, out_features=768, bias=True)\n (key): Linear(in_features=768, out_features=768, bias=True)\n (value): Linear(in_features=768, out_features=768, bias=True)\n (dropout): Dropout(p=0.1, inplace=False)\n )\n (output): BertSelfOutput(\n (dense): Linear(in_features=768, 
out_features=768, bias=True)\n (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)\n (dropout): Dropout(p=0.1, inplace=False)\n )\n )\n (intermediate): BertIntermediate(\n (dense): Linear(in_features=768, out_features=3072, bias=True)\n )\n (output): BertOutput(\n (dense): Linear(in_features=3072, out_features=768, bias=True)\n (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)\n (dropout): Dropout(p=0.1, inplace=False)\n )\n )\n (10): BertLayer(\n (attention): BertAttention(\n (self): BertSelfAttention(\n (query): Linear(in_features=768, out_features=768, bias=True)\n (key): Linear(in_features=768, out_features=768, bias=True)\n (value): Linear(in_features=768, out_features=768, bias=True)\n (dropout): Dropout(p=0.1, inplace=False)\n )\n (output): BertSelfOutput(\n (dense): Linear(in_features=768, out_features=768, bias=True)\n (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)\n (dropout): Dropout(p=0.1, inplace=False)\n )\n )\n (intermediate): BertIntermediate(\n (dense): Linear(in_features=768, out_features=3072, bias=True)\n )\n (output): BertOutput(\n (dense): Linear(in_features=3072, out_features=768, bias=True)\n (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)\n (dropout): Dropout(p=0.1, inplace=False)\n )\n )\n (11): BertLayer(\n (attention): BertAttention(\n (self): BertSelfAttention(\n (query): Linear(in_features=768, out_features=768, bias=True)\n (key): Linear(in_features=768, out_features=768, bias=True)\n (value): Linear(in_features=768, out_features=768, bias=True)\n (dropout): Dropout(p=0.1, inplace=False)\n )\n (output): BertSelfOutput(\n (dense): Linear(in_features=768, out_features=768, bias=True)\n (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)\n (dropout): Dropout(p=0.1, inplace=False)\n )\n )\n (intermediate): BertIntermediate(\n (dense): Linear(in_features=768, out_features=3072, bias=True)\n )\n (output): BertOutput(\n (dense): Linear(in_features=3072, 
out_features=768, bias=True)\n (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)\n (dropout): Dropout(p=0.1, inplace=False)\n )\n )\n )\n )\n (pooler): BertPooler(\n (dense): Linear(in_features=768, out_features=768, bias=True)\n (activation): Tanh()\n )\n )\n (dropout): Dropout(p=0.1, inplace=False)\n (classifier): Linear(in_features=768, out_features=2, bias=True)\n)"},"metadata":{}}]},{"metadata":{"id":"GLs72DuMODJO","trusted":true},"cell_type":"code","source":"# AdamW is an optimizer which is a Adam Optimzier with weight-decay-fix\noptimizer = AdamW(model.parameters(),\n lr = 2e-5, \n eps = 1e-8 \n )\n","execution_count":21,"outputs":[]},{"metadata":{"id":"-p0upAhhRiIx","outputId":"3a075bc4-4570-4f37-b8f8-1d31a91d32a9","trusted":true},"cell_type":"code","source":"from transformers import get_linear_schedule_with_warmup\n\n# Number of training epochs (authors recommend between 2 and 4)\nepochs = 4\n\n# Total number of training steps is number of batches * number of epochs.\ntotal_steps = len(train_dataloader) * epochs\n\n# Create the learning rate scheduler.\nscheduler = get_linear_schedule_with_warmup(optimizer, \n num_warmup_steps = 0, # Default value in run_glue.py\n num_training_steps = total_steps)\nscheduler","execution_count":22,"outputs":[{"output_type":"execute_result","execution_count":22,"data":{"text/plain":""},"metadata":{}}]},{"metadata":{"id":"pE5B99H5H2-W"},"cell_type":"markdown","source":"Define a helper function for calculating accuracy."},{"metadata":{"id":"9cQNvaZ9bnyy","trusted":true},"cell_type":"code","source":"import numpy as np\n\n# Function to calculate the accuracy of our predictions vs labels\ndef flat_accuracy(preds, labels):\n pred_flat = np.argmax(preds, axis=1).flatten()\n labels_flat = labels.flatten()\n return np.sum(pred_flat == labels_flat) / len(labels_flat)","execution_count":23,"outputs":[]},{"metadata":{"id":"gpt6tR83keZD","trusted":true},"cell_type":"code","source":"#Creating the helper function to have a 
watch on elapsed time\n\nimport time\nimport datetime\n\ndef format_time(elapsed):\n '''\n Takes a time in seconds and returns a string hh:mm:ss\n '''\n # Round to the nearest second.\n elapsed_rounded = int(round((elapsed)))\n \n # Format as hh:mm:ss\n return str(datetime.timedelta(seconds=elapsed_rounded))\n","execution_count":26,"outputs":[]},{"metadata":{"id":"6J-FYdx6nFE_","outputId":"40adc853-1e64-49ef-a340-7c033918903a","trusted":true},"cell_type":"code","source":"#Let's start the training process\n\nimport random\n\n# This training code is based on the `run_glue.py` script here:\n# https://github.com/huggingface/transformers/blob/5bfcd0485ece086ebcbed2d008813037968a9e58/examples/run_glue.py#L128\n\n\n# Set the seed value all over the place to make this reproducible.\nseed_val = 42\n\nrandom.seed(seed_val)\nnp.random.seed(seed_val)\ntorch.manual_seed(seed_val)\ntorch.cuda.manual_seed_all(seed_val)\n\n# Store the average loss after each epoch so we can plot them.\nloss_values = []\n\n# For each epoch...\nfor epoch_i in range(0, epochs):\n \n # ========================================\n # Training\n # ========================================\n \n # Perform one full pass over the training set.\n\n print(\"\")\n print('======== Epoch {:} / {:} ========'.format(epoch_i + 1, epochs))\n print('Training...')\n\n # Measure how long the training epoch takes.\n t0 = time.time()\n\n # Reset the total loss for this epoch.\n total_loss = 0\n\n # Put the model into training mode. Don't be misled--the call to \n # `train` just changes the *mode*, it doesn't *perform* the training.\n # `dropout` and `batchnorm` layers behave differently during training\n # vs. 
test (source: https://stackoverflow.com/questions/51433378/what-does-model-train-do-in-pytorch)\n model.train()\n\n # For each batch of training data...\n for step, batch in enumerate(train_dataloader):\n\n # Progress update every 40 batches.\n if step % 40 == 0 and not step == 0:\n # Calculate elapsed time, formatted as hh:mm:ss.\n elapsed = format_time(time.time() - t0)\n \n # Report progress.\n print(' Batch {:>5,} of {:>5,}. Elapsed: {:}.'.format(step, len(train_dataloader), elapsed))\n\n # Unpack this training batch from our dataloader. \n #\n # As we unpack the batch, we'll also copy each tensor to the GPU using the \n # `to` method.\n #\n # `batch` contains three pytorch tensors:\n # [0]: input ids \n # [1]: attention masks\n # [2]: labels \n b_input_ids = batch[0].to(device)\n b_input_mask = batch[1].to(device)\n b_labels = batch[2].to(device)\n\n # Always clear any previously calculated gradients before performing a\n # backward pass. PyTorch doesn't do this automatically because \n # accumulating the gradients is \"convenient while training RNNs\". \n # (source: https://stackoverflow.com/questions/48001598/why-do-we-need-to-call-zero-grad-in-pytorch)\n model.zero_grad() \n\n # Perform a forward pass (evaluate the model on this training batch).\n # This will return the loss (rather than the model output) because we\n # have provided the `labels`.\n # The documentation for this `model` function is here: \n # https://huggingface.co/transformers/v2.2.0/model_doc/bert.html#transformers.BertForSequenceClassification\n outputs = model(b_input_ids, \n token_type_ids=None, \n attention_mask=b_input_mask, \n labels=b_labels)\n \n # The call to `model` always returns a tuple, so we need to pull the \n # loss value out of the tuple.\n loss = outputs[0]\n\n # Accumulate the training loss over all of the batches so that we can\n # calculate the average loss at the end. 
`loss` is a Tensor containing a\n # single value; the `.item()` function just returns the Python value \n # from the tensor.\n total_loss += loss.item()\n\n # Perform a backward pass to calculate the gradients.\n loss.backward()\n\n # Clip the norm of the gradients to 1.0.\n # This is to help prevent the \"exploding gradients\" problem.\n torch.nn.utils.clip_grad_norm_(model.parameters(), 1.0)\n\n # Update parameters and take a step using the computed gradient.\n # The optimizer dictates the \"update rule\"--how the parameters are\n # modified based on their gradients, the learning rate, etc.\n optimizer.step()\n\n # Update the learning rate.\n scheduler.step()\n\n # Calculate the average loss over the training data.\n avg_train_loss = total_loss / len(train_dataloader) \n \n # Store the loss value for plotting the learning curve.\n loss_values.append(avg_train_loss)\n\n print(\"\")\n print(\" Average training loss: {0:.2f}\".format(avg_train_loss))\n print(\" Training epoch took: {:}\".format(format_time(time.time() - t0)))\n \n # ========================================\n # Validation\n # ========================================\n # After the completion of each training epoch, measure our performance on\n # our validation set.\n\n print(\"\")\n print(\"Running Validation...\")\n\n t0 = time.time()\n\n # Put the model in evaluation mode--the dropout layers behave differently\n # during evaluation.\n model.eval()\n\n # Tracking variables \n eval_loss, eval_accuracy = 0, 0\n nb_eval_steps, nb_eval_examples = 0, 0\n\n # Evaluate data for one epoch\n for batch in validation_dataloader:\n \n # Add batch to GPU\n batch = tuple(t.to(device) for t in batch)\n \n # Unpack the inputs from our dataloader\n b_input_ids, b_input_mask, b_labels = batch\n \n # Telling the model not to compute or store gradients, saving memory and\n # speeding up validation\n with torch.no_grad(): \n\n # Forward pass, calculate logit predictions.\n # This will return the logits rather than the 
loss because we have\n # not provided labels.\n # token_type_ids is the same as the \"segment ids\", which \n # differentiates sentence 1 and 2 in 2-sentence tasks.\n # The documentation for this `model` function is here: \n # https://huggingface.co/transformers/v2.2.0/model_doc/bert.html#transformers.BertForSequenceClassification\n outputs = model(b_input_ids, \n token_type_ids=None, \n attention_mask=b_input_mask)\n \n # Get the \"logits\" output by the model. The \"logits\" are the output\n # values prior to applying an activation function like the softmax.\n logits = outputs[0]\n\n # Move logits and labels to CPU\n logits = logits.detach().cpu().numpy()\n label_ids = b_labels.to('cpu').numpy()\n \n # Calculate the accuracy for this batch of test sentences.\n tmp_eval_accuracy = flat_accuracy(logits, label_ids)\n \n # Accumulate the total accuracy.\n eval_accuracy += tmp_eval_accuracy\n\n # Track the number of batches\n nb_eval_steps += 1\n\n # Report the final accuracy for this validation run.\n print(\" Accuracy: {0:.2f}\".format(eval_accuracy/nb_eval_steps))\n print(\" Validation took: {:}\".format(format_time(time.time() - t0)))\n\nprint(\"\")\nprint(\"Training complete!\")","execution_count":27,"outputs":[{"output_type":"stream","text":"\n======== Epoch 1 / 4 ========\nTraining...\n Batch 40 of 241. Elapsed: 0:00:08.\n Batch 80 of 241. Elapsed: 0:00:17.\n Batch 120 of 241. Elapsed: 0:00:25.\n Batch 160 of 241. Elapsed: 0:00:34.\n Batch 200 of 241. Elapsed: 0:00:42.\n Batch 240 of 241. Elapsed: 0:00:50.\n\n Average training loss: 0.10\n Training epoch took: 0:00:50\n\nRunning Validation...\n Accuracy: 0.82\n Validation took: 0:00:02\n\n======== Epoch 2 / 4 ========\nTraining...\n Batch 40 of 241. Elapsed: 0:00:08.\n Batch 80 of 241. Elapsed: 0:00:17.\n Batch 120 of 241. Elapsed: 0:00:25.\n Batch 160 of 241. Elapsed: 0:00:33.\n Batch 200 of 241. Elapsed: 0:00:42.\n Batch 240 of 241. 
Elapsed: 0:00:50.\n\n Average training loss: 0.07\n Training epoch took: 0:00:50\n\nRunning Validation...\n Accuracy: 0.82\n Validation took: 0:00:02\n\n======== Epoch 3 / 4 ========\nTraining...\n Batch 40 of 241. Elapsed: 0:00:08.\n Batch 80 of 241. Elapsed: 0:00:17.\n Batch 120 of 241. Elapsed: 0:00:25.\n Batch 160 of 241. Elapsed: 0:00:33.\n Batch 200 of 241. Elapsed: 0:00:42.\n Batch 240 of 241. Elapsed: 0:00:50.\n\n Average training loss: 0.07\n Training epoch took: 0:00:50\n\nRunning Validation...\n Accuracy: 0.82\n Validation took: 0:00:02\n\n======== Epoch 4 / 4 ========\nTraining...\n Batch 40 of 241. Elapsed: 0:00:08.\n Batch 80 of 241. Elapsed: 0:00:17.\n Batch 120 of 241. Elapsed: 0:00:25.\n Batch 160 of 241. Elapsed: 0:00:33.\n Batch 200 of 241. Elapsed: 0:00:41.\n Batch 240 of 241. Elapsed: 0:00:50.\n\n Average training loss: 0.10\n Training epoch took: 0:00:50\n\nRunning Validation...\n Accuracy: 0.82\n Validation took: 0:00:02\n\nTraining complete!\n","name":"stdout"}]},{"metadata":{"id":"btUsZ5vMyjwt","outputId":"b3b54514-ed69-448a-a1dd-11480b8af8cd","trusted":true},"cell_type":"code","source":"print(loss_values) #Having a view of stored loss values in the list","execution_count":31,"outputs":[{"output_type":"stream","text":"[0.09573989999538511, 0.07170906540825654, 0.07369904678423128, 0.09808315255551665]\n","name":"stdout"}]},{"metadata":{"id":"mAN0LZBOOPVh","outputId":"271cd979-0fd3-4782-8cc1-934802584821","trusted":true},"cell_type":"code","source":"#Loading the test data and applying the same preprocessing techniques which we performed on the train data\nimport pandas as pd\n\n# Load the dataset into a pandas dataframe.\ndf = pd.read_csv(\"./cola_public/raw/out_of_domain_dev.tsv\", delimiter='\\t', header=None, names=['sentence_source', 'label', 'label_notes', 'sentence'])\n\n# Report the number of sentences.\nprint('Number of test sentences: {:,}\\n'.format(df.shape[0]))\n\n# Create sentence and label lists\nsentences = 
df.sentence.values\nlabels = df.label.values\n\n# Tokenize all of the sentences and map the tokens to their word IDs.\ninput_ids = []\n\n# For every sentence...\nfor sent in sentences:\n # `encode` will:\n # (1) Tokenize the sentence.\n # (2) Prepend the `[CLS]` token to the start.\n # (3) Append the `[SEP]` token to the end.\n # (4) Map tokens to their IDs.\n encoded_sent = tokenizer.encode(\n sent, # Sentence to encode.\n add_special_tokens = True, # Add '[CLS]' and '[SEP]'\n )\n \n input_ids.append(encoded_sent)\n\n# Pad our input tokens\ninput_ids = pad_sequences(input_ids, maxlen=MAX_LEN, \n dtype=\"long\", truncating=\"post\", padding=\"post\")\n\n# Create attention masks\nattention_masks = []\n\n# Create a mask of 1s for each token followed by 0s for padding\nfor seq in input_ids:\n seq_mask = [float(i>0) for i in seq]\n attention_masks.append(seq_mask) \n\n# Convert to tensors.\nprediction_inputs = torch.tensor(input_ids)\nprediction_masks = torch.tensor(attention_masks)\nprediction_labels = torch.tensor(labels)\n\n# Set the batch size. 
\nbatch_size = 32 \n\n# Create the DataLoader.\nprediction_data = TensorDataset(prediction_inputs, prediction_masks, prediction_labels)\nprediction_sampler = SequentialSampler(prediction_data)\nprediction_dataloader = DataLoader(prediction_data, sampler=prediction_sampler, batch_size=batch_size)","execution_count":32,"outputs":[{"output_type":"stream","text":"Number of test sentences: 516\n\n","name":"stdout"}]},{"metadata":{"id":"Hba10sXR7Xi6","outputId":"8c65c0d5-4498-44a4-e025-3b29b4e86923","trusted":true},"cell_type":"code","source":"#Evaluating our model on the test set\n\n# Prediction on test set\n\nprint('Predicting labels for {:,} test sentences...'.format(len(prediction_inputs)))\n\n# Put model in evaluation mode\nmodel.eval()\n\n# Tracking variables \npredictions , true_labels = [], []\n\n# Predict \nfor batch in prediction_dataloader:\n # Add batch to GPU\n batch = tuple(t.to(device) for t in batch)\n \n # Unpack the inputs from our dataloader\n b_input_ids, b_input_mask, b_labels = batch\n \n # Telling the model not to compute or store gradients, saving memory and \n # speeding up prediction\n with torch.no_grad():\n # Forward pass, calculate logit predictions\n outputs = model(b_input_ids, token_type_ids=None, \n attention_mask=b_input_mask)\n\n logits = outputs[0]\n\n # Move logits and labels to CPU\n logits = logits.detach().cpu().numpy()\n label_ids = b_labels.to('cpu').numpy()\n \n # Store predictions and true labels\n predictions.append(logits)\n true_labels.append(label_ids)\n\n","execution_count":33,"outputs":[{"output_type":"stream","text":"Predicting labels for 516 test sentences...\n","name":"stdout"}]},{"metadata":{"id":"-5jscIM8R4Gv"},"cell_type":"markdown","source":"We will use Matthews Correlation Coefficient(MCC) to evaluate our model. \nMCC is used in many areas of Natural Language Processing. 
Also, it's a great metric to use for imbalanced datasets.\n\nLink: https://towardsdatascience.com/the-best-classification-metric-youve-never-heard-of-the-matthews-correlation-coefficient-3bf50a2f3e9a\n"},{"metadata":{"id":"hWcy0X1hirdx","outputId":"c6b0e7ec-cadb-4300-fca5-60cb8be23cfb","trusted":true},"cell_type":"code","source":"print('Positive samples: %d of %d (%.2f%%)' % (df.label.sum(), len(df.label), (df.label.sum() / len(df.label) * 100.0)))","execution_count":34,"outputs":[{"output_type":"stream","text":"Positive samples: 354 of 516 (68.60%)\n","name":"stdout"}]},{"metadata":{"id":"cRaZQ4XC7kLs","outputId":"2f28c93d-1c0f-452b-81b6-2a06f6e05c51","trusted":true},"cell_type":"code","source":"from sklearn.metrics import matthews_corrcoef\n\nmatthews_set = []\n\n# Evaluate each test batch using the Matthews correlation coefficient\nprint('Calculating Matthews Corr. Coef. for each batch...')\n\n# For each input batch...\nfor i in range(len(true_labels)):\n \n # The predictions for this batch are a 2-column ndarray (one column for \"0\" \n # and one column for \"1\"). Pick the label with the highest value and turn this\n # into a list of 0s and 1s.\n pred_labels_i = np.argmax(predictions[i], axis=1).flatten()\n \n # Calculate and store the coef for this batch. \n matthews = matthews_corrcoef(true_labels[i], pred_labels_i) \n matthews_set.append(matthews)","execution_count":35,"outputs":[{"output_type":"stream","text":"Calculating Matthews Corr. Coef. 
for each batch...\n","name":"stdout"},{"output_type":"stream","text":"/opt/conda/lib/python3.7/site-packages/sklearn/metrics/_classification.py:846: RuntimeWarning: invalid value encountered in double_scalars\n mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)\n","name":"stderr"}]},{"metadata":{"id":"oCYZa1lQ8Jn8","outputId":"e0e4026c-b20a-4dab-a817-49874d2b5a73","trusted":true},"cell_type":"code","source":"# Combine the predictions for each batch into a single list of 0s and 1s.\nflat_predictions = [item for sublist in predictions for item in sublist]\nflat_predictions = np.argmax(flat_predictions, axis=1).flatten()\n\n# Combine the correct labels for each batch into a single list.\nflat_true_labels = [item for sublist in true_labels for item in sublist]\n\n# Calculate the MCC\nmcc = matthews_corrcoef(flat_true_labels, flat_predictions)\n\nprint('MCC: %.3f' % mcc)","execution_count":36,"outputs":[{"output_type":"stream","text":"MCC: 0.529\n","name":"stdout"}]},{"metadata":{"trusted":true},"cell_type":"code","source":"","execution_count":null,"outputs":[]}],"metadata":{"kernelspec":{"language":"python","display_name":"Python 3","name":"python3"},"language_info":{"pygments_lexer":"ipython3","nbconvert_exporter":"python","version":"3.6.4","file_extension":".py","codemirror_mode":{"name":"ipython","version":3},"name":"python","mimetype":"text/x-python"}},"nbformat":4,"nbformat_minor":4}
--------------------------------------------------------------------------------
/simpletransformers-tuto.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "nbformat": 4,
3 | "nbformat_minor": 0,
4 | "metadata": {
5 | "colab": {
6 | "name": "TopicClassifier.ipynb",
7 | "provenance": []
8 | },
9 | "kernelspec": {
10 | "name": "python3",
11 | "display_name": "Python 3"
12 | },
13 | "accelerator": "GPU",
14 | "widgets": {
15 | "application/vnd.jupyter.widget-state+json": {
16 | "dcefe0f8a6a74548b5688d060624e960": {
17 | "model_module": "@jupyter-widgets/controls",
18 | "model_name": "HBoxModel",
19 | "state": {
20 | "_view_name": "HBoxView",
21 | "_dom_classes": [],
22 | "_model_name": "HBoxModel",
23 | "_view_module": "@jupyter-widgets/controls",
24 | "_model_module_version": "1.5.0",
25 | "_view_count": null,
26 | "_view_module_version": "1.5.0",
27 | "box_style": "",
28 | "layout": "IPY_MODEL_9eda6253942a45fb96f869e054a599ec",
29 | "_model_module": "@jupyter-widgets/controls",
30 | "children": [
31 | "IPY_MODEL_c84b59de4294480aaa9d233e41ccbed1",
32 | "IPY_MODEL_448feec51d264190898f92f6481feb15"
33 | ]
34 | }
35 | },
36 | "9eda6253942a45fb96f869e054a599ec": {
37 | "model_module": "@jupyter-widgets/base",
38 | "model_name": "LayoutModel",
39 | "state": {
40 | "_view_name": "LayoutView",
41 | "grid_template_rows": null,
42 | "right": null,
43 | "justify_content": null,
44 | "_view_module": "@jupyter-widgets/base",
45 | "overflow": null,
46 | "_model_module_version": "1.2.0",
47 | "_view_count": null,
48 | "flex_flow": null,
49 | "width": null,
50 | "min_width": null,
51 | "border": null,
52 | "align_items": null,
53 | "bottom": null,
54 | "_model_module": "@jupyter-widgets/base",
55 | "top": null,
56 | "grid_column": null,
57 | "overflow_y": null,
58 | "overflow_x": null,
59 | "grid_auto_flow": null,
60 | "grid_area": null,
61 | "grid_template_columns": null,
62 | "flex": null,
63 | "_model_name": "LayoutModel",
64 | "justify_items": null,
65 | "grid_row": null,
66 | "max_height": null,
67 | "align_content": null,
68 | "visibility": null,
69 | "align_self": null,
70 | "height": null,
71 | "min_height": null,
72 | "padding": null,
73 | "grid_auto_rows": null,
74 | "grid_gap": null,
75 | "max_width": null,
76 | "order": null,
77 | "_view_module_version": "1.2.0",
78 | "grid_template_areas": null,
79 | "object_position": null,
80 | "object_fit": null,
81 | "grid_auto_columns": null,
82 | "margin": null,
83 | "display": null,
84 | "left": null
85 | }
86 | },
87 | "c84b59de4294480aaa9d233e41ccbed1": {
88 | "model_module": "@jupyter-widgets/controls",
89 | "model_name": "FloatProgressModel",
90 | "state": {
91 | "_view_name": "ProgressView",
92 | "style": "IPY_MODEL_9f484421dc8943b8afc9419134a6de4e",
93 | "_dom_classes": [],
94 | "description": "Downloading: 100%",
95 | "_model_name": "FloatProgressModel",
96 | "bar_style": "success",
97 | "max": 481,
98 | "_view_module": "@jupyter-widgets/controls",
99 | "_model_module_version": "1.5.0",
100 | "value": 481,
101 | "_view_count": null,
102 | "_view_module_version": "1.5.0",
103 | "orientation": "horizontal",
104 | "min": 0,
105 | "description_tooltip": null,
106 | "_model_module": "@jupyter-widgets/controls",
107 | "layout": "IPY_MODEL_67bc3d8ed4bb45a3b0beb3b83a0eddc0"
108 | }
109 | },
110 | "448feec51d264190898f92f6481feb15": {
111 | "model_module": "@jupyter-widgets/controls",
112 | "model_name": "HTMLModel",
113 | "state": {
114 | "_view_name": "HTMLView",
115 | "style": "IPY_MODEL_e3bf7cc4668d47c2908fddadb9d23b46",
116 | "_dom_classes": [],
117 | "description": "",
118 | "_model_name": "HTMLModel",
119 | "placeholder": "",
120 | "_view_module": "@jupyter-widgets/controls",
121 | "_model_module_version": "1.5.0",
122 | "value": " 481/481 [00:00<00:00, 2.23kB/s]",
123 | "_view_count": null,
124 | "_view_module_version": "1.5.0",
125 | "description_tooltip": null,
126 | "_model_module": "@jupyter-widgets/controls",
127 | "layout": "IPY_MODEL_923bc5a1e4d84cf0b1d7be8f10928155"
128 | }
129 | },
130 | "9f484421dc8943b8afc9419134a6de4e": {
131 | "model_module": "@jupyter-widgets/controls",
132 | "model_name": "ProgressStyleModel",
133 | "state": {
134 | "_view_name": "StyleView",
135 | "_model_name": "ProgressStyleModel",
136 | "description_width": "initial",
137 | "_view_module": "@jupyter-widgets/base",
138 | "_model_module_version": "1.5.0",
139 | "_view_count": null,
140 | "_view_module_version": "1.2.0",
141 | "bar_color": null,
142 | "_model_module": "@jupyter-widgets/controls"
143 | }
144 | },
145 | "67bc3d8ed4bb45a3b0beb3b83a0eddc0": {
146 | "model_module": "@jupyter-widgets/base",
147 | "model_name": "LayoutModel",
148 | "state": {
149 | "_view_name": "LayoutView",
150 | "grid_template_rows": null,
151 | "right": null,
152 | "justify_content": null,
153 | "_view_module": "@jupyter-widgets/base",
154 | "overflow": null,
155 | "_model_module_version": "1.2.0",
156 | "_view_count": null,
157 | "flex_flow": null,
158 | "width": null,
159 | "min_width": null,
160 | "border": null,
161 | "align_items": null,
162 | "bottom": null,
163 | "_model_module": "@jupyter-widgets/base",
164 | "top": null,
165 | "grid_column": null,
166 | "overflow_y": null,
167 | "overflow_x": null,
168 | "grid_auto_flow": null,
169 | "grid_area": null,
170 | "grid_template_columns": null,
171 | "flex": null,
172 | "_model_name": "LayoutModel",
173 | "justify_items": null,
174 | "grid_row": null,
175 | "max_height": null,
176 | "align_content": null,
177 | "visibility": null,
178 | "align_self": null,
179 | "height": null,
180 | "min_height": null,
181 | "padding": null,
182 | "grid_auto_rows": null,
183 | "grid_gap": null,
184 | "max_width": null,
185 | "order": null,
186 | "_view_module_version": "1.2.0",
187 | "grid_template_areas": null,
188 | "object_position": null,
189 | "object_fit": null,
190 | "grid_auto_columns": null,
191 | "margin": null,
192 | "display": null,
193 | "left": null
194 | }
195 | },
196 | "e3bf7cc4668d47c2908fddadb9d23b46": {
197 | "model_module": "@jupyter-widgets/controls",
198 | "model_name": "DescriptionStyleModel",
199 | "state": {
200 | "_view_name": "StyleView",
201 | "_model_name": "DescriptionStyleModel",
202 | "description_width": "",
203 | "_view_module": "@jupyter-widgets/base",
204 | "_model_module_version": "1.5.0",
205 | "_view_count": null,
206 | "_view_module_version": "1.2.0",
207 | "_model_module": "@jupyter-widgets/controls"
208 | }
209 | },
210 | "923bc5a1e4d84cf0b1d7be8f10928155": {
211 | "model_module": "@jupyter-widgets/base",
212 | "model_name": "LayoutModel",
213 | "state": {
214 | "_view_name": "LayoutView",
215 | "grid_template_rows": null,
216 | "right": null,
217 | "justify_content": null,
218 | "_view_module": "@jupyter-widgets/base",
219 | "overflow": null,
220 | "_model_module_version": "1.2.0",
221 | "_view_count": null,
222 | "flex_flow": null,
223 | "width": null,
224 | "min_width": null,
225 | "border": null,
226 | "align_items": null,
227 | "bottom": null,
228 | "_model_module": "@jupyter-widgets/base",
229 | "top": null,
230 | "grid_column": null,
231 | "overflow_y": null,
232 | "overflow_x": null,
233 | "grid_auto_flow": null,
234 | "grid_area": null,
235 | "grid_template_columns": null,
236 | "flex": null,
237 | "_model_name": "LayoutModel",
238 | "justify_items": null,
239 | "grid_row": null,
240 | "max_height": null,
241 | "align_content": null,
242 | "visibility": null,
243 | "align_self": null,
244 | "height": null,
245 | "min_height": null,
246 | "padding": null,
247 | "grid_auto_rows": null,
248 | "grid_gap": null,
249 | "max_width": null,
250 | "order": null,
251 | "_view_module_version": "1.2.0",
252 | "grid_template_areas": null,
253 | "object_position": null,
254 | "object_fit": null,
255 | "grid_auto_columns": null,
256 | "margin": null,
257 | "display": null,
258 | "left": null
259 | }
260 | },
261 | "c876ac74bd754112bc68b19e50980a15": {
262 | "model_module": "@jupyter-widgets/controls",
263 | "model_name": "HBoxModel",
264 | "state": {
265 | "_view_name": "HBoxView",
266 | "_dom_classes": [],
267 | "_model_name": "HBoxModel",
268 | "_view_module": "@jupyter-widgets/controls",
269 | "_model_module_version": "1.5.0",
270 | "_view_count": null,
271 | "_view_module_version": "1.5.0",
272 | "box_style": "",
273 | "layout": "IPY_MODEL_0eef6a53d3fc47e5a4afad053874fe78",
274 | "_model_module": "@jupyter-widgets/controls",
275 | "children": [
276 | "IPY_MODEL_d526050960a745d39bec6dbe2567c886",
277 | "IPY_MODEL_59e3844c9e75469788e321dee3485007"
278 | ]
279 | }
280 | },
281 | "0eef6a53d3fc47e5a4afad053874fe78": {
282 | "model_module": "@jupyter-widgets/base",
283 | "model_name": "LayoutModel",
284 | "state": {
285 | "_view_name": "LayoutView",
286 | "grid_template_rows": null,
287 | "right": null,
288 | "justify_content": null,
289 | "_view_module": "@jupyter-widgets/base",
290 | "overflow": null,
291 | "_model_module_version": "1.2.0",
292 | "_view_count": null,
293 | "flex_flow": null,
294 | "width": null,
295 | "min_width": null,
296 | "border": null,
297 | "align_items": null,
298 | "bottom": null,
299 | "_model_module": "@jupyter-widgets/base",
300 | "top": null,
301 | "grid_column": null,
302 | "overflow_y": null,
303 | "overflow_x": null,
304 | "grid_auto_flow": null,
305 | "grid_area": null,
306 | "grid_template_columns": null,
307 | "flex": null,
308 | "_model_name": "LayoutModel",
309 | "justify_items": null,
310 | "grid_row": null,
311 | "max_height": null,
312 | "align_content": null,
313 | "visibility": null,
314 | "align_self": null,
315 | "height": null,
316 | "min_height": null,
317 | "padding": null,
318 | "grid_auto_rows": null,
319 | "grid_gap": null,
320 | "max_width": null,
321 | "order": null,
322 | "_view_module_version": "1.2.0",
323 | "grid_template_areas": null,
324 | "object_position": null,
325 | "object_fit": null,
326 | "grid_auto_columns": null,
327 | "margin": null,
328 | "display": null,
329 | "left": null
330 | }
331 | },
332 | "d526050960a745d39bec6dbe2567c886": {
333 | "model_module": "@jupyter-widgets/controls",
334 | "model_name": "FloatProgressModel",
335 | "state": {
336 | "_view_name": "ProgressView",
337 | "style": "IPY_MODEL_5482cfeb6af44fadbdde19acf3cf5991",
338 | "_dom_classes": [],
339 | "description": "Downloading: 100%",
340 | "_model_name": "FloatProgressModel",
341 | "bar_style": "success",
342 | "max": 501200538,
343 | "_view_module": "@jupyter-widgets/controls",
344 | "_model_module_version": "1.5.0",
345 | "value": 501200538,
346 | "_view_count": null,
347 | "_view_module_version": "1.5.0",
348 | "orientation": "horizontal",
349 | "min": 0,
350 | "description_tooltip": null,
351 | "_model_module": "@jupyter-widgets/controls",
352 | "layout": "IPY_MODEL_ea0f7cb87d104964a9048f74537bfc34"
353 | }
354 | },
355 | "59e3844c9e75469788e321dee3485007": {
356 | "model_module": "@jupyter-widgets/controls",
357 | "model_name": "HTMLModel",
358 | "state": {
359 | "_view_name": "HTMLView",
360 | "style": "IPY_MODEL_1e4db2f343384d64af7e047deb82d57b",
361 | "_dom_classes": [],
362 | "description": "",
363 | "_model_name": "HTMLModel",
364 | "placeholder": "",
365 | "_view_module": "@jupyter-widgets/controls",
366 | "_model_module_version": "1.5.0",
367 | "value": " 501M/501M [00:13<00:00, 36.9MB/s]",
368 | "_view_count": null,
369 | "_view_module_version": "1.5.0",
370 | "description_tooltip": null,
371 | "_model_module": "@jupyter-widgets/controls",
372 | "layout": "IPY_MODEL_7ecf02a3c49b4c0b9e2fa937300eeaab"
373 | }
374 | },
375 | "5482cfeb6af44fadbdde19acf3cf5991": {
376 | "model_module": "@jupyter-widgets/controls",
377 | "model_name": "ProgressStyleModel",
378 | "state": {
379 | "_view_name": "StyleView",
380 | "_model_name": "ProgressStyleModel",
381 | "description_width": "initial",
382 | "_view_module": "@jupyter-widgets/base",
383 | "_model_module_version": "1.5.0",
384 | "_view_count": null,
385 | "_view_module_version": "1.2.0",
386 | "bar_color": null,
387 | "_model_module": "@jupyter-widgets/controls"
388 | }
389 | },
390 | "ea0f7cb87d104964a9048f74537bfc34": {
391 | "model_module": "@jupyter-widgets/base",
392 | "model_name": "LayoutModel",
393 | "state": {
394 | "_view_name": "LayoutView",
395 | "grid_template_rows": null,
396 | "right": null,
397 | "justify_content": null,
398 | "_view_module": "@jupyter-widgets/base",
399 | "overflow": null,
400 | "_model_module_version": "1.2.0",
401 | "_view_count": null,
402 | "flex_flow": null,
403 | "width": null,
404 | "min_width": null,
405 | "border": null,
406 | "align_items": null,
407 | "bottom": null,
408 | "_model_module": "@jupyter-widgets/base",
409 | "top": null,
410 | "grid_column": null,
411 | "overflow_y": null,
412 | "overflow_x": null,
413 | "grid_auto_flow": null,
414 | "grid_area": null,
415 | "grid_template_columns": null,
416 | "flex": null,
417 | "_model_name": "LayoutModel",
418 | "justify_items": null,
419 | "grid_row": null,
420 | "max_height": null,
421 | "align_content": null,
422 | "visibility": null,
423 | "align_self": null,
424 | "height": null,
425 | "min_height": null,
426 | "padding": null,
427 | "grid_auto_rows": null,
428 | "grid_gap": null,
429 | "max_width": null,
430 | "order": null,
431 | "_view_module_version": "1.2.0",
432 | "grid_template_areas": null,
433 | "object_position": null,
434 | "object_fit": null,
435 | "grid_auto_columns": null,
436 | "margin": null,
437 | "display": null,
438 | "left": null
439 | }
440 | },
441 | "1e4db2f343384d64af7e047deb82d57b": {
442 | "model_module": "@jupyter-widgets/controls",
443 | "model_name": "DescriptionStyleModel",
444 | "state": {
445 | "_view_name": "StyleView",
446 | "_model_name": "DescriptionStyleModel",
447 | "description_width": "",
448 | "_view_module": "@jupyter-widgets/base",
449 | "_model_module_version": "1.5.0",
450 | "_view_count": null,
451 | "_view_module_version": "1.2.0",
452 | "_model_module": "@jupyter-widgets/controls"
453 | }
454 | },
455 | "7ecf02a3c49b4c0b9e2fa937300eeaab": {
456 | "model_module": "@jupyter-widgets/base",
457 | "model_name": "LayoutModel",
458 | "state": {
459 | "_view_name": "LayoutView",
460 | "grid_template_rows": null,
461 | "right": null,
462 | "justify_content": null,
463 | "_view_module": "@jupyter-widgets/base",
464 | "overflow": null,
465 | "_model_module_version": "1.2.0",
466 | "_view_count": null,
467 | "flex_flow": null,
468 | "width": null,
469 | "min_width": null,
470 | "border": null,
471 | "align_items": null,
472 | "bottom": null,
473 | "_model_module": "@jupyter-widgets/base",
474 | "top": null,
475 | "grid_column": null,
476 | "overflow_y": null,
477 | "overflow_x": null,
478 | "grid_auto_flow": null,
479 | "grid_area": null,
480 | "grid_template_columns": null,
481 | "flex": null,
482 | "_model_name": "LayoutModel",
483 | "justify_items": null,
484 | "grid_row": null,
485 | "max_height": null,
486 | "align_content": null,
487 | "visibility": null,
488 | "align_self": null,
489 | "height": null,
490 | "min_height": null,
491 | "padding": null,
492 | "grid_auto_rows": null,
493 | "grid_gap": null,
494 | "max_width": null,
495 | "order": null,
496 | "_view_module_version": "1.2.0",
497 | "grid_template_areas": null,
498 | "object_position": null,
499 | "object_fit": null,
500 | "grid_auto_columns": null,
501 | "margin": null,
502 | "display": null,
503 | "left": null
504 | }
505 | },
506 | "c6debb4681744b4eadca8dbfa24ef04b": {
507 | "model_module": "@jupyter-widgets/controls",
508 | "model_name": "HBoxModel",
509 | "state": {
510 | "_view_name": "HBoxView",
511 | "_dom_classes": [],
512 | "_model_name": "HBoxModel",
513 | "_view_module": "@jupyter-widgets/controls",
514 | "_model_module_version": "1.5.0",
515 | "_view_count": null,
516 | "_view_module_version": "1.5.0",
517 | "box_style": "",
518 | "layout": "IPY_MODEL_d73900f43ae24730990a415f3ebe4a22",
519 | "_model_module": "@jupyter-widgets/controls",
520 | "children": [
521 | "IPY_MODEL_519ed4dffbe940acbf344176aa5bb127",
522 | "IPY_MODEL_ca3c81b201d84163af4b64241d1ed080"
523 | ]
524 | }
525 | },
526 | "d73900f43ae24730990a415f3ebe4a22": {
527 | "model_module": "@jupyter-widgets/base",
528 | "model_name": "LayoutModel",
529 | "state": {
530 | "_view_name": "LayoutView",
531 | "grid_template_rows": null,
532 | "right": null,
533 | "justify_content": null,
534 | "_view_module": "@jupyter-widgets/base",
535 | "overflow": null,
536 | "_model_module_version": "1.2.0",
537 | "_view_count": null,
538 | "flex_flow": null,
539 | "width": null,
540 | "min_width": null,
541 | "border": null,
542 | "align_items": null,
543 | "bottom": null,
544 | "_model_module": "@jupyter-widgets/base",
545 | "top": null,
546 | "grid_column": null,
547 | "overflow_y": null,
548 | "overflow_x": null,
549 | "grid_auto_flow": null,
550 | "grid_area": null,
551 | "grid_template_columns": null,
552 | "flex": null,
553 | "_model_name": "LayoutModel",
554 | "justify_items": null,
555 | "grid_row": null,
556 | "max_height": null,
557 | "align_content": null,
558 | "visibility": null,
559 | "align_self": null,
560 | "height": null,
561 | "min_height": null,
562 | "padding": null,
563 | "grid_auto_rows": null,
564 | "grid_gap": null,
565 | "max_width": null,
566 | "order": null,
567 | "_view_module_version": "1.2.0",
568 | "grid_template_areas": null,
569 | "object_position": null,
570 | "object_fit": null,
571 | "grid_auto_columns": null,
572 | "margin": null,
573 | "display": null,
574 | "left": null
575 | }
576 | },
577 | "519ed4dffbe940acbf344176aa5bb127": {
578 | "model_module": "@jupyter-widgets/controls",
579 | "model_name": "FloatProgressModel",
580 | "state": {
581 | "_view_name": "ProgressView",
582 | "style": "IPY_MODEL_d7a865b9b73f48ac8c7fe553f376de72",
583 | "_dom_classes": [],
584 | "description": "Downloading: 100%",
585 | "_model_name": "FloatProgressModel",
586 | "bar_style": "success",
587 | "max": 898823,
588 | "_view_module": "@jupyter-widgets/controls",
589 | "_model_module_version": "1.5.0",
590 | "value": 898823,
591 | "_view_count": null,
592 | "_view_module_version": "1.5.0",
593 | "orientation": "horizontal",
594 | "min": 0,
595 | "description_tooltip": null,
596 | "_model_module": "@jupyter-widgets/controls",
597 | "layout": "IPY_MODEL_a0e4342438d74bb58ae0c119e5cfefaa"
598 | }
599 | },
600 | "ca3c81b201d84163af4b64241d1ed080": {
601 | "model_module": "@jupyter-widgets/controls",
602 | "model_name": "HTMLModel",
603 | "state": {
604 | "_view_name": "HTMLView",
605 | "style": "IPY_MODEL_43f10766b73d410e9e73fccc90270290",
606 | "_dom_classes": [],
607 | "description": "",
608 | "_model_name": "HTMLModel",
609 | "placeholder": "",
610 | "_view_module": "@jupyter-widgets/controls",
611 | "_model_module_version": "1.5.0",
612 | "value": " 899k/899k [00:01<00:00, 731kB/s]",
613 | "_view_count": null,
614 | "_view_module_version": "1.5.0",
615 | "description_tooltip": null,
616 | "_model_module": "@jupyter-widgets/controls",
617 | "layout": "IPY_MODEL_a3e2d9078b11462e9b13ca444d1a5839"
618 | }
619 | },
620 | "d7a865b9b73f48ac8c7fe553f376de72": {
621 | "model_module": "@jupyter-widgets/controls",
622 | "model_name": "ProgressStyleModel",
623 | "state": {
624 | "_view_name": "StyleView",
625 | "_model_name": "ProgressStyleModel",
626 | "description_width": "initial",
627 | "_view_module": "@jupyter-widgets/base",
628 | "_model_module_version": "1.5.0",
629 | "_view_count": null,
630 | "_view_module_version": "1.2.0",
631 | "bar_color": null,
632 | "_model_module": "@jupyter-widgets/controls"
633 | }
634 | },
635 | "a0e4342438d74bb58ae0c119e5cfefaa": {
636 | "model_module": "@jupyter-widgets/base",
637 | "model_name": "LayoutModel",
638 | "state": {
639 | "_view_name": "LayoutView",
640 | "grid_template_rows": null,
641 | "right": null,
642 | "justify_content": null,
643 | "_view_module": "@jupyter-widgets/base",
644 | "overflow": null,
645 | "_model_module_version": "1.2.0",
646 | "_view_count": null,
647 | "flex_flow": null,
648 | "width": null,
649 | "min_width": null,
650 | "border": null,
651 | "align_items": null,
652 | "bottom": null,
653 | "_model_module": "@jupyter-widgets/base",
654 | "top": null,
655 | "grid_column": null,
656 | "overflow_y": null,
657 | "overflow_x": null,
658 | "grid_auto_flow": null,
659 | "grid_area": null,
660 | "grid_template_columns": null,
661 | "flex": null,
662 | "_model_name": "LayoutModel",
663 | "justify_items": null,
664 | "grid_row": null,
665 | "max_height": null,
666 | "align_content": null,
667 | "visibility": null,
668 | "align_self": null,
669 | "height": null,
670 | "min_height": null,
671 | "padding": null,
672 | "grid_auto_rows": null,
673 | "grid_gap": null,
674 | "max_width": null,
675 | "order": null,
676 | "_view_module_version": "1.2.0",
677 | "grid_template_areas": null,
678 | "object_position": null,
679 | "object_fit": null,
680 | "grid_auto_columns": null,
681 | "margin": null,
682 | "display": null,
683 | "left": null
684 | }
685 | },
686 | "43f10766b73d410e9e73fccc90270290": {
687 | "model_module": "@jupyter-widgets/controls",
688 | "model_name": "DescriptionStyleModel",
689 | "state": {
690 | "_view_name": "StyleView",
691 | "_model_name": "DescriptionStyleModel",
692 | "description_width": "",
693 | "_view_module": "@jupyter-widgets/base",
694 | "_model_module_version": "1.5.0",
695 | "_view_count": null,
696 | "_view_module_version": "1.2.0",
697 | "_model_module": "@jupyter-widgets/controls"
698 | }
699 | },
700 | "a3e2d9078b11462e9b13ca444d1a5839": {
701 | "model_module": "@jupyter-widgets/base",
702 | "model_name": "LayoutModel",
703 | "state": {
704 | "_view_name": "LayoutView",
705 | "grid_template_rows": null,
706 | "right": null,
707 | "justify_content": null,
708 | "_view_module": "@jupyter-widgets/base",
709 | "overflow": null,
710 | "_model_module_version": "1.2.0",
711 | "_view_count": null,
712 | "flex_flow": null,
713 | "width": null,
714 | "min_width": null,
715 | "border": null,
716 | "align_items": null,
717 | "bottom": null,
718 | "_model_module": "@jupyter-widgets/base",
719 | "top": null,
720 | "grid_column": null,
721 | "overflow_y": null,
722 | "overflow_x": null,
723 | "grid_auto_flow": null,
724 | "grid_area": null,
725 | "grid_template_columns": null,
726 | "flex": null,
727 | "_model_name": "LayoutModel",
728 | "justify_items": null,
729 | "grid_row": null,
730 | "max_height": null,
731 | "align_content": null,
732 | "visibility": null,
733 | "align_self": null,
734 | "height": null,
735 | "min_height": null,
736 | "padding": null,
737 | "grid_auto_rows": null,
738 | "grid_gap": null,
739 | "max_width": null,
740 | "order": null,
741 | "_view_module_version": "1.2.0",
742 | "grid_template_areas": null,
743 | "object_position": null,
744 | "object_fit": null,
745 | "grid_auto_columns": null,
746 | "margin": null,
747 | "display": null,
748 | "left": null
749 | }
750 | },
751 | "16b93d1b0a564465bdb7ea62db94be57": {
752 | "model_module": "@jupyter-widgets/controls",
753 | "model_name": "HBoxModel",
754 | "state": {
755 | "_view_name": "HBoxView",
756 | "_dom_classes": [],
757 | "_model_name": "HBoxModel",
758 | "_view_module": "@jupyter-widgets/controls",
759 | "_model_module_version": "1.5.0",
760 | "_view_count": null,
761 | "_view_module_version": "1.5.0",
762 | "box_style": "",
763 | "layout": "IPY_MODEL_d6cf9e4a73c446e794232945fbff6470",
764 | "_model_module": "@jupyter-widgets/controls",
765 | "children": [
766 | "IPY_MODEL_2bd92b7bb9fe4ad6871f78c4f70d0c0f",
767 | "IPY_MODEL_c57fce150585428e89d8e88edf57d6b0"
768 | ]
769 | }
770 | },
771 | "d6cf9e4a73c446e794232945fbff6470": {
772 | "model_module": "@jupyter-widgets/base",
773 | "model_name": "LayoutModel",
774 | "state": {
775 | "_view_name": "LayoutView",
776 | "grid_template_rows": null,
777 | "right": null,
778 | "justify_content": null,
779 | "_view_module": "@jupyter-widgets/base",
780 | "overflow": null,
781 | "_model_module_version": "1.2.0",
782 | "_view_count": null,
783 | "flex_flow": null,
784 | "width": null,
785 | "min_width": null,
786 | "border": null,
787 | "align_items": null,
788 | "bottom": null,
789 | "_model_module": "@jupyter-widgets/base",
790 | "top": null,
791 | "grid_column": null,
792 | "overflow_y": null,
793 | "overflow_x": null,
794 | "grid_auto_flow": null,
795 | "grid_area": null,
796 | "grid_template_columns": null,
797 | "flex": null,
798 | "_model_name": "LayoutModel",
799 | "justify_items": null,
800 | "grid_row": null,
801 | "max_height": null,
802 | "align_content": null,
803 | "visibility": null,
804 | "align_self": null,
805 | "height": null,
806 | "min_height": null,
807 | "padding": null,
808 | "grid_auto_rows": null,
809 | "grid_gap": null,
810 | "max_width": null,
811 | "order": null,
812 | "_view_module_version": "1.2.0",
813 | "grid_template_areas": null,
814 | "object_position": null,
815 | "object_fit": null,
816 | "grid_auto_columns": null,
817 | "margin": null,
818 | "display": null,
819 | "left": null
820 | }
821 | },
822 | "2bd92b7bb9fe4ad6871f78c4f70d0c0f": {
823 | "model_module": "@jupyter-widgets/controls",
824 | "model_name": "FloatProgressModel",
825 | "state": {
826 | "_view_name": "ProgressView",
827 | "style": "IPY_MODEL_2fa9d9029453427d919a6206f91cecbf",
828 | "_dom_classes": [],
829 | "description": "Downloading: 100%",
830 | "_model_name": "FloatProgressModel",
831 | "bar_style": "success",
832 | "max": 456318,
833 | "_view_module": "@jupyter-widgets/controls",
834 | "_model_module_version": "1.5.0",
835 | "value": 456318,
836 | "_view_count": null,
837 | "_view_module_version": "1.5.0",
838 | "orientation": "horizontal",
839 | "min": 0,
840 | "description_tooltip": null,
841 | "_model_module": "@jupyter-widgets/controls",
842 | "layout": "IPY_MODEL_b160a13db10a40b8882ec4a9dd781c30"
843 | }
844 | },
845 | "c57fce150585428e89d8e88edf57d6b0": {
846 | "model_module": "@jupyter-widgets/controls",
847 | "model_name": "HTMLModel",
848 | "state": {
849 | "_view_name": "HTMLView",
850 | "style": "IPY_MODEL_afd1c4cf41aa4c34b7694751ea2db103",
851 | "_dom_classes": [],
852 | "description": "",
853 | "_model_name": "HTMLModel",
854 | "placeholder": "",
855 | "_view_module": "@jupyter-widgets/controls",
856 | "_model_module_version": "1.5.0",
857 | "value": " 456k/456k [00:00<00:00, 1.39MB/s]",
858 | "_view_count": null,
859 | "_view_module_version": "1.5.0",
860 | "description_tooltip": null,
861 | "_model_module": "@jupyter-widgets/controls",
862 | "layout": "IPY_MODEL_8edb7f445fc642558c87a63fb59ef66f"
863 | }
864 | },
865 | "2fa9d9029453427d919a6206f91cecbf": {
866 | "model_module": "@jupyter-widgets/controls",
867 | "model_name": "ProgressStyleModel",
868 | "state": {
869 | "_view_name": "StyleView",
870 | "_model_name": "ProgressStyleModel",
871 | "description_width": "initial",
872 | "_view_module": "@jupyter-widgets/base",
873 | "_model_module_version": "1.5.0",
874 | "_view_count": null,
875 | "_view_module_version": "1.2.0",
876 | "bar_color": null,
877 | "_model_module": "@jupyter-widgets/controls"
878 | }
879 | },
880 | "b160a13db10a40b8882ec4a9dd781c30": {
881 | "model_module": "@jupyter-widgets/base",
882 | "model_name": "LayoutModel",
883 | "state": {
884 | "_view_name": "LayoutView",
885 | "grid_template_rows": null,
886 | "right": null,
887 | "justify_content": null,
888 | "_view_module": "@jupyter-widgets/base",
889 | "overflow": null,
890 | "_model_module_version": "1.2.0",
891 | "_view_count": null,
892 | "flex_flow": null,
893 | "width": null,
894 | "min_width": null,
895 | "border": null,
896 | "align_items": null,
897 | "bottom": null,
898 | "_model_module": "@jupyter-widgets/base",
899 | "top": null,
900 | "grid_column": null,
901 | "overflow_y": null,
902 | "overflow_x": null,
903 | "grid_auto_flow": null,
904 | "grid_area": null,
905 | "grid_template_columns": null,
906 | "flex": null,
907 | "_model_name": "LayoutModel",
908 | "justify_items": null,
909 | "grid_row": null,
910 | "max_height": null,
911 | "align_content": null,
912 | "visibility": null,
913 | "align_self": null,
914 | "height": null,
915 | "min_height": null,
916 | "padding": null,
917 | "grid_auto_rows": null,
918 | "grid_gap": null,
919 | "max_width": null,
920 | "order": null,
921 | "_view_module_version": "1.2.0",
922 | "grid_template_areas": null,
923 | "object_position": null,
924 | "object_fit": null,
925 | "grid_auto_columns": null,
926 | "margin": null,
927 | "display": null,
928 | "left": null
929 | }
930 | },
931 | "afd1c4cf41aa4c34b7694751ea2db103": {
932 | "model_module": "@jupyter-widgets/controls",
933 | "model_name": "DescriptionStyleModel",
934 | "state": {
935 | "_view_name": "StyleView",
936 | "_model_name": "DescriptionStyleModel",
937 | "description_width": "",
938 | "_view_module": "@jupyter-widgets/base",
939 | "_model_module_version": "1.5.0",
940 | "_view_count": null,
941 | "_view_module_version": "1.2.0",
942 | "_model_module": "@jupyter-widgets/controls"
943 | }
944 | },
945 | "8edb7f445fc642558c87a63fb59ef66f": {
946 | "model_module": "@jupyter-widgets/base",
947 | "model_name": "LayoutModel",
948 | "state": {
949 | "_view_name": "LayoutView",
950 | "grid_template_rows": null,
951 | "right": null,
952 | "justify_content": null,
953 | "_view_module": "@jupyter-widgets/base",
954 | "overflow": null,
955 | "_model_module_version": "1.2.0",
956 | "_view_count": null,
957 | "flex_flow": null,
958 | "width": null,
959 | "min_width": null,
960 | "border": null,
961 | "align_items": null,
962 | "bottom": null,
963 | "_model_module": "@jupyter-widgets/base",
964 | "top": null,
965 | "grid_column": null,
966 | "overflow_y": null,
967 | "overflow_x": null,
968 | "grid_auto_flow": null,
969 | "grid_area": null,
970 | "grid_template_columns": null,
971 | "flex": null,
972 | "_model_name": "LayoutModel",
973 | "justify_items": null,
974 | "grid_row": null,
975 | "max_height": null,
976 | "align_content": null,
977 | "visibility": null,
978 | "align_self": null,
979 | "height": null,
980 | "min_height": null,
981 | "padding": null,
982 | "grid_auto_rows": null,
983 | "grid_gap": null,
984 | "max_width": null,
985 | "order": null,
986 | "_view_module_version": "1.2.0",
987 | "grid_template_areas": null,
988 | "object_position": null,
989 | "object_fit": null,
990 | "grid_auto_columns": null,
991 | "margin": null,
992 | "display": null,
993 | "left": null
994 | }
995 | }
996 | }
997 | }
998 | },
999 | "cells": [
1000 | {
1001 | "cell_type": "markdown",
1002 | "metadata": {
1003 | "id": "KjJ3TLjn9ZrU",
1004 | "colab_type": "text"
1005 | },
1006 | "source": [
1007 | "Installing Packages\n",
1008 | "\n"
1009 | ]
1010 | },
1011 | {
1012 | "cell_type": "code",
1013 | "metadata": {
1014 | "id": "pRfa0HiSCMxu",
1015 | "colab_type": "code",
1016 | "colab": {
1017 | "base_uri": "https://localhost:8080/",
1018 | "height": 1000
1019 | },
1020 | "outputId": "7418dca2-a78c-403c-86f9-65931804dc41"
1021 | },
1022 | "source": [
1023 | "!pip install --upgrade transformers\n",
1024 | "!pip install simpletransformers\n",
1025 | "# memory footprint support libraries/code\n",
1026 | "!ln -sf /opt/bin/nvidia-smi /usr/bin/nvidia-smi\n",
1027 | "!pip install gputil\n",
1028 | "!pip install psutil\n",
1029 | "!pip install humanize"
1030 | ],
1031 | "execution_count": null,
1032 | "outputs": [
1033 | {
1034 | "output_type": "stream",
1035 | "text": [
1036 | "Collecting transformers\n",
1037 | "\u001b[?25l Downloading https://files.pythonhosted.org/packages/27/3c/91ed8f5c4e7ef3227b4119200fc0ed4b4fd965b1f0172021c25701087825/transformers-3.0.2-py3-none-any.whl (769kB)\n",
1038 | "\u001b[K |████████████████████████████████| 778kB 4.5MB/s \n",
1039 | "\u001b[?25hCollecting tokenizers==0.8.1.rc1\n",
1040 | "\u001b[?25l Downloading https://files.pythonhosted.org/packages/40/d0/30d5f8d221a0ed981a186c8eb986ce1c94e3a6e87f994eae9f4aa5250217/tokenizers-0.8.1rc1-cp36-cp36m-manylinux1_x86_64.whl (3.0MB)\n",
1041 | "\u001b[K |████████████████████████████████| 3.0MB 13.2MB/s \n",
1042 | "\u001b[?25hRequirement already satisfied, skipping upgrade: dataclasses; python_version < \"3.7\" in /usr/local/lib/python3.6/dist-packages (from transformers) (0.7)\n",
1043 | "Requirement already satisfied, skipping upgrade: regex!=2019.12.17 in /usr/local/lib/python3.6/dist-packages (from transformers) (2019.12.20)\n",
1044 | "Requirement already satisfied, skipping upgrade: packaging in /usr/local/lib/python3.6/dist-packages (from transformers) (20.4)\n",
1045 | "Requirement already satisfied, skipping upgrade: requests in /usr/local/lib/python3.6/dist-packages (from transformers) (2.23.0)\n",
1046 | "Requirement already satisfied, skipping upgrade: filelock in /usr/local/lib/python3.6/dist-packages (from transformers) (3.0.12)\n",
1047 | "Collecting sacremoses\n",
1048 | "\u001b[?25l Downloading https://files.pythonhosted.org/packages/7d/34/09d19aff26edcc8eb2a01bed8e98f13a1537005d31e95233fd48216eed10/sacremoses-0.0.43.tar.gz (883kB)\n",
1049 | "\u001b[K |████████████████████████████████| 890kB 30.3MB/s \n",
1050 | "\u001b[?25hRequirement already satisfied, skipping upgrade: tqdm>=4.27 in /usr/local/lib/python3.6/dist-packages (from transformers) (4.41.1)\n",
1051 | "Collecting sentencepiece!=0.1.92\n",
1052 | "\u001b[?25l Downloading https://files.pythonhosted.org/packages/d4/a4/d0a884c4300004a78cca907a6ff9a5e9fe4f090f5d95ab341c53d28cbc58/sentencepiece-0.1.91-cp36-cp36m-manylinux1_x86_64.whl (1.1MB)\n",
1053 | "\u001b[K |████████████████████████████████| 1.1MB 50.5MB/s \n",
1054 | "\u001b[?25hRequirement already satisfied, skipping upgrade: numpy in /usr/local/lib/python3.6/dist-packages (from transformers) (1.18.5)\n",
1055 | "Requirement already satisfied, skipping upgrade: six in /usr/local/lib/python3.6/dist-packages (from packaging->transformers) (1.12.0)\n",
1056 | "Requirement already satisfied, skipping upgrade: pyparsing>=2.0.2 in /usr/local/lib/python3.6/dist-packages (from packaging->transformers) (2.4.7)\n",
1057 | "Requirement already satisfied, skipping upgrade: urllib3!=1.25.0,!=1.25.1,<1.26,>=1.21.1 in /usr/local/lib/python3.6/dist-packages (from requests->transformers) (1.24.3)\n",
1058 | "Requirement already satisfied, skipping upgrade: idna<3,>=2.5 in /usr/local/lib/python3.6/dist-packages (from requests->transformers) (2.10)\n",
1059 | "Requirement already satisfied, skipping upgrade: chardet<4,>=3.0.2 in /usr/local/lib/python3.6/dist-packages (from requests->transformers) (3.0.4)\n",
1060 | "Requirement already satisfied, skipping upgrade: certifi>=2017.4.17 in /usr/local/lib/python3.6/dist-packages (from requests->transformers) (2020.6.20)\n",
1061 | "Requirement already satisfied, skipping upgrade: click in /usr/local/lib/python3.6/dist-packages (from sacremoses->transformers) (7.1.2)\n",
1062 | "Requirement already satisfied, skipping upgrade: joblib in /usr/local/lib/python3.6/dist-packages (from sacremoses->transformers) (0.16.0)\n",
1063 | "Building wheels for collected packages: sacremoses\n",
1064 | " Building wheel for sacremoses (setup.py) ... \u001b[?25l\u001b[?25hdone\n",
1065 | " Created wheel for sacremoses: filename=sacremoses-0.0.43-cp36-none-any.whl size=893260 sha256=0398548924f4d2723f574b443ce65ba40ca1abf02a9844573ed2d4457a182ef7\n",
1066 | " Stored in directory: /root/.cache/pip/wheels/29/3c/fd/7ce5c3f0666dab31a50123635e6fb5e19ceb42ce38d4e58f45\n",
1067 | "Successfully built sacremoses\n",
1068 | "Installing collected packages: tokenizers, sacremoses, sentencepiece, transformers\n",
1069 | "Successfully installed sacremoses-0.0.43 sentencepiece-0.1.91 tokenizers-0.8.1rc1 transformers-3.0.2\n",
1070 | "Collecting simpletransformers\n",
1071 | "\u001b[?25l Downloading https://files.pythonhosted.org/packages/ec/60/e52c4b3c1c4c2d0fa3eb5e4fc1dafb49f6f67971edeab8cb22d794e2e7bd/simpletransformers-0.44.0-py3-none-any.whl (194kB)\n",
1072 | "\u001b[K |████████████████████████████████| 204kB 4.4MB/s \n",
1073 | "\u001b[?25hRequirement already satisfied: pandas in /usr/local/lib/python3.6/dist-packages (from simpletransformers) (1.0.5)\n",
1074 | "Requirement already satisfied: scikit-learn in /usr/local/lib/python3.6/dist-packages (from simpletransformers) (0.22.2.post1)\n",
1075 | "Collecting tqdm>=4.47.0\n",
1076 | "\u001b[?25l Downloading https://files.pythonhosted.org/packages/af/88/7b0ea5fa8192d1733dea459a9e3059afc87819cb4072c43263f2ec7ab768/tqdm-4.48.0-py2.py3-none-any.whl (67kB)\n",
1077 | "\u001b[K |████████████████████████████████| 71kB 7.8MB/s \n",
1078 | "\u001b[?25hRequirement already satisfied: transformers>=2.11.0 in /usr/local/lib/python3.6/dist-packages (from simpletransformers) (3.0.2)\n",
1079 | "Requirement already satisfied: regex in /usr/local/lib/python3.6/dist-packages (from simpletransformers) (2019.12.20)\n",
1080 | "Requirement already satisfied: numpy in /usr/local/lib/python3.6/dist-packages (from simpletransformers) (1.18.5)\n",
1081 | "Collecting tensorboardx\n",
1082 | "\u001b[?25l Downloading https://files.pythonhosted.org/packages/af/0c/4f41bcd45db376e6fe5c619c01100e9b7531c55791b7244815bac6eac32c/tensorboardX-2.1-py2.py3-none-any.whl (308kB)\n",
1083 | "\u001b[K |████████████████████████████████| 317kB 13.4MB/s \n",
1084 | "\u001b[?25hRequirement already satisfied: scipy in /usr/local/lib/python3.6/dist-packages (from simpletransformers) (1.4.1)\n",
1085 | "Requirement already satisfied: tokenizers in /usr/local/lib/python3.6/dist-packages (from simpletransformers) (0.8.1rc1)\n",
1086 | "Requirement already satisfied: requests in /usr/local/lib/python3.6/dist-packages (from simpletransformers) (2.23.0)\n",
1087 | "Collecting seqeval\n",
1088 | " Downloading https://files.pythonhosted.org/packages/34/91/068aca8d60ce56dd9ba4506850e876aba5e66a6f2f29aa223224b50df0de/seqeval-0.0.12.tar.gz\n",
1089 | "Requirement already satisfied: pytz>=2017.2 in /usr/local/lib/python3.6/dist-packages (from pandas->simpletransformers) (2018.9)\n",
1090 | "Requirement already satisfied: python-dateutil>=2.6.1 in /usr/local/lib/python3.6/dist-packages (from pandas->simpletransformers) (2.8.1)\n",
1091 | "Requirement already satisfied: joblib>=0.11 in /usr/local/lib/python3.6/dist-packages (from scikit-learn->simpletransformers) (0.16.0)\n",
1092 | "Requirement already satisfied: dataclasses; python_version < \"3.7\" in /usr/local/lib/python3.6/dist-packages (from transformers>=2.11.0->simpletransformers) (0.7)\n",
1093 | "Requirement already satisfied: packaging in /usr/local/lib/python3.6/dist-packages (from transformers>=2.11.0->simpletransformers) (20.4)\n",
1094 | "Requirement already satisfied: sentencepiece!=0.1.92 in /usr/local/lib/python3.6/dist-packages (from transformers>=2.11.0->simpletransformers) (0.1.91)\n",
1095 | "Requirement already satisfied: sacremoses in /usr/local/lib/python3.6/dist-packages (from transformers>=2.11.0->simpletransformers) (0.0.43)\n",
1096 | "Requirement already satisfied: filelock in /usr/local/lib/python3.6/dist-packages (from transformers>=2.11.0->simpletransformers) (3.0.12)\n",
1097 | "Requirement already satisfied: protobuf>=3.8.0 in /usr/local/lib/python3.6/dist-packages (from tensorboardx->simpletransformers) (3.12.2)\n",
1098 | "Requirement already satisfied: six in /usr/local/lib/python3.6/dist-packages (from tensorboardx->simpletransformers) (1.12.0)\n",
1099 | "Requirement already satisfied: urllib3!=1.25.0,!=1.25.1,<1.26,>=1.21.1 in /usr/local/lib/python3.6/dist-packages (from requests->simpletransformers) (1.24.3)\n",
1100 | "Requirement already satisfied: chardet<4,>=3.0.2 in /usr/local/lib/python3.6/dist-packages (from requests->simpletransformers) (3.0.4)\n",
1101 | "Requirement already satisfied: idna<3,>=2.5 in /usr/local/lib/python3.6/dist-packages (from requests->simpletransformers) (2.10)\n",
1102 | "Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.6/dist-packages (from requests->simpletransformers) (2020.6.20)\n",
1103 | "Requirement already satisfied: Keras>=2.2.4 in /usr/local/lib/python3.6/dist-packages (from seqeval->simpletransformers) (2.3.1)\n",
1104 | "Requirement already satisfied: pyparsing>=2.0.2 in /usr/local/lib/python3.6/dist-packages (from packaging->transformers>=2.11.0->simpletransformers) (2.4.7)\n",
1105 | "Requirement already satisfied: click in /usr/local/lib/python3.6/dist-packages (from sacremoses->transformers>=2.11.0->simpletransformers) (7.1.2)\n",
1106 | "Requirement already satisfied: setuptools in /usr/local/lib/python3.6/dist-packages (from protobuf>=3.8.0->tensorboardx->simpletransformers) (49.1.0)\n",
1107 | "Requirement already satisfied: h5py in /usr/local/lib/python3.6/dist-packages (from Keras>=2.2.4->seqeval->simpletransformers) (2.10.0)\n",
1108 | "Requirement already satisfied: keras-applications>=1.0.6 in /usr/local/lib/python3.6/dist-packages (from Keras>=2.2.4->seqeval->simpletransformers) (1.0.8)\n",
1109 | "Requirement already satisfied: keras-preprocessing>=1.0.5 in /usr/local/lib/python3.6/dist-packages (from Keras>=2.2.4->seqeval->simpletransformers) (1.1.2)\n",
1110 | "Requirement already satisfied: pyyaml in /usr/local/lib/python3.6/dist-packages (from Keras>=2.2.4->seqeval->simpletransformers) (3.13)\n",
1111 | "Building wheels for collected packages: seqeval\n",
1112 | " Building wheel for seqeval (setup.py) ... \u001b[?25l\u001b[?25hdone\n",
1113 | " Created wheel for seqeval: filename=seqeval-0.0.12-cp36-none-any.whl size=7424 sha256=f54db22648bd99f606c3d4734c472b070822030861d7ce3de154678f04c9485c\n",
1114 | " Stored in directory: /root/.cache/pip/wheels/4f/32/0a/df3b340a82583566975377d65e724895b3fad101a3fb729f68\n",
1115 | "Successfully built seqeval\n",
1116 | "Installing collected packages: tqdm, tensorboardx, seqeval, simpletransformers\n",
1117 | " Found existing installation: tqdm 4.41.1\n",
1118 | " Uninstalling tqdm-4.41.1:\n",
1119 | " Successfully uninstalled tqdm-4.41.1\n",
1120 | "Successfully installed seqeval-0.0.12 simpletransformers-0.44.0 tensorboardx-2.1 tqdm-4.48.0\n"
1121 | ],
1122 | "name": "stdout"
1123 | },
1124 | {
1125 | "output_type": "display_data",
1126 | "data": {
1127 | "application/vnd.colab-display-data+json": {
1128 | "pip_warning": {
1129 | "packages": [
1130 | "tqdm"
1131 | ]
1132 | }
1133 | }
1134 | },
1135 | "metadata": {
1136 | "tags": []
1137 | }
1138 | },
1139 | {
1140 | "output_type": "stream",
1141 | "text": [
1142 | "Collecting gputil\n",
1143 | " Downloading https://files.pythonhosted.org/packages/ed/0e/5c61eedde9f6c87713e89d794f01e378cfd9565847d4576fa627d758c554/GPUtil-1.4.0.tar.gz\n",
1144 | "Building wheels for collected packages: gputil\n",
1145 | " Building wheel for gputil (setup.py) ... \u001b[?25l\u001b[?25hdone\n",
1146 | " Created wheel for gputil: filename=GPUtil-1.4.0-cp36-none-any.whl size=7413 sha256=a086d4a76aa82112559fe47c0d965f500f749a7cccadba2dd40cc268f236d1cf\n",
1147 | " Stored in directory: /root/.cache/pip/wheels/3d/77/07/80562de4bb0786e5ea186911a2c831fdd0018bda69beab71fd\n",
1148 | "Successfully built gputil\n",
1149 | "Installing collected packages: gputil\n",
1150 | "Successfully installed gputil-1.4.0\n",
1151 | "Requirement already satisfied: psutil in /usr/local/lib/python3.6/dist-packages (5.4.8)\n",
1152 | "Requirement already satisfied: humanize in /usr/local/lib/python3.6/dist-packages (0.5.1)\n"
1153 | ],
1154 | "name": "stdout"
1155 | }
1156 | ]
1157 | },
1158 | {
1159 | "cell_type": "markdown",
1160 | "metadata": {
1161 | "id": "GRciAgBD9r99",
1162 | "colab_type": "text"
1163 | },
1164 | "source": [
1165 | "Importing Libraries\n"
1166 | ]
1167 | },
1168 | {
1169 | "cell_type": "code",
1170 | "metadata": {
1171 | "id": "Ex6pimGgCSc9",
1172 | "colab_type": "code",
1173 | "colab": {}
1174 | },
1175 | "source": [
1176 | "import psutil\n",
1177 | "import humanize\n",
1178 | "import os\n",
1179 | "import GPUtil as GPU\n",
1180 | "\n",
1181 | "import numpy as np\n",
1182 | "import pandas as pd\n",
1183 | "from google.colab import files\n",
1184 | "from tqdm import tqdm\n",
1185 | "import warnings\n",
1186 | "warnings.simplefilter('ignore')\n",
1187 | "import gc\n",
1188 | "from scipy.special import softmax\n",
1189 | "from simpletransformers.classification import ClassificationModel\n",
1190 | "from sklearn.model_selection import train_test_split, StratifiedKFold, KFold \n",
1191 | "import sklearn\n",
1192 | "from sklearn.metrics import log_loss\n",
1193 | "from sklearn.metrics import *\n",
1194 | "from sklearn.model_selection import *\n",
1195 | "import re\n",
1196 | "import random\n",
1197 | "import torch\n",
1198 | "pd.options.display.max_colwidth = 200\n",
1199 | "\n",
1200 | "# Use the same seed everywhere so that our results are reproducible\n",
1201 | "\n",
1202 | "def seed_all(seed_value):\n",
1203 | " random.seed(seed_value) # Python\n",
1204 | " np.random.seed(seed_value) # cpu vars\n",
1205 | " torch.manual_seed(seed_value) # cpu vars\n",
1206 | " \n",
1207 | " if torch.cuda.is_available(): \n",
1208 | " torch.cuda.manual_seed(seed_value)\n",
1209 | " torch.cuda.manual_seed_all(seed_value) # gpu vars\n",
1210 | " torch.backends.cudnn.deterministic = True #needed\n",
1211 | " torch.backends.cudnn.benchmark = False\n",
1212 | "\n",
1213 | "seed_all(2)"
1214 | ],
1215 | "execution_count": null,
1216 | "outputs": []
1217 | },
1218 | {
1219 | "cell_type": "markdown",
1220 | "metadata": {
1221 | "id": "lO1WQEpfADpn",
1222 | "colab_type": "text"
1223 | },
1224 | "source": [
1225 | "Reading Data "
1226 | ]
1227 | },
1228 | {
1229 | "cell_type": "code",
1230 | "metadata": {
1231 | "id": "i3TVdkVZCShi",
1232 | "colab_type": "code",
1233 | "colab": {}
1234 | },
1235 | "source": [
1236 | "import pandas as pd\n",
1237 | "# We assume the data is a CSV file with two columns: text and label\n",
1238 | "# Use the pandas function read_csv to load it; pass the path to your CSV file below\n",
1239 | "train=pd.read_csv()\n",
1240 | "\n",
1241 | "feat_cols = \"text\"\n"
1242 | ],
1243 | "execution_count": null,
1244 | "outputs": []
1245 | },
1246 | {
1247 | "cell_type": "markdown",
1248 | "metadata": {
1249 | "id": "1v1gunP6AW8f",
1250 | "colab_type": "text"
1251 | },
1252 | "source": [
1253 | "Verify the topic classes in the data\n"
1254 | ]
1255 | },
1256 | {
1257 | "cell_type": "code",
1258 | "metadata": {
1259 | "id": "5GuR6k5zCSoz",
1260 | "colab_type": "code",
1261 | "colab": {
1262 | "base_uri": "https://localhost:8080/",
1263 | "height": 34
1264 | },
1265 | "outputId": "d95383ae-77b2-46e5-8ea9-be2f4abff35c"
1266 | },
1267 | "source": [
1268 | "train.label.unique()"
1269 | ],
1270 | "execution_count": null,
1271 | "outputs": [
1272 | {
1273 | "output_type": "execute_result",
1274 | "data": {
1275 | "text/plain": [
1276 | "array(['art', 'politics', 'health', 'tourism'], dtype=object)"
1277 | ]
1278 | },
1279 | "metadata": {
1280 | "tags": []
1281 | },
1282 | "execution_count": 6
1283 | }
1284 | ]
1285 | },
1286 | {
1287 | "cell_type": "code",
1288 | "metadata": {
1289 | "id": "U4BTqOO1CSu8",
1290 | "colab_type": "code",
1291 | "colab": {
1292 | "base_uri": "https://localhost:8080/",
1293 | "height": 158
1294 | },
1295 | "outputId": "afff28f1-14ba-4731-8770-7a5a2880b15f"
1296 | },
1297 | "source": [
1298 | "label_cols = ['art', 'politics', 'health', 'tourism']\n",
1299 | "train.head()\n",
1300 | "l=['art', 'politics', 'health', 'tourism']\n",
1301 | "# Get the numerical ids of column label\n",
1302 | "train['label']=train.label.astype('category')\n",
1303 | "\n",
1304 | "Y = train.label.cat.codes\n",
1305 | "train['label']=Y\n",
1306 | "# Print initial shape\n",
1307 | "print(Y.shape)\n",
1308 | "from keras.utils import to_categorical\n",
1309 | "# One-hot encode the indexes\n",
1310 | "Y = to_categorical(Y)\n",
1311 | "\n",
1312 | "# Check the new shape of the variable\n",
1313 | "print(Y.shape)\n",
1314 | "\n",
1315 | "# Print the first 5 rows\n",
1316 | "print(Y[0:5])\n",
1317 | "for i in range(len(l)) : \n",
1318 | " train[l[i]] = Y[:,i]"
1319 | ],
1320 | "execution_count": null,
1321 | "outputs": [
1322 | {
1323 | "output_type": "stream",
1324 | "text": [
1325 | "(4000,)\n",
1326 | "(4000, 4)\n",
1327 | "[[1. 0. 0. 0.]\n",
1328 | " [1. 0. 0. 0.]\n",
1329 | " [1. 0. 0. 0.]\n",
1330 | " [1. 0. 0. 0.]\n",
1331 | " [1. 0. 0. 0.]]\n"
1332 | ],
1333 | "name": "stdout"
1334 | },
1335 | {
1336 | "output_type": "stream",
1337 | "text": [
1338 | "Using TensorFlow backend.\n"
1339 | ],
1340 | "name": "stderr"
1341 | }
1342 | ]
1343 | },
1344 | {
1345 | "cell_type": "code",
1346 | "metadata": {
1347 | "id": "NpCXIMJgETqw",
1348 | "colab_type": "code",
1349 | "colab": {}
1350 | },
1351 | "source": [
1352 | "# Using the ClassificationModel from simpletransformers is very simple:\n",
1353 | " #ClassificationModel('Architecture', 'model shortcut name', use_cuda=True,num_labels=4)\n",
1354 | " #Architecture : Bert , Roberta , Xlnet , Xlm...\n",
1355 | " #shortcut name models for Roberta : roberta-base , roberta-large .... \n",
1356 | " #more details on : https://huggingface.co/transformers/pretrained_models.html \n",
1357 | " "
1358 | ],
1359 | "execution_count": null,
1360 | "outputs": []
1361 | },
1362 | {
1363 | "cell_type": "code",
1364 | "metadata": {
1365 | "id": "tnq3AVhxENnQ",
1366 | "colab_type": "code",
1367 | "colab": {}
1368 | },
1369 | "source": [
1370 | "#Using K-fold cross-validation is important for evaluating our model"
1371 | ],
1372 | "execution_count": null,
1373 | "outputs": []
1374 | },
1375 | {
1376 | "cell_type": "code",
1377 | "metadata": {
1378 | "id": "PP2iZ1B_CS0w",
1379 | "colab_type": "code",
1380 | "colab": {
1381 | "base_uri": "https://localhost:8080/",
1382 | "height": 804,
1383 | "referenced_widgets": [
1384 | "dcefe0f8a6a74548b5688d060624e960",
1385 | "9eda6253942a45fb96f869e054a599ec",
1386 | "c84b59de4294480aaa9d233e41ccbed1",
1387 | "448feec51d264190898f92f6481feb15",
1388 | "9f484421dc8943b8afc9419134a6de4e",
1389 | "67bc3d8ed4bb45a3b0beb3b83a0eddc0",
1390 | "e3bf7cc4668d47c2908fddadb9d23b46",
1391 | "923bc5a1e4d84cf0b1d7be8f10928155",
1392 | "c876ac74bd754112bc68b19e50980a15",
1393 | "0eef6a53d3fc47e5a4afad053874fe78",
1394 | "d526050960a745d39bec6dbe2567c886",
1395 | "59e3844c9e75469788e321dee3485007",
1396 | "5482cfeb6af44fadbdde19acf3cf5991",
1397 | "ea0f7cb87d104964a9048f74537bfc34",
1398 | "1e4db2f343384d64af7e047deb82d57b",
1399 | "7ecf02a3c49b4c0b9e2fa937300eeaab",
1400 | "c6debb4681744b4eadca8dbfa24ef04b",
1401 | "d73900f43ae24730990a415f3ebe4a22",
1402 | "519ed4dffbe940acbf344176aa5bb127",
1403 | "ca3c81b201d84163af4b64241d1ed080",
1404 | "d7a865b9b73f48ac8c7fe553f376de72",
1405 | "a0e4342438d74bb58ae0c119e5cfefaa",
1406 | "43f10766b73d410e9e73fccc90270290",
1407 | "a3e2d9078b11462e9b13ca444d1a5839",
1408 | "16b93d1b0a564465bdb7ea62db94be57",
1409 | "d6cf9e4a73c446e794232945fbff6470",
1410 | "2bd92b7bb9fe4ad6871f78c4f70d0c0f",
1411 | "c57fce150585428e89d8e88edf57d6b0",
1412 | "2fa9d9029453427d919a6206f91cecbf",
1413 | "b160a13db10a40b8882ec4a9dd781c30",
1414 | "afd1c4cf41aa4c34b7694751ea2db103",
1415 | "8edb7f445fc642558c87a63fb59ef66f"
1416 | ]
1417 | },
1418 | "outputId": "2f394a08-d830-45ba-f2d4-b21e0b6f6e6c"
1419 | },
1420 | "source": [
1421 | "%%time\n",
1422 | "err=[]\n",
1423 | "y_pred_tot=[]\n",
1424 | "\n",
1425 | "fold=StratifiedKFold(n_splits=5, shuffle=True, random_state=1997)\n",
1426 | "i=1\n",
1427 | "for train_index, test_index in fold.split(train,train['label']):\n",
1428 | " train1_trn, train1_val = train.iloc[train_index], train.iloc[test_index]\n",
1429 | " model = ClassificationModel('roberta', 'roberta-base', use_cuda=True,num_labels=4, args={\n",
1430 | " 'train_batch_size':16,\n",
1431 | " 'reprocess_input_data': True,\n",
1432 | " 'overwrite_output_dir': True,\n",
1433 | " 'fp16': False,\n",
1434 | " 'do_lower_case': False,\n",
1435 | " 'num_train_epochs': 4,\n",
1436 | " 'max_seq_length': 128,\n",
1437 | " 'regression': False,\n",
1438 | " 'manual_seed': 1997,\n",
1439 | " \"learning_rate\":2e-5,\n",
1440 | " 'weight_decay':0,\n",
1441 | " \"save_eval_checkpoints\": True,\n",
1442 | " \"save_model_every_epoch\": False,\n",
1443 | " \"silent\": True})\n",
1444 | " model.train_model(train1_trn)\n",
1445 | " raw_outputs_val = model.eval_model(train1_val)[1]\n",
1446 | " raw_outputs_vals = softmax(raw_outputs_val,axis=1)\n",
1447 | " print(f\"Log_Loss: {log_loss(train1_val['label'], raw_outputs_vals)}\")\n",
1448 | " err.append(log_loss(train1_val['label'], raw_outputs_vals))\n"
1449 | ],
1450 | "execution_count": null,
1451 | "outputs": [
1452 | {
1453 | "output_type": "display_data",
1454 | "data": {
1455 | "application/vnd.jupyter.widget-view+json": {
1456 | "model_id": "dcefe0f8a6a74548b5688d060624e960",
1457 | "version_minor": 0,
1458 | "version_major": 2
1459 | },
1460 | "text/plain": [
1461 | "HBox(children=(FloatProgress(value=0.0, description='Downloading', max=481.0, style=ProgressStyle(description_…"
1462 | ]
1463 | },
1464 | "metadata": {
1465 | "tags": []
1466 | }
1467 | },
1468 | {
1469 | "output_type": "stream",
1470 | "text": [
1471 | "\n"
1472 | ],
1473 | "name": "stdout"
1474 | },
1475 | {
1476 | "output_type": "display_data",
1477 | "data": {
1478 | "application/vnd.jupyter.widget-view+json": {
1479 | "model_id": "c876ac74bd754112bc68b19e50980a15",
1480 | "version_minor": 0,
1481 | "version_major": 2
1482 | },
1483 | "text/plain": [
1484 | "HBox(children=(FloatProgress(value=0.0, description='Downloading', max=501200538.0, style=ProgressStyle(descri…"
1485 | ]
1486 | },
1487 | "metadata": {
1488 | "tags": []
1489 | }
1490 | },
1491 | {
1492 | "output_type": "stream",
1493 | "text": [
1494 | "\n"
1495 | ],
1496 | "name": "stdout"
1497 | },
1498 | {
1499 | "output_type": "stream",
1500 | "text": [
1501 | "Some weights of the model checkpoint at roberta-base were not used when initializing RobertaForSequenceClassification: ['lm_head.bias', 'lm_head.dense.weight', 'lm_head.dense.bias', 'lm_head.layer_norm.weight', 'lm_head.layer_norm.bias', 'lm_head.decoder.weight']\n",
1502 | "- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPretraining model).\n",
1503 | "- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).\n",
1504 | "Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier.dense.weight', 'classifier.dense.bias', 'classifier.out_proj.weight', 'classifier.out_proj.bias']\n",
1505 | "You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.\n"
1506 | ],
1507 | "name": "stderr"
1508 | },
1509 | {
1510 | "output_type": "display_data",
1511 | "data": {
1512 | "application/vnd.jupyter.widget-view+json": {
1513 | "model_id": "c6debb4681744b4eadca8dbfa24ef04b",
1514 | "version_minor": 0,
1515 | "version_major": 2
1516 | },
1517 | "text/plain": [
1518 | "HBox(children=(FloatProgress(value=0.0, description='Downloading', max=898823.0, style=ProgressStyle(descripti…"
1519 | ]
1520 | },
1521 | "metadata": {
1522 | "tags": []
1523 | }
1524 | },
1525 | {
1526 | "output_type": "stream",
1527 | "text": [
1528 | "\n"
1529 | ],
1530 | "name": "stdout"
1531 | },
1532 | {
1533 | "output_type": "display_data",
1534 | "data": {
1535 | "application/vnd.jupyter.widget-view+json": {
1536 | "model_id": "16b93d1b0a564465bdb7ea62db94be57",
1537 | "version_minor": 0,
1538 | "version_major": 2
1539 | },
1540 | "text/plain": [
1541 | "HBox(children=(FloatProgress(value=0.0, description='Downloading', max=456318.0, style=ProgressStyle(descripti…"
1542 | ]
1543 | },
1544 | "metadata": {
1545 | "tags": []
1546 | }
1547 | },
1548 | {
1549 | "output_type": "stream",
1550 | "text": [
1551 | "\n",
1552 | "Log_Loss: 0.32932861055553075\n"
1553 | ],
1554 | "name": "stdout"
1555 | },
1556 | {
1557 | "output_type": "stream",
1558 | "text": [
1559 | "Some weights of the model checkpoint at roberta-base were not used when initializing RobertaForSequenceClassification: ['lm_head.bias', 'lm_head.dense.weight', 'lm_head.dense.bias', 'lm_head.layer_norm.weight', 'lm_head.layer_norm.bias', 'lm_head.decoder.weight']\n",
1560 | "- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPretraining model).\n",
1561 | "- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).\n",
1562 | "Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier.dense.weight', 'classifier.dense.bias', 'classifier.out_proj.weight', 'classifier.out_proj.bias']\n",
1563 | "You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.\n"
1564 | ],
1565 | "name": "stderr"
1566 | },
1567 | {
1568 | "output_type": "stream",
1569 | "text": [
1570 | "Log_Loss: 0.31324721513781695\n"
1571 | ],
1572 | "name": "stdout"
1573 | },
1574 | {
1575 | "output_type": "stream",
1576 | "text": [
1577 | "Some weights of the model checkpoint at roberta-base were not used when initializing RobertaForSequenceClassification: ['lm_head.bias', 'lm_head.dense.weight', 'lm_head.dense.bias', 'lm_head.layer_norm.weight', 'lm_head.layer_norm.bias', 'lm_head.decoder.weight']\n",
1578 | "- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPretraining model).\n",
1579 | "- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).\n",
1580 | "Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier.dense.weight', 'classifier.dense.bias', 'classifier.out_proj.weight', 'classifier.out_proj.bias']\n",
1581 | "You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.\n"
1582 | ],
1583 | "name": "stderr"
1584 | },
1585 | {
1586 | "output_type": "stream",
1587 | "text": [
1588 | "Log_Loss: 0.357522397101493\n"
1589 | ],
1590 | "name": "stdout"
1591 | },
1592 | {
1593 | "output_type": "stream",
1594 | "text": [
1595 | "Some weights of the model checkpoint at roberta-base were not used when initializing RobertaForSequenceClassification: ['lm_head.bias', 'lm_head.dense.weight', 'lm_head.dense.bias', 'lm_head.layer_norm.weight', 'lm_head.layer_norm.bias', 'lm_head.decoder.weight']\n",
1596 | "- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPretraining model).\n",
1597 | "- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).\n",
1598 | "Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier.dense.weight', 'classifier.dense.bias', 'classifier.out_proj.weight', 'classifier.out_proj.bias']\n",
1599 | "You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.\n"
1600 | ],
1601 | "name": "stderr"
1602 | },
1603 | {
1604 | "output_type": "stream",
1605 | "text": [
1606 | "Log_Loss: 0.39003183998007446\n"
1607 | ],
1608 | "name": "stdout"
1609 | },
1610 | {
1611 | "output_type": "stream",
1612 | "text": [
1613 | "Some weights of the model checkpoint at roberta-base were not used when initializing RobertaForSequenceClassification: ['lm_head.bias', 'lm_head.dense.weight', 'lm_head.dense.bias', 'lm_head.layer_norm.weight', 'lm_head.layer_norm.bias', 'lm_head.decoder.weight']\n",
1614 | "- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPretraining model).\n",
1615 | "- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).\n",
1616 | "Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier.dense.weight', 'classifier.dense.bias', 'classifier.out_proj.weight', 'classifier.out_proj.bias']\n",
1617 | "You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.\n"
1618 | ],
1619 | "name": "stderr"
1620 | },
1621 | {
1622 | "output_type": "stream",
1623 | "text": [
1624 | "Log_Loss: 0.35637871529928816\n",
1625 | "CPU times: user 11min 2s, sys: 4min 21s, total: 15min 23s\n",
1626 | "Wall time: 16min 7s\n"
1627 | ],
1628 | "name": "stdout"
1629 | }
1630 | ]
1631 | },
1632 | {
1633 | "cell_type": "code",
1634 | "metadata": {
1635 | "id": "uF8CEmS7CSto",
1636 | "colab_type": "code",
1637 | "colab": {
1638 | "base_uri": "https://localhost:8080/",
1639 | "height": 34
1640 | },
1641 | "outputId": "9ad353de-03e3-4e67-ea84-b6da506d6b28"
1642 | },
1643 | "source": [
1644 | "print(\"Mean LogLoss: \",np.mean(err))"
1645 | ],
1646 | "execution_count": null,
1647 | "outputs": [
1648 | {
1649 | "output_type": "stream",
1650 | "text": [
1651 | "Mean LogLoss: 0.34930175561484067\n"
1652 | ],
1653 | "name": "stdout"
1654 | }
1655 | ]
1656 | },
1657 | {
1658 | "cell_type": "code",
1659 | "metadata": {
1660 | "id": "1wXzsh0YQFX3",
1661 | "colab_type": "code",
1662 | "colab": {
1663 | "base_uri": "https://localhost:8080/",
1664 | "height": 158
1665 | },
1666 | "outputId": "81f90e11-01ca-4ef3-ae59-918a9ecd4cc2"
1667 | },
1668 | "source": [
1669 | "raw_outputs_vals"
1670 | ],
1671 | "execution_count": null,
1672 | "outputs": [
1673 | {
1674 | "output_type": "execute_result",
1675 | "data": {
1676 | "text/plain": [
1677 | "array([[9.9822301e-01, 3.4856689e-04, 3.8243082e-04, 1.0458552e-03],\n",
1678 | " [9.9695909e-01, 1.1522240e-03, 5.9563853e-04, 1.2927916e-03],\n",
1679 | " [9.9910539e-01, 2.3084633e-04, 2.5905663e-04, 4.0465154e-04],\n",
1680 | " ...,\n",
1681 | " [3.6545596e-04, 2.8826005e-04, 4.3145564e-04, 9.9891484e-01],\n",
1682 | " [4.0789684e-03, 9.9224585e-01, 1.2752400e-03, 2.3997365e-03],\n",
1683 | " [3.7382307e-04, 3.4797701e-04, 3.6257200e-04, 9.9891579e-01]],\n",
1684 | " dtype=float32)"
1685 | ]
1686 | },
1687 | "metadata": {
1688 | "tags": []
1689 | },
1690 | "execution_count": 11
1691 | }
1692 | ]
1693 | },
1694 | {
1695 | "cell_type": "code",
1696 | "metadata": {
1697 | "id": "OMZsTFFKTW6y",
1698 | "colab_type": "code",
1699 | "colab": {}
1700 | },
1701 | "source": [
1702 | "for i in range(len(raw_outputs_vals)):\n",
1703 | " for j in range(4):\n",
1704 | " if(max(raw_outputs_vals[i])==raw_outputs_vals[i][j]):\n",
1705 | " raw_outputs_vals[i][j]=1\n",
1706 | " else :\n",
1707 | " raw_outputs_vals[i][j]=0"
1708 | ],
1709 | "execution_count": null,
1710 | "outputs": []
1711 | },
1712 | {
1713 | "cell_type": "code",
1714 | "metadata": {
1715 | "id": "9QIdfEoVTwIo",
1716 | "colab_type": "code",
1717 | "colab": {
1718 | "base_uri": "https://localhost:8080/",
1719 | "height": 141
1720 | },
1721 | "outputId": "b3111163-b557-4deb-b349-b6ccc21de6e6"
1722 | },
1723 | "source": [
1724 | "raw_outputs_vals"
1725 | ],
1726 | "execution_count": null,
1727 | "outputs": [
1728 | {
1729 | "output_type": "execute_result",
1730 | "data": {
1731 | "text/plain": [
1732 | "array([[1., 0., 0., 0.],\n",
1733 | " [1., 0., 0., 0.],\n",
1734 | " [1., 0., 0., 0.],\n",
1735 | " ...,\n",
1736 | " [0., 0., 0., 1.],\n",
1737 | " [0., 1., 0., 0.],\n",
1738 | " [0., 0., 0., 1.]], dtype=float32)"
1739 | ]
1740 | },
1741 | "metadata": {
1742 | "tags": []
1743 | },
1744 | "execution_count": 13
1745 | }
1746 | ]
1747 | },
1748 | {
1749 | "cell_type": "markdown",
1750 | "metadata": {
1751 | "id": "tN0ms42rxnsQ",
1752 | "colab_type": "text"
1753 | },
1754 | "source": [
1755 | "Test our model\n"
1756 | ]
1757 | },
1758 | {
1759 | "cell_type": "code",
1760 | "metadata": {
1761 | "id": "IfKXi8W3Ty1y",
1762 | "colab_type": "code",
1763 | "colab": {}
1764 | },
1765 | "source": [
1766 | "\n",
1767 | "pred = model.predict(['i want to travel to thailand'])[1]\n",
1768 | "preds = softmax(pred,axis=1)"
1769 | ],
1770 | "execution_count": null,
1771 | "outputs": []
1772 | },
1773 | {
1774 | "cell_type": "code",
1775 | "metadata": {
1776 | "id": "xDRatzb6UUf0",
1777 | "colab_type": "code",
1778 | "colab": {
1779 | "base_uri": "https://localhost:8080/",
1780 | "height": 52
1781 | },
1782 | "outputId": "ca910344-ee1b-4d12-efa5-b8071e412b35"
1783 | },
1784 | "source": [
1785 | "preds"
1786 | ],
1787 | "execution_count": null,
1788 | "outputs": [
1789 | {
1790 | "output_type": "execute_result",
1791 | "data": {
1792 | "text/plain": [
1793 | "array([[6.0461409e-04, 3.6119239e-04, 3.3729596e-04, 9.9869716e-01]],\n",
1794 | " dtype=float32)"
1795 | ]
1796 | },
1797 | "metadata": {
1798 | "tags": []
1799 | },
1800 | "execution_count": 16
1801 | }
1802 | ]
1803 | },
1804 | {
1805 | "cell_type": "code",
1806 | "metadata": {
1807 | "id": "om5hQMC_yOZM",
1808 | "colab_type": "code",
1809 | "colab": {}
1810 | },
1811 | "source": [
1812 | "#we create a function which finds the maximum probability and detects the topic\n",
1813 | "#for example, if we have 0.6 politics, 0.1 art, 0.15 health, 0.15 tourism >>>> topic = politics "
1814 | ],
1815 | "execution_count": null,
1816 | "outputs": []
1817 | },
1818 | {
1819 | "cell_type": "code",
1820 | "metadata": {
1821 | "id": "wTxx4VYSVKpb",
1822 | "colab_type": "code",
1823 | "colab": {}
1824 | },
1825 | "source": [
1826 | "def estm(raw_outputs_vals):\n",
1827 | " for i in range(len(raw_outputs_vals)):\n",
1828 | " for j in range(4):\n",
1829 | " if(max(raw_outputs_vals[i])==raw_outputs_vals[i][j]):\n",
1830 | " raw_outputs_vals[i][j]=1\n",
1831 | " else :\n",
1832 | " raw_outputs_vals[i][j]=0\n",
1833 | " return(raw_outputs_vals) "
1834 | ],
1835 | "execution_count": null,
1836 | "outputs": []
1837 | },
1838 | {
1839 | "cell_type": "code",
1840 | "metadata": {
1841 | "id": "vqcYwFcaVuRG",
1842 | "colab_type": "code",
1843 | "colab": {
1844 | "base_uri": "https://localhost:8080/",
1845 | "height": 34
1846 | },
1847 | "outputId": "a753c9ee-1d15-458d-9b4b-134b7cfe59ab"
1848 | },
1849 | "source": [
1850 | "estm(preds)"
1851 | ],
1852 | "execution_count": null,
1853 | "outputs": [
1854 | {
1855 | "output_type": "execute_result",
1856 | "data": {
1857 | "text/plain": [
1858 | "array([[0., 0., 0., 1.]], dtype=float32)"
1859 | ]
1860 | },
1861 | "metadata": {
1862 | "tags": []
1863 | },
1864 | "execution_count": 18
1865 | }
1866 | ]
1867 | },
1868 | {
1869 | "cell_type": "code",
1870 | "metadata": {
1871 | "id": "0l6xaVmrxuQP",
1872 | "colab_type": "code",
1873 | "colab": {}
1874 | },
1875 | "source": [
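1876 | "#our labels are :['art', 'politics', 'health', 'tourism'] and the predicted index 3 is 'tourism'\n",
1877 | "#so that's correct ;) (NOTE: cat.codes numbers categories in sorted order, so double-check which names belong to indices 1 and 2)"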
1878 | ],
1879 | "execution_count": null,
1880 | "outputs": []
1881 | },
1882 | {
1883 | "cell_type": "code",
1884 | "metadata": {
1885 | "id": "R806zK7Ep0Yx",
1886 | "colab_type": "code",
1887 | "colab": {}
1888 | },
1889 | "source": [
1890 | ""
1891 | ],
1892 | "execution_count": null,
1893 | "outputs": []
1894 | }
1895 | ]
1896 | }
--------------------------------------------------------------------------------