├── README.md ├── biobert_ner ├── README.md ├── XMLtoTSV.py ├── analysis-ai │ ├── .gitignore │ ├── README.md │ ├── package.json │ ├── public │ │ ├── favicon.ico │ │ ├── index.html │ │ └── manifest.json │ ├── src │ │ ├── actions │ │ │ ├── bcdr.js │ │ │ ├── bioNlp.js │ │ │ └── request-type.js │ │ ├── components │ │ │ ├── button │ │ │ │ └── index.js │ │ │ ├── fork │ │ │ │ └── index.js │ │ │ ├── header │ │ │ │ └── index.js │ │ │ ├── highlighter │ │ │ │ └── index.js │ │ │ ├── input-with-examples │ │ │ │ ├── example-select.js │ │ │ │ ├── example-text.js │ │ │ │ ├── index.js │ │ │ │ ├── submit.js │ │ │ │ └── tooltip-composed-menu.js │ │ │ ├── request-type-radio │ │ │ │ └── index.js │ │ │ └── response-text-area │ │ │ │ ├── container.js │ │ │ │ ├── index.js │ │ │ │ └── styled-text.js │ │ ├── containers │ │ │ ├── input-with-examples │ │ │ │ ├── bc5dr-select.js │ │ │ │ ├── bc5dr-submit.js │ │ │ │ ├── bc5dr-text.js │ │ │ │ ├── example-select.js │ │ │ │ ├── example-text.js │ │ │ │ ├── index.js │ │ │ │ └── submit.js │ │ │ ├── request-type-radio.js │ │ │ └── response-text-area │ │ │ │ ├── bc5dr-text-area.js │ │ │ │ ├── bioNlp-text-area.js │ │ │ │ └── container.js │ │ ├── enums │ │ │ └── request-types.js │ │ ├── index.css │ │ ├── index.js │ │ ├── logo.svg │ │ ├── pages │ │ │ └── home │ │ │ │ ├── index.css │ │ │ │ ├── index.js │ │ │ │ └── index.test.js │ │ ├── reducers │ │ │ ├── bcdr.js │ │ │ ├── bioNlp.js │ │ │ ├── index.js │ │ │ └── request-type.js │ │ ├── redux-constants │ │ │ └── fetch.js │ │ ├── serviceWorker.js │ │ └── utils │ │ │ ├── mapCodeToColors.js │ │ │ └── params.js │ └── yarn.lock ├── api.py ├── convert_to_pytorch_wt.ipynb ├── data_load.py ├── extras │ └── ezgif.com-video-to-gif.gif ├── new_model.py ├── new_train.py ├── parameters.py └── requirements.txt └── fill_the_blanks ├── README.md ├── fill_blanks.py └── fillblank.gif /README.md: -------------------------------------------------------------------------------- 1 | This repo contains my experiments with Bert 
pretrained weights. 2 | -------------------------------------------------------------------------------- /biobert_ner/README.md: -------------------------------------------------------------------------------- 1 | # Solving BioNLP problems using Bert(BioBert Pytorch) 2 | 3 | Working Demo For NER can be found [here](http://13.72.66.146:5000/) 4 | 5 | This repository contains fine-tuning of Biobert[https://arxiv.org/abs/1901.08746]. 6 | 7 | ## Preparation :- 8 | To use biobert, download [weights](https://github.com/naver/biobert-pretrained/releases), and make it compatible with pytorch using script [convert_to_pytorch_wt.ipynb](https://github.com/MeRajat/SolvingAlmostAnythingWithBert/blob/ner_medical/convert_to_pytorch_wt.ipynb). 9 | 10 | Place converted weights into ```weights/``` folder. 11 | 12 | ## NER :- 13 | 14 | NER Data can be downloaded using https://github.com/cambridgeltl/MTL-Bioinformatics-2016. 15 | 16 | Select NER data you want to train on and move it to data folder. 17 | 18 | ### Datasets 19 | 20 | We have used [BC5CDR](https://biocreative.bioinformatics.udel.edu/tasks/biocreative-v/track-3-cdr/) and [BioNLP13CG](http://2013.bionlp-st.org/). 
21 | 22 | BC5CDR tags :- 23 | ``` 24 | 'B-Chemical', 25 | 'O', 26 | 'B-Disease', 27 | 'I-Disease', 28 | 'I-Chemical' 29 | ``` 30 | 31 | BioNLP13CG tags :- 32 | ``` 'B-Amino_acid', 33 | 'B-Anatomical_system', 34 | 'B-Cancer', 35 | 'B-Cell', 36 | 'B-Cellular_component', 37 | 'B-Developing_anatomical_structure', 38 | 'B-Gene_or_gene_product', 39 | 'B-Immaterial_anatomical_entity', 40 | 'B-Multi-tissue_structure', 41 | 'B-Organ', 42 | 'B-Organism', 43 | 'B-Organism_subdivision', 44 | 'B-Organism_substance', 45 | 'B-Pathological_formation', 46 | 'B-Simple_chemical', 47 | 'B-Tissue', 48 | 'I-Amino_acid', 49 | 'I-Anatomical_system', 50 | 'I-Cancer', 51 | 'I-Cell', 52 | 'I-Cellular_component', 53 | 'I-Developing_anatomical_structure', 54 | 'I-Gene_or_gene_product', 55 | 'I-Immaterial_anatomical_entity', 56 | 'I-Multi-tissue_structure', 57 | 'I-Organ', 58 | 'I-Organism', 59 | 'I-Organism_subdivision', 60 | 'I-Organism_substance', 61 | 'I-Pathological_formation', 62 | 'I-Simple_chemical', 63 | 'I-Tissue', 64 | 'O' 65 | ``` 66 | 67 | ## Result 68 | 69 | After fine-tuning it with biobert weights result were pretty good, F1-score for BC5CDR was 95 and for BioNLP13CG was 92. 70 | 71 | Examples 72 | 73 | BC5CDR :- 74 | 75 | ``` 76 | Sentence = The authors describe the case of a 56 - year - old woman with chronic , severe heart failure secondary to dilated cardiomyopathy and absence of significant ventricular arrhythmias who developed QT prolongation and torsade de pointes ventricular tachycardia during one cycle of intermittent low dose ( 2 . 5 mcg / kg per min ) dobutamine . 
77 | 78 | Result = 79 | {"tagging":[["The","O"],["authors","O"],["describe","O"], 80 | ["the","O"],["case","O"],["of","O"],["a","O"],["56","O"],["-", 81 | "O"],["year","O"],["-","O"],["old","O"],["woman","O"],["with", 82 | "O"],["chronic","O"],[",","O"],["severe","O"],["heart", 83 | "I-Disease"],["failure","I-Disease"],["secondary","O"],["to", 84 | "O"],["dilated","B-Disease"],["cardiomyopathy","I-Disease"], 85 | ["and","O"],["absence","O"],["of","O"],["significant","O"], 86 | ["ventricular","B-Disease"],["arrhythmias","I-Disease"], 87 | ["who","O"],["developed","O"],["QT","B-Disease"], 88 | ["prolongation","I-Disease"],["and","O"],["torsade", 89 | "B-Disease"],["de","I-Disease"],["pointes","I-Disease"], 90 | ["ventricular","I-Disease"],["tachycardia","I-Disease"], 91 | ["during","O"],["one","O"],["cycle","O"],["of","O"], 92 | ["intermittent","O"],["low","O"],["dose","O"],["(","O"],["2", 93 | "O"],[".","O"],["5","O"],["mcg","O"],["/","O"],["kg","O"], 94 | ["per","O"],["min","O"],[")","O"],["dobutamine","B-Chemical"], 95 | [".","O"]]} 96 | 97 | ``` 98 | 99 | BioNLP13CG :- 100 | 101 | ``` 102 | Sentence = Cooccurrence of reduced expression of alpha - catenin and overexpression of p53 is a predictor of lymph node metastasis in early gastric cancer . 
103 | 104 | 105 | Result = 106 | {"tags":[["Cooccurrence","O"],["of","O"],["reduced","O"], 107 | ["expression","O"],["of","O"],["alpha", 108 | "B-Gene_or_gene_product"],["-","I-Gene_or_gene_product"], 109 | ["catenin","I-Gene_or_gene_product"],["and","O"], 110 | ["overexpression","O"],["of","O"],["p53", 111 | "B-Gene_or_gene_product"],["is","O"],["a","O"],["predictor", 112 | "O"],["of","O"],["lymph","B-Multi-tissue_structure"],["node", 113 | "I-Multi-tissue_structure"],["metastasis","O"],["in","O"], 114 | ["early","O"],["gastric","B-Cancer"],["cancer","I-Cancer"], 115 | [".","O"]]} 116 | 117 | ``` 118 | 119 | 120 | 121 | 122 | 123 | -------------------------------------------------------------------------------- /biobert_ner/XMLtoTSV.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MeRajat/SolvingAlmostAnythingWithBert/1bfb6d679a668179bbb783d1c0eb9f338cd0f1c5/biobert_ner/XMLtoTSV.py -------------------------------------------------------------------------------- /biobert_ner/analysis-ai/.gitignore: -------------------------------------------------------------------------------- 1 | # See https://help.github.com/articles/ignoring-files/ for more about ignoring files. 2 | 3 | # dependencies 4 | /node_modules 5 | /.pnp 6 | .pnp.js 7 | 8 | # testing 9 | /coverage 10 | 11 | # production 12 | /build 13 | 14 | # misc 15 | .DS_Store 16 | .env.local 17 | .env.development.local 18 | .env.test.local 19 | .env.production.local 20 | 21 | npm-debug.log* 22 | yarn-debug.log* 23 | yarn-error.log* 24 | -------------------------------------------------------------------------------- /biobert_ner/analysis-ai/README.md: -------------------------------------------------------------------------------- 1 | This project was bootstrapped with [Create React App](https://github.com/facebook/create-react-app). 
2 | 3 | ## Available Scripts 4 | 5 | In the project directory, you can run: 6 | 7 | ### `npm start` 8 | 9 | Runs the app in the development mode.
10 | Open [http://localhost:3000](http://localhost:3000) to view it in the browser. 11 | 12 | The page will reload if you make edits.
13 | You will also see any lint errors in the console. 14 | 15 | ### `npm test` 16 | 17 | Launches the test runner in the interactive watch mode.
18 | See the section about [running tests](https://facebook.github.io/create-react-app/docs/running-tests) for more information. 19 | 20 | ### `npm run build` 21 | 22 | Builds the app for production to the `build` folder.
23 | It correctly bundles React in production mode and optimizes the build for the best performance. 24 | 25 | The build is minified and the filenames include the hashes.
26 | Your app is ready to be deployed! 27 | 28 | See the section about [deployment](https://facebook.github.io/create-react-app/docs/deployment) for more information. 29 | 30 | ### `npm run eject` 31 | 32 | **Note: this is a one-way operation. Once you `eject`, you can’t go back!** 33 | 34 | If you aren’t satisfied with the build tool and configuration choices, you can `eject` at any time. This command will remove the single build dependency from your project. 35 | 36 | Instead, it will copy all the configuration files and the transitive dependencies (Webpack, Babel, ESLint, etc) right into your project so you have full control over them. All of the commands except `eject` will still work, but they will point to the copied scripts so you can tweak them. At this point you’re on your own. 37 | 38 | You don’t have to ever use `eject`. The curated feature set is suitable for small and middle deployments, and you shouldn’t feel obligated to use this feature. However we understand that this tool wouldn’t be useful if you couldn’t customize it when you are ready for it. 39 | 40 | ## Learn More 41 | 42 | You can learn more in the [Create React App documentation](https://facebook.github.io/create-react-app/docs/getting-started). 43 | 44 | To learn React, check out the [React documentation](https://reactjs.org/). 
45 | 46 | ### Code Splitting 47 | 48 | This section has moved here: https://facebook.github.io/create-react-app/docs/code-splitting 49 | 50 | ### Analyzing the Bundle Size 51 | 52 | This section has moved here: https://facebook.github.io/create-react-app/docs/analyzing-the-bundle-size 53 | 54 | ### Making a Progressive Web App 55 | 56 | This section has moved here: https://facebook.github.io/create-react-app/docs/making-a-progressive-web-app 57 | 58 | ### Advanced Configuration 59 | 60 | This section has moved here: https://facebook.github.io/create-react-app/docs/advanced-configuration 61 | 62 | ### Deployment 63 | 64 | This section has moved here: https://facebook.github.io/create-react-app/docs/deployment 65 | 66 | ### `npm run build` fails to minify 67 | 68 | This section has moved here: https://facebook.github.io/create-react-app/docs/troubleshooting#npm-run-build-fails-to-minify 69 | -------------------------------------------------------------------------------- /biobert_ner/analysis-ai/package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "analysis-ai", 3 | "version": "0.1.0", 4 | "private": true, 5 | "dependencies": { 6 | "@material-ui/core": "^3.9.2", 7 | "dotenv": "^6.2.0", 8 | "gh-pages": "^2.0.1", 9 | "prop-types": "^15.7.2", 10 | "react": "^16.8.3", 11 | "react-dom": "^16.8.3", 12 | "react-redux": "^6.0.1", 13 | "react-scripts": "2.1.5", 14 | "redux": "^4.0.1", 15 | "redux-thunk": "^2.3.0", 16 | "styled-components": "^4.1.3" 17 | }, 18 | "scripts": { 19 | "start": "react-scripts start", 20 | "build": "react-scripts build", 21 | "test": "react-scripts test", 22 | "eject": "react-scripts eject" 23 | }, 24 | "eslintConfig": { 25 | "extends": "react-app" 26 | }, 27 | "browserslist": [ 28 | ">0.2%", 29 | "not dead", 30 | "not ie <= 11", 31 | "not op_mini all" 32 | ], 33 | "devDependencies": { 34 | "serve": "^10.1.2" 35 | } 36 | } 37 | 
-------------------------------------------------------------------------------- /biobert_ner/analysis-ai/public/favicon.ico: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MeRajat/SolvingAlmostAnythingWithBert/1bfb6d679a668179bbb783d1c0eb9f338cd0f1c5/biobert_ner/analysis-ai/public/favicon.ico -------------------------------------------------------------------------------- /biobert_ner/analysis-ai/public/index.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 10 | 11 | 15 | 16 | 17 | 18 | 27 | BioNLP 28 | 29 | 30 | 31 |
32 | 42 | 43 | 44 | -------------------------------------------------------------------------------- /biobert_ner/analysis-ai/public/manifest.json: -------------------------------------------------------------------------------- 1 | { 2 | "short_name": "BioNLP App", 3 | "name": "Solving BioNLP Problems", 4 | "icons": [ 5 | { 6 | "src": "favicon.ico", 7 | "sizes": "32x32 16x16", 8 | "type": "image/x-icon" 9 | } 10 | ], 11 | "start_url": ".", 12 | "display": "standalone", 13 | "theme_color": "#000000", 14 | "background_color": "#ffffff" 15 | } 16 | -------------------------------------------------------------------------------- /biobert_ner/analysis-ai/src/actions/bcdr.js: -------------------------------------------------------------------------------- 1 | import constants from '../redux-constants/fetch'; 2 | import params from '../utils/params'; 3 | 4 | export const fetchBc5cdr = () => async (dispatch, getState) => { 5 | dispatch({ 6 | type: constants.FETCH_BC5CDR_REQUEST 7 | }) 8 | 9 | const payload = params(getState().bc5cdr.request); 10 | 11 | try { 12 | const response = await fetch(`http://13.72.66.146:9000/extract-ner?${payload}`) 13 | .then(response => response.json()); 14 | dispatch({ 15 | type: constants.FETCH_BC5CDR_SUCCESS, 16 | response 17 | }) 18 | } catch (error) { 19 | dispatch({ 20 | type: constants.FETCH_BC5CDR_FAILURE, 21 | errorMessage: 'Request Failed' 22 | }) 23 | } 24 | } 25 | 26 | export const updateBc5cdr = text => { 27 | return { 28 | type: constants.UPDATE_BC5CDR, 29 | content: text 30 | } 31 | } 32 | -------------------------------------------------------------------------------- /biobert_ner/analysis-ai/src/actions/bioNlp.js: -------------------------------------------------------------------------------- 1 | import constants from '../redux-constants/fetch'; 2 | import params from '../utils/params'; 3 | 4 | export const fetchBioNlp = () => async (dispatch, getState) => { 5 | dispatch({ 6 | type: constants.FETCH_BIO_NLP_REQUEST 7 | }) 8 | 9 
| const payload = params(getState().bioNlp.request); 10 | 11 | try { 12 | const response = await fetch(`http://13.72.66.146:9000/extract-ner?${payload}`) 13 | .then(response => response.json()); 14 | dispatch({ 15 | type: constants.FETCH_BIO_NLP_SUCCESS, 16 | response 17 | }) 18 | } catch (error) { 19 | dispatch({ 20 | type: constants.FETCH_BIO_NLP_FAILURE, 21 | errorMessage: 'Request Failed' 22 | }) 23 | } 24 | } 25 | 26 | export const updateBioNlp = text => { 27 | return { 28 | type: constants.UPDATE_BIO_NLP, 29 | content: text 30 | } 31 | } 32 | -------------------------------------------------------------------------------- /biobert_ner/analysis-ai/src/actions/request-type.js: -------------------------------------------------------------------------------- 1 | import constants from '../redux-constants/fetch'; 2 | 3 | export const updateRequestType = (type) => { 4 | return { 5 | type: constants.REQUEST_TYPE_CHANGE, 6 | requestType: type 7 | } 8 | } 9 | -------------------------------------------------------------------------------- /biobert_ner/analysis-ai/src/components/button/index.js: -------------------------------------------------------------------------------- 1 | import styled from 'styled-components'; 2 | import Button from '@material-ui/core/Button'; 3 | 4 | export default styled(Button)` 5 | color: #f2f2f2 !important; 6 | `; 7 | -------------------------------------------------------------------------------- /biobert_ner/analysis-ai/src/components/fork/index.js: -------------------------------------------------------------------------------- 1 | import React from 'react'; 2 | import PropTypes from 'prop-types'; 3 | import styled from 'styled-components'; 4 | 5 | const GithubForkWrapper = styled.a` 6 | position: fixed; 7 | top: 0; 8 | right: 0; 9 | z-index: 1000; 10 | margin: 10px; 11 | text-decoration: none; 12 | font-size: 30px; 13 | color: rgba(0, 0, 0, 0.5); 14 | cursor: pointer; 15 | 16 | &:focus { 17 | outline: none; 18 | } 19 | 20 | &:hover, 
&:focus { 21 | color: rgba(0, 0, 0, 0.8); 22 | } 23 | `; 24 | 25 | export default class GithubFork extends React.PureComponent { 26 | static propTypes = { 27 | href: PropTypes.string.isRequired 28 | }; 29 | 30 | render () { 31 | return 32 | 33 | 34 | } 35 | } 36 | 37 | -------------------------------------------------------------------------------- /biobert_ner/analysis-ai/src/components/header/index.js: -------------------------------------------------------------------------------- 1 | import styled from 'styled-components'; 2 | import Typography from '@material-ui/core/Typography'; 3 | 4 | export const Header = styled(Typography)` 5 | color: ${props => props.color ? props.color: '#4f4f4f'} !important; 6 | `; 7 | -------------------------------------------------------------------------------- /biobert_ner/analysis-ai/src/components/highlighter/index.js: -------------------------------------------------------------------------------- 1 | import styled from 'styled-components'; 2 | 3 | export const Highlighter = styled.span` 4 | color: ${props => props.color}; 5 | border-radius: 5px; 6 | background-color: ${props => props.bgColor ? 
props.bgColor : 'transparent'} 7 | `; 8 | -------------------------------------------------------------------------------- /biobert_ner/analysis-ai/src/components/input-with-examples/example-select.js: -------------------------------------------------------------------------------- 1 | import React from 'react'; 2 | import PropTypes from 'prop-types'; 3 | import FormControl from '@material-ui/core/FormControl'; 4 | import MenuItem from '@material-ui/core/MenuItem'; 5 | import InputLabel from '@material-ui/core/InputLabel'; 6 | import Select from '@material-ui/core/Select'; 7 | import OutlinedInput from '@material-ui/core/OutlinedInput'; 8 | import styled from 'styled-components'; 9 | import RootRef from '@material-ui/core/RootRef'; 10 | 11 | import TooltipMenu from './tooltip-composed-menu'; 12 | 13 | const examples = [ 14 | { 15 | key: 'bnlp#1', 16 | type: 'bioNlp', 17 | text: 'Cooccurrence of reduced expression of alpha - catenin and overexpression of p53 is a predictor of lymph node metastasis in early gastric cancer.', 18 | }, 19 | { 20 | key: 'bnlp#2', 21 | type: 'bioNlp', 22 | text: 'In this review , the role of TSH - R gene alterations in benign and malignant thyroid neoplasia is examined.', 23 | }, 24 | { 25 | key: 'bc5cdr#1', 26 | type: 'bc5cdr', 27 | text: "The authors describe the case of a 56 - year - old woman with chronic , severe heart failure secondary to dilated cardiomyopathy and absence of significant ventricular arrhythmias who developed QT prolongation and torsade de pointes ventricular tachycardia during one cycle of intermittent low dose ( 2.5 mcg/kg per min ) dobutamine." 
28 | } 29 | ]; 30 | 31 | const Subheader = styled.li` 32 | font-family: "Roboto", "Helvetica", "Arial", sans-serif; 33 | line-height: 1.5em; 34 | padding: 11px 16px; 35 | color: #827717; 36 | font-weight: 700; 37 | border-bottom: 1px solid #e2e2e2; 38 | pointer-events: none; 39 | `; 40 | 41 | 42 | 43 | export default class ExampleSelect extends React.Component { 44 | static propTypes = { 45 | update: PropTypes.func.isRequired 46 | }; 47 | 48 | state = { 49 | content: '', 50 | selectedExampleKey: '' 51 | } 52 | 53 | constructor(props) { 54 | super(props); 55 | this.labelRef = React.createRef(); 56 | } 57 | 58 | 59 | handleChange = event => { 60 | const selectedExample = typeof event.target.value === 'string' ? {} : event.target.value; 61 | this.setState({ 62 | content: selectedExample.text || '', 63 | selectedExampleKey: selectedExample.key || '' 64 | }) 65 | this.props.update(selectedExample.text || ''); 66 | }; 67 | 68 | render () { 69 | return ( 70 | 71 | 72 | 76 | Example Texts 77 | 78 | 79 | 115 | 116 | ); 117 | } 118 | } 119 | -------------------------------------------------------------------------------- /biobert_ner/analysis-ai/src/components/input-with-examples/example-text.js: -------------------------------------------------------------------------------- 1 | import React from 'react'; 2 | import PropTypes from 'prop-types'; 3 | import TextField from '@material-ui/core/TextField'; 4 | 5 | export default class ExampleText extends React.Component { 6 | static propTypes = { 7 | content: PropTypes.string, 8 | update: PropTypes.func.isRequired 9 | } 10 | 11 | handleChange = event => { 12 | this.props.update(event.target.value); 13 | } 14 | 15 | render () { 16 | return ( 17 | 27 | ) 28 | } 29 | } 30 | -------------------------------------------------------------------------------- /biobert_ner/analysis-ai/src/components/input-with-examples/index.js: -------------------------------------------------------------------------------- 1 | import React from 'react'; 
2 | import PropTypes from 'prop-types'; 3 | import ExampleSelect from '../../containers/input-with-examples/example-select'; 4 | import ExampleText from '../../containers/input-with-examples/example-text'; 5 | import Submit from '../../containers/input-with-examples/submit'; 6 | import Bc5drSelect from '../../containers/input-with-examples/bc5dr-select'; 7 | import Bc5drText from '../../containers/input-with-examples/bc5dr-text'; 8 | import Bc5drSubmit from '../../containers/input-with-examples/bc5dr-submit'; 9 | import Grid from '@material-ui/core/Grid'; 10 | 11 | import types from '../../enums/request-types'; 12 | 13 | export default class InputWithExamples extends React.PureComponent { 14 | static propTypes = { 15 | type: PropTypes.string.isRequired 16 | } 17 | 18 | render () { 19 | return ( 20 | 21 | 22 | { this.props.type === types.BIO_NLP 23 | ? 24 | : 25 | } 26 | 27 | 28 | { this.props.type === types.BIO_NLP 29 | ? 30 | : 31 | } 32 | 33 | 34 | { this.props.type === types.BIO_NLP 35 | ? 
36 | : 37 | } 38 | 39 | 40 | ) 41 | } 42 | } 43 | -------------------------------------------------------------------------------- /biobert_ner/analysis-ai/src/components/input-with-examples/submit.js: -------------------------------------------------------------------------------- 1 | import React from 'react'; 2 | import PropTypes from 'prop-types'; 3 | import Button from '../button'; 4 | 5 | const Submit = (props) => { 6 | return ( 7 | 10 | ) 11 | } 12 | 13 | Submit.propTypes = { 14 | fetchData: PropTypes.func.isRequired 15 | } 16 | 17 | export default Submit; 18 | -------------------------------------------------------------------------------- /biobert_ner/analysis-ai/src/components/input-with-examples/tooltip-composed-menu.js: -------------------------------------------------------------------------------- 1 | import React from 'react'; 2 | import Tooltip from '@material-ui/core/Tooltip'; 3 | import MenuItem from '@material-ui/core/MenuItem'; 4 | 5 | import styled from 'styled-components'; 6 | 7 | const EllipsisText = styled.span` 8 | width: 100%; 9 | overflow: hidden; 10 | white-space: nowrap; 11 | text-overflow: ellipsis; 12 | `; 13 | 14 | export default props => { 15 | const {key, selectedKey, ...rest} = props; 16 | const value = rest['data-value']; 17 | return 18 | 19 | {value.text} 20 | 21 | 22 | // return 'Hello World' 23 | } 24 | -------------------------------------------------------------------------------- /biobert_ner/analysis-ai/src/components/request-type-radio/index.js: -------------------------------------------------------------------------------- 1 | import React from 'react'; 2 | import PropTypes from 'prop-types'; 3 | import FormControlLabel from '@material-ui/core/FormControlLabel'; 4 | import Radio from '@material-ui/core/Radio'; 5 | import RadioGroup from '@material-ui/core/RadioGroup'; 6 | 7 | import types from '../../enums/request-types'; 8 | 9 | export default class RequestRadio extends React.PureComponent { 10 | static propTypes = { 
11 | type: PropTypes.string.isRequired, 12 | updateRequestType: PropTypes.func.isRequired 13 | } 14 | handleChange = event => { 15 | this.props.updateRequestType(event.target.value) 16 | } 17 | render () { 18 | return ( 19 | 26 | } 29 | label="BIO NLP 13CG" 30 | labelPlacement="end" 31 | /> 32 | } 35 | label="BC5CDR" 36 | labelPlacement="end" 37 | /> 38 | 39 | ) 40 | } 41 | } 42 | -------------------------------------------------------------------------------- /biobert_ner/analysis-ai/src/components/response-text-area/container.js: -------------------------------------------------------------------------------- 1 | import React from 'react'; 2 | import BioNlpTextArea from '../../containers/response-text-area/bioNlp-text-area' 3 | import Bc5drTextArea from '../../containers/response-text-area/bc5dr-text-area' 4 | 5 | import types from '../../enums/request-types'; 6 | 7 | export default props => { 8 | if (props.type === types.BIO_NLP) { 9 | return 10 | } 11 | return 12 | } 13 | -------------------------------------------------------------------------------- /biobert_ner/analysis-ai/src/components/response-text-area/index.js: -------------------------------------------------------------------------------- 1 | import React, {Fragment} from 'react'; 2 | import styled from 'styled-components'; 3 | import Tooltip from '@material-ui/core/Tooltip'; 4 | import CircularProgress from '@material-ui/core/CircularProgress'; 5 | import StyledText from './styled-text'; 6 | 7 | import mapCodeToColors from '../../utils/mapCodeToColors'; 8 | 9 | const Wrapper = styled.div` 10 | background: #fafafa; 11 | padding: 24px; 12 | border-radius: 5px; 13 | border: 1px solid #efefef; 14 | line-height: 1.4; 15 | `; 16 | 17 | const renderWrapper = (tags) => { 18 | return 19 | { 20 | tags.map((tag, i) => { 21 | let space = ' '; 22 | if (i === 0) { 23 | space = ''; 24 | } 25 | if (tag[1] === 'O') { 26 | return {space + tag[0]} 27 | } 28 | return 29 | {space} 30 | 31 | 35 | 36 | {tag[0]} 37 | 38 | 
39 | {tag[1]} 40 | 41 | 42 | 43 | 44 | }) 45 | } 46 | 47 | } 48 | 49 | export default props => { 50 | if (props.tags && !props.loading) { 51 | return renderWrapper(props.tags); 52 | } 53 | if (props.loading) { 54 | return 55 | 56 | 57 | } 58 | return null; 59 | } 60 | -------------------------------------------------------------------------------- /biobert_ner/analysis-ai/src/components/response-text-area/styled-text.js: -------------------------------------------------------------------------------- 1 | import styled from 'styled-components'; 2 | import { withTheme } from '@material-ui/core/styles'; 3 | 4 | const StyledText = styled.span` 5 | display: inline-flex; 6 | flex-direction: column; 7 | position: relative; 8 | padding: 5px; 9 | margin-bottom: 5px; 10 | border-radius: 5px; 11 | box-sizing: border-box; 12 | background: ${props => props.bgColor ? props.bgColor : props.theme.palette.primary.main}; 13 | 14 | > span.text { 15 | padding: 4px 10px; 16 | background: #fff; 17 | color: #424242; 18 | box-sizing: border-box; 19 | border-radius: 5px; 20 | line-height: 1.4; 21 | text-align: center; 22 | } 23 | 24 | > span.type { 25 | font-size: 12px; 26 | background: ${props => props.bgColor ? props.bgColor : props.theme.palette.primary.main}; 27 | color: ${props => props.color ? 
props.color : props.theme.palette.primary.main}; 28 | line-height: 1; 29 | padding-top: 5px; 30 | border-radius: 0 0 5px 5px; 31 | text-align: center; 32 | } 33 | `; 34 | 35 | export default withTheme()(StyledText) 36 | -------------------------------------------------------------------------------- /biobert_ner/analysis-ai/src/containers/input-with-examples/bc5dr-select.js: -------------------------------------------------------------------------------- 1 | import { bindActionCreators } from 'redux'; 2 | import { connect } from 'react-redux'; 3 | import { updateBc5cdr } from '../../actions/bcdr'; 4 | import ExampleSelect from '../../components/input-with-examples/example-select'; 5 | 6 | const mapStateToProps = state => { 7 | return { 8 | content: state.bc5cdr.request.text 9 | } 10 | } 11 | 12 | const mapDispatchToProps = dispatch => { 13 | return bindActionCreators({ 14 | update: updateBc5cdr 15 | }, dispatch); 16 | } 17 | 18 | export default connect(mapStateToProps, mapDispatchToProps)(ExampleSelect); 19 | -------------------------------------------------------------------------------- /biobert_ner/analysis-ai/src/containers/input-with-examples/bc5dr-submit.js: -------------------------------------------------------------------------------- 1 | import { bindActionCreators } from 'redux'; 2 | import { connect } from 'react-redux'; 3 | import { fetchBc5cdr } from '../../actions/bcdr'; 4 | import Submit from '../../components/input-with-examples/submit'; 5 | 6 | const mapDispatchToProps = dispatch => { 7 | return bindActionCreators({ 8 | fetchData: fetchBc5cdr 9 | }, dispatch); 10 | } 11 | 12 | export default connect(null, mapDispatchToProps)(Submit); 13 | -------------------------------------------------------------------------------- /biobert_ner/analysis-ai/src/containers/input-with-examples/bc5dr-text.js: -------------------------------------------------------------------------------- 1 | import { bindActionCreators } from 'redux'; 2 | import { connect } from 
'react-redux'; 3 | import { updateBc5cdr } from '../../actions/bcdr'; 4 | import ExampleText from '../../components/input-with-examples/example-text'; 5 | 6 | const mapStateToProps = state => { 7 | return { 8 | content: state.bc5cdr.request.text 9 | } 10 | } 11 | 12 | const mapDispatchToProps = dispatch => { 13 | return bindActionCreators({ 14 | update: updateBc5cdr 15 | }, dispatch); 16 | } 17 | 18 | export default connect(mapStateToProps, mapDispatchToProps)(ExampleText); 19 | -------------------------------------------------------------------------------- /biobert_ner/analysis-ai/src/containers/input-with-examples/example-select.js: -------------------------------------------------------------------------------- 1 | import { bindActionCreators } from 'redux'; 2 | import { connect } from 'react-redux'; 3 | import { updateBioNlp } from '../../actions/bioNlp'; 4 | import ExampleSelect from '../../components/input-with-examples/example-select'; 5 | 6 | const mapStateToProps = state => { 7 | return { 8 | content: state.bioNlp.request.text 9 | } 10 | } 11 | 12 | const mapDispatchToProps = dispatch => { 13 | return bindActionCreators({ 14 | update: updateBioNlp 15 | }, dispatch); 16 | } 17 | 18 | export default connect(mapStateToProps, mapDispatchToProps)(ExampleSelect); 19 | -------------------------------------------------------------------------------- /biobert_ner/analysis-ai/src/containers/input-with-examples/example-text.js: -------------------------------------------------------------------------------- 1 | import { bindActionCreators } from 'redux'; 2 | import { connect } from 'react-redux'; 3 | import { updateBioNlp } from '../../actions/bioNlp'; 4 | import ExampleText from '../../components/input-with-examples/example-text'; 5 | 6 | const mapStateToProps = state => { 7 | return { 8 | content: state.bioNlp.request.text 9 | } 10 | } 11 | 12 | const mapDispatchToProps = dispatch => { 13 | return bindActionCreators({ 14 | update: updateBioNlp 15 | }, dispatch); 
16 | } 17 | 18 | export default connect(mapStateToProps, mapDispatchToProps)(ExampleText); 19 | -------------------------------------------------------------------------------- /biobert_ner/analysis-ai/src/containers/input-with-examples/index.js: -------------------------------------------------------------------------------- 1 | import { connect } from 'react-redux'; 2 | import ExampleSelect from '../../components/input-with-examples'; 3 | 4 | const mapStateToProps = state => { 5 | return { 6 | type: state.requestType.type 7 | } 8 | } 9 | 10 | export default connect(mapStateToProps, null)(ExampleSelect); 11 | -------------------------------------------------------------------------------- /biobert_ner/analysis-ai/src/containers/input-with-examples/submit.js: -------------------------------------------------------------------------------- 1 | import { bindActionCreators } from 'redux'; 2 | import { connect } from 'react-redux'; 3 | import { fetchBioNlp } from '../../actions/bioNlp'; 4 | import Submit from '../../components/input-with-examples/submit'; 5 | 6 | const mapDispatchToProps = dispatch => { 7 | return bindActionCreators({ 8 | fetchData: fetchBioNlp 9 | }, dispatch); 10 | } 11 | 12 | export default connect(null, mapDispatchToProps)(Submit); 13 | -------------------------------------------------------------------------------- /biobert_ner/analysis-ai/src/containers/request-type-radio.js: -------------------------------------------------------------------------------- 1 | import { bindActionCreators } from 'redux'; 2 | import { connect } from 'react-redux'; 3 | import { updateRequestType } from '../actions/request-type'; 4 | import RequestRadio from '../components/request-type-radio'; 5 | 6 | const mapStateToProps = state => { 7 | return { 8 | type: state.requestType.type 9 | } 10 | } 11 | 12 | const mapDispatchToProps = dispatch => { 13 | return bindActionCreators({ 14 | updateRequestType 15 | }, dispatch); 16 | } 17 | 18 | export default 
connect(mapStateToProps, mapDispatchToProps)(RequestRadio); 19 | -------------------------------------------------------------------------------- /biobert_ner/analysis-ai/src/containers/response-text-area/bc5dr-text-area.js: -------------------------------------------------------------------------------- 1 | import { connect } from 'react-redux'; 2 | import ResponseTextArea from '../../components/response-text-area'; 3 | 4 | const mapStateToProps = state => { 5 | return { 6 | tags: state.bc5cdr.response.tagging, 7 | loading: state.bc5cdr.loading 8 | } 9 | } 10 | 11 | export default connect(mapStateToProps, null)(ResponseTextArea); 12 | -------------------------------------------------------------------------------- /biobert_ner/analysis-ai/src/containers/response-text-area/bioNlp-text-area.js: -------------------------------------------------------------------------------- 1 | import { connect } from 'react-redux'; 2 | import ResponseTextArea from '../../components/response-text-area'; 3 | 4 | const mapStateToProps = state => { 5 | return { 6 | tags: state.bioNlp.response.tags, 7 | loading: state.bioNlp.loading 8 | } 9 | } 10 | 11 | export default connect(mapStateToProps, null)(ResponseTextArea); 12 | -------------------------------------------------------------------------------- /biobert_ner/analysis-ai/src/containers/response-text-area/container.js: -------------------------------------------------------------------------------- 1 | import { connect } from 'react-redux'; 2 | import ResponseTextAreaContainer from '../../components/response-text-area/container'; 3 | 4 | const mapStateToProps = state => { 5 | return { 6 | type: state.requestType.type 7 | } 8 | } 9 | 10 | export default connect(mapStateToProps, null)(ResponseTextAreaContainer); 11 | -------------------------------------------------------------------------------- /biobert_ner/analysis-ai/src/enums/request-types.js: -------------------------------------------------------------------------------- 1 | 
export default Object.freeze({ 2 | BIO_NLP: 'bio', 3 | BC5CDR: 'bc5' 4 | }) 5 | -------------------------------------------------------------------------------- /biobert_ner/analysis-ai/src/index.css: -------------------------------------------------------------------------------- 1 | body { 2 | margin: 0; 3 | padding: 0; 4 | font-family: -apple-system, BlinkMacSystemFont, "Segoe UI", "Roboto", "Oxygen", 5 | "Ubuntu", "Cantarell", "Fira Sans", "Droid Sans", "Helvetica Neue", 6 | sans-serif; 7 | -webkit-font-smoothing: antialiased; 8 | -moz-osx-font-smoothing: grayscale; 9 | } 10 | 11 | code { 12 | font-family: source-code-pro, Menlo, Monaco, Consolas, "Courier New", 13 | monospace; 14 | } 15 | -------------------------------------------------------------------------------- /biobert_ner/analysis-ai/src/index.js: -------------------------------------------------------------------------------- 1 | import React from 'react'; 2 | import ReactDOM from 'react-dom'; 3 | import './index.css'; 4 | import App from './pages/home'; 5 | import { createStore, applyMiddleware, compose } from 'redux'; 6 | import { Provider } from 'react-redux'; 7 | import thunk from 'redux-thunk'; 8 | import rootReducers from './reducers'; 9 | import * as serviceWorker from './serviceWorker'; 10 | 11 | require('dotenv').config(); 12 | const composeEnhancers = window.__REDUX_DEVTOOLS_EXTENSION_COMPOSE__ || compose 13 | const store = createStore(rootReducers, composeEnhancers(applyMiddleware(thunk))); 14 | 15 | ReactDOM.render( 16 | 17 | 18 | 19 | , document.getElementById('root')); 20 | 21 | // If you want your app to work offline and load faster, you can change 22 | // unregister() to register() below. Note this comes with some pitfalls. 
23 | // Learn more about service workers: http://bit.ly/CRA-PWA 24 | serviceWorker.unregister(); 25 | -------------------------------------------------------------------------------- /biobert_ner/analysis-ai/src/logo.svg: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | -------------------------------------------------------------------------------- /biobert_ner/analysis-ai/src/pages/home/index.css: -------------------------------------------------------------------------------- 1 | .main-wrapper { 2 | color: #4f4f4f; 3 | } 4 | 5 | .text-center { 6 | text-align: center; 7 | } 8 | -------------------------------------------------------------------------------- /biobert_ner/analysis-ai/src/pages/home/index.js: -------------------------------------------------------------------------------- 1 | import React, { Component } from 'react'; 2 | import GithubFork from '../../components/fork'; 3 | import { Highlighter } from '../../components/highlighter'; 4 | import { Header } from '../../components/header'; 5 | import InputWithExamples from '../../containers/input-with-examples'; 6 | import ResponseTextAreaContainer from '../../containers/response-text-area/container'; 7 | import RequestRadio from '../../containers/request-type-radio' 8 | import { MuiThemeProvider, createMuiTheme } from '@material-ui/core/styles'; 9 | import Grid from '@material-ui/core/Grid'; 10 | import Card from '@material-ui/core/Card'; 11 | import CardContent from '@material-ui/core/CardContent'; 12 | import Typography from '@material-ui/core/Typography'; 13 | 14 | import './index.css'; 15 | 16 | const theme = createMuiTheme({ 17 | palette: { 18 | primary: { main: '#827717' }, // Purple and green play nicely together. 19 | secondary: { main: '#11cb5f' }, // This is just green.A700 as hex. 
20 | text: { 21 | secondary: '#424242' 22 | } 23 | }, 24 | typography: { 25 | useNextVariants: true, 26 | }, 27 | props: { 28 | MuiButtonBase: { // Name of the component ⚛️ / style sheet 29 | text: { // Name of the rule 30 | color: '#f2f2f2', // Some CSS 31 | }, 32 | }, 33 | } 34 | }); 35 | 36 | 37 | class App extends Component { 38 | render() { 39 | return ( 40 | 41 |
42 | 43 | 44 | 45 |
46 | Solving BioNLP problems 47 |
48 | 49 | This App solves BioNLP problems using Bert(BioBert Pytorch) 50 | 51 |
52 | 53 | 54 | 55 |
56 | Description 57 |
58 | 59 | This app demonstrates how Bert(BioBert) can be fine-tuned and used to beat any state-of-the-art result. In this work we have trained it to discover entities in medical text. In BioNLP13CG it finds entities like 'Anatomical_system', 'Cancer', 'Cell', 'Cellular_component', 'Developing_anatomical_structure', 'Gene_or_gene_product', 'Immaterial_anatomical_entity', 'Multi-tissue_structure', 'Organ', 'Organism', 'Organism_subdivision', 'Organism_substance', 'Pathological_formation', 'Simple_chemical', 'Tissue' and in BC5CDR it finds Disease and Chemicals. 60 | 61 |
62 |
63 |
64 | 65 |
DEMO
66 |
67 | 68 | 69 | 70 | 71 | 72 | 73 | 74 |
75 |
76 |
77 | ); 78 | } 79 | } 80 | 81 | export default App; 82 | -------------------------------------------------------------------------------- /biobert_ner/analysis-ai/src/pages/home/index.test.js: -------------------------------------------------------------------------------- 1 | import React from 'react'; 2 | import ReactDOM from 'react-dom'; 3 | import App from '.'; 4 | 5 | it('renders without crashing', () => { 6 | const div = document.createElement('div'); 7 | ReactDOM.render(, div); 8 | ReactDOM.unmountComponentAtNode(div); 9 | }); 10 | -------------------------------------------------------------------------------- /biobert_ner/analysis-ai/src/reducers/bcdr.js: -------------------------------------------------------------------------------- 1 | import constants from '../redux-constants/fetch'; 2 | 3 | const initialState = { 4 | response: {}, 5 | request: { 6 | bc5cdr: 'BC5CDR', 7 | text: '' 8 | }, 9 | loading: false, 10 | error: null 11 | } 12 | 13 | export default (state = initialState, action) => { 14 | const immutatedState = { ...state }; 15 | switch (action.type) { 16 | case constants.FETCH_BC5CDR_REQUEST: 17 | immutatedState.loading = true; 18 | return immutatedState; 19 | case constants.FETCH_BC5CDR_SUCCESS: 20 | immutatedState.response = Object.assign({}, action.response); 21 | immutatedState.loading = false; 22 | immutatedState.error = null; 23 | return immutatedState; 24 | case constants.FETCH_BC5CDR_FAILURE: 25 | immutatedState.loading = false; 26 | immutatedState.error = { ...action.error }; 27 | return immutatedState; 28 | case constants.UPDATE_BC5CDR: 29 | immutatedState.request = {...state.request}; 30 | immutatedState.request.text = action.content; 31 | return immutatedState; 32 | default: 33 | return state; 34 | } 35 | } 36 | -------------------------------------------------------------------------------- /biobert_ner/analysis-ai/src/reducers/bioNlp.js: -------------------------------------------------------------------------------- 1 | import 
constants from '../redux-constants/fetch'; 2 | 3 | const initialState = { 4 | response: {}, 5 | request: { 6 | bionlp3g: 'BIO NLP 13CG', 7 | text: '' 8 | }, 9 | loading: false, 10 | error: null 11 | } 12 | 13 | export default (state = initialState, action) => { 14 | const immutatedState = { ...state }; 15 | switch (action.type) { 16 | case constants.FETCH_BIO_NLP_REQUEST: 17 | immutatedState.loading = true; 18 | return immutatedState; 19 | case constants.FETCH_BIO_NLP_SUCCESS: 20 | immutatedState.response = Object.assign({}, action.response); 21 | immutatedState.loading = false; 22 | immutatedState.error = null; 23 | return immutatedState; 24 | case constants.FETCH_BIO_NLP_FAILURE: 25 | immutatedState.loading = false; 26 | immutatedState.error = { ...action.error }; 27 | return immutatedState; 28 | case constants.UPDATE_BIO_NLP: 29 | immutatedState.request = {...state.request}; 30 | immutatedState.request.text = action.content; 31 | return immutatedState; 32 | default: 33 | return state; 34 | } 35 | } 36 | -------------------------------------------------------------------------------- /biobert_ner/analysis-ai/src/reducers/index.js: -------------------------------------------------------------------------------- 1 | import { combineReducers } from 'redux'; 2 | import bioNlp from './bioNlp'; 3 | import bc5cdr from './bcdr'; 4 | import requestType from './request-type'; 5 | 6 | export default combineReducers({ 7 | bioNlp, 8 | bc5cdr, 9 | requestType 10 | }); 11 | -------------------------------------------------------------------------------- /biobert_ner/analysis-ai/src/reducers/request-type.js: -------------------------------------------------------------------------------- 1 | import constants from '../redux-constants/fetch'; 2 | import types from '../enums/request-types'; 3 | 4 | const initialState = { 5 | // type: types.BIO_NLP // Default State 6 | type: types.BC5CDR 7 | } 8 | 9 | export default (state = initialState, action) => { 10 | const immutatedState = { 
...state }; 11 | switch (action.type) { 12 | case constants.REQUEST_TYPE_CHANGE: 13 | immutatedState.type = action.requestType; 14 | return immutatedState; 15 | default: 16 | return state; 17 | } 18 | } 19 | -------------------------------------------------------------------------------- /biobert_ner/analysis-ai/src/redux-constants/fetch.js: -------------------------------------------------------------------------------- 1 | export default { 2 | FETCH_BIO_NLP_REQUEST: 'FETCH_BIO_NLP_REQUEST', 3 | FETCH_BIO_NLP_SUCCESS: 'FETCH_BIO_NLP_SUCCESS', 4 | FETCH_BIO_NLP_FAILURE: 'FETCH_BIO_NLP_FAILURE', 5 | UPDATE_BIO_NLP: 'UPDATE_BIO_NLP', 6 | 7 | FETCH_BC5CDR_REQUEST: 'FETCH_BC5CDR_REQUEST', 8 | FETCH_BC5CDR_SUCCESS: 'FETCH_BC5CDR_SUCCESS', 9 | FETCH_BC5CDR_FAILURE: 'FETCH_BC5CDR_FAILURE', 10 | UPDATE_BC5CDR: 'UPDATE_BC5CDR', 11 | 12 | REQUEST_TYPE_CHANGE: 'REQUEST_TYPE_CHANGE', 13 | } 14 | -------------------------------------------------------------------------------- /biobert_ner/analysis-ai/src/serviceWorker.js: -------------------------------------------------------------------------------- 1 | // This optional code is used to register a service worker. 2 | // register() is not called by default. 3 | 4 | // This lets the app load faster on subsequent visits in production, and gives 5 | // it offline capabilities. However, it also means that developers (and users) 6 | // will only see deployed updates on subsequent visits to a page, after all the 7 | // existing tabs open on the page have been closed, since previously cached 8 | // resources are updated in the background. 9 | 10 | // To learn more about the benefits of this model and instructions on how to 11 | // opt-in, read http://bit.ly/CRA-PWA 12 | 13 | const isLocalhost = Boolean( 14 | window.location.hostname === 'localhost' || 15 | // [::1] is the IPv6 localhost address. 16 | window.location.hostname === '[::1]' || 17 | // 127.0.0.1/8 is considered localhost for IPv4. 
18 | window.location.hostname.match( 19 | /^127(?:\.(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)){3}$/ 20 | ) 21 | ); 22 | 23 | export function register(config) { 24 | if (process.env.NODE_ENV === 'production' && 'serviceWorker' in navigator) { 25 | // The URL constructor is available in all browsers that support SW. 26 | const publicUrl = new URL(process.env.PUBLIC_URL, window.location.href); 27 | if (publicUrl.origin !== window.location.origin) { 28 | // Our service worker won't work if PUBLIC_URL is on a different origin 29 | // from what our page is served on. This might happen if a CDN is used to 30 | // serve assets; see https://github.com/facebook/create-react-app/issues/2374 31 | return; 32 | } 33 | 34 | window.addEventListener('load', () => { 35 | const swUrl = `${process.env.PUBLIC_URL}/service-worker.js`; 36 | 37 | if (isLocalhost) { 38 | // This is running on localhost. Let's check if a service worker still exists or not. 39 | checkValidServiceWorker(swUrl, config); 40 | 41 | // Add some additional logging to localhost, pointing developers to the 42 | // service worker/PWA documentation. 43 | navigator.serviceWorker.ready.then(() => { 44 | console.log( 45 | 'This web app is being served cache-first by a service ' + 46 | 'worker. To learn more, visit http://bit.ly/CRA-PWA' 47 | ); 48 | }); 49 | } else { 50 | // Is not localhost. 
Just register service worker 51 | registerValidSW(swUrl, config); 52 | } 53 | }); 54 | } 55 | } 56 | 57 | function registerValidSW(swUrl, config) { 58 | navigator.serviceWorker 59 | .register(swUrl) 60 | .then(registration => { 61 | registration.onupdatefound = () => { 62 | const installingWorker = registration.installing; 63 | if (installingWorker == null) { 64 | return; 65 | } 66 | installingWorker.onstatechange = () => { 67 | if (installingWorker.state === 'installed') { 68 | if (navigator.serviceWorker.controller) { 69 | // At this point, the updated precached content has been fetched, 70 | // but the previous service worker will still serve the older 71 | // content until all client tabs are closed. 72 | console.log( 73 | 'New content is available and will be used when all ' + 74 | 'tabs for this page are closed. See http://bit.ly/CRA-PWA.' 75 | ); 76 | 77 | // Execute callback 78 | if (config && config.onUpdate) { 79 | config.onUpdate(registration); 80 | } 81 | } else { 82 | // At this point, everything has been precached. 83 | // It's the perfect time to display a 84 | // "Content is cached for offline use." message. 85 | console.log('Content is cached for offline use.'); 86 | 87 | // Execute callback 88 | if (config && config.onSuccess) { 89 | config.onSuccess(registration); 90 | } 91 | } 92 | } 93 | }; 94 | }; 95 | }) 96 | .catch(error => { 97 | console.error('Error during service worker registration:', error); 98 | }); 99 | } 100 | 101 | function checkValidServiceWorker(swUrl, config) { 102 | // Check if the service worker can be found. If it can't reload the page. 103 | fetch(swUrl) 104 | .then(response => { 105 | // Ensure service worker exists, and that we really are getting a JS file. 106 | const contentType = response.headers.get('content-type'); 107 | if ( 108 | response.status === 404 || 109 | (contentType != null && contentType.indexOf('javascript') === -1) 110 | ) { 111 | // No service worker found. Probably a different app. Reload the page. 
112 | navigator.serviceWorker.ready.then(registration => { 113 | registration.unregister().then(() => { 114 | window.location.reload(); 115 | }); 116 | }); 117 | } else { 118 | // Service worker found. Proceed as normal. 119 | registerValidSW(swUrl, config); 120 | } 121 | }) 122 | .catch(() => { 123 | console.log( 124 | 'No internet connection found. App is running in offline mode.' 125 | ); 126 | }); 127 | } 128 | 129 | export function unregister() { 130 | if ('serviceWorker' in navigator) { 131 | navigator.serviceWorker.ready.then(registration => { 132 | registration.unregister(); 133 | }); 134 | } 135 | } 136 | -------------------------------------------------------------------------------- /biobert_ner/analysis-ai/src/utils/mapCodeToColors.js: -------------------------------------------------------------------------------- 1 | export default { 2 | 'B-Chemical': { 3 | bg: '#ab47bc', 4 | fg: '#f2f2f2', 5 | }, 6 | 'I-Chemical': { 7 | bg: '#8e24aa', 8 | fg: '#f2f2f2', 9 | }, 10 | 'B-Disease': { 11 | bg: '#37474f', 12 | fg: '#f2f2f2', 13 | }, 14 | 'I-Disease': { 15 | bg: '#424242', 16 | fg: '#f2f2f2', 17 | }, 18 | 'B-Anatomical_system': { 19 | bg: '#ffd180', 20 | fg: '#333' 21 | }, 22 | 'B-Cancer': { 23 | bg: '#212121', 24 | fg: '#f2f2f2' 25 | }, 26 | 'B-Cell': { 27 | bg: '#43a047', 28 | fg: '#fff' 29 | }, 30 | 'B-Cellular_component': { 31 | bg: '#388e3c', 32 | fg: '#f2f2f2' 33 | }, 34 | 'B-Developing_anatomical_structure': { 35 | bg: '#ffb300', 36 | fg: '#333' 37 | }, 38 | 'B-Gene_or_gene_product': { 39 | bg: '#26a69a', 40 | fg: '#fff' 41 | }, 42 | 'B-Immaterial_anatomical_entity': { 43 | bg: '#78909c', 44 | fg: '#fff' 45 | }, 46 | 'B-Multi-tissue_structure': { 47 | bg: '#827717', 48 | fg: '#fff' 49 | }, 50 | 'B-Organ': { 51 | bg: '#d32f2f', 52 | fg: '#f2f2f2' 53 | }, 54 | 'B-Organism': { 55 | bg: '#689f38', 56 | fg: '#fff' 57 | }, 58 | 'B-Organism_subdivision': { 59 | bg: '#33691e', 60 | fg: '#f2f2f2' 61 | }, 62 | 'B-Organism_substance': { 63 | bg: '#795548', 64 | 
fg: '#f2f2f2' 65 | }, 66 | 'B-Pathological_formation': { 67 | bg: '#0288d1', 68 | fg: '#fff' 69 | } , 70 | 'B-Simple_chemical': { 71 | bg: '#d81b60', 72 | fg: '#f2f2f2' 73 | }, 74 | 'B-Tissue': { 75 | bg: '#673ab7', 76 | fg: '#fff' 77 | }, 78 | 'I-Amino_acid': { 79 | bg: '#558b2f', 80 | fg: '#fff' 81 | }, 82 | 'I-Anatomical_system': { 83 | bg: '#e64a19', 84 | fg: '#fff' 85 | }, 86 | 'I-Cancer': { 87 | bg: '#455a64', 88 | fg: '#f2f2f2' 89 | }, 90 | 'I-Cell': { 91 | bg: '#fbc02d', 92 | fg: '#333' 93 | }, 94 | 'I-Cellular_component': { 95 | bg: '#0097a7', 96 | fg: '#fff' 97 | }, 98 | 'I-Developing_anatomical_structure': { 99 | bg: '#303f9f', 100 | fg: '#f2f2f2' 101 | }, 102 | 'I-Gene_or_gene_product': { 103 | bg: '#512da8', 104 | fg: '#f2f2f2' 105 | }, 106 | 'I-Immaterial_anatomical_entity': { 107 | bg: '#6d4c41', 108 | fg: '#f2f2f2' 109 | }, 110 | 'I-Multi-tissue_structure': { 111 | bg: '#1976d2', 112 | fg: '#f2f2f2' 113 | }, 114 | 'I-Organ': { 115 | bg: '#ef5350', 116 | fg: '#fff' 117 | }, 118 | 'I-Organism': { 119 | bg: '#009688', 120 | fg: '#fff' 121 | }, 122 | 'I-Organism_subdivision': { 123 | bg: '#1de9b6', 124 | fg: '#333' 125 | }, 126 | 'I-Organism_substance': { 127 | bg: '#d84315', 128 | fg: '#fff' 129 | }, 130 | 'I-Pathological_formation': { 131 | bg: '#2962ff', 132 | fg: '#fff' 133 | }, 134 | 'I-Simple_chemical': { 135 | bg: '#4527a0', 136 | fg: '#f2f2f2' 137 | }, 138 | 'I-Tissue': { 139 | bg: '#fdd835', 140 | fg: '#333' 141 | }, 142 | } 143 | -------------------------------------------------------------------------------- /biobert_ner/analysis-ai/src/utils/params.js: -------------------------------------------------------------------------------- 1 | export default (params = {}) => { 2 | return Object.keys(params).reduce((queryString, key, index) => { 3 | queryString += (index !== 0 ? 
'&' : '') + window.encodeURIComponent(key) + '=' + window.encodeURIComponent(params[key]); 4 | return queryString; 5 | }, ''); 6 | } 7 | -------------------------------------------------------------------------------- /biobert_ner/api.py: -------------------------------------------------------------------------------- 1 | from data_load import HParams 2 | from new_model import Net 3 | from pytorch_pretrained_bert.modeling import BertConfig 4 | from pytorch_pretrained_bert import BertModel 5 | import parameters 6 | import numpy as np 7 | from starlette.applications import Starlette 8 | from starlette.responses import JSONResponse, HTMLResponse, RedirectResponse 9 | import torch 10 | import sys 11 | import uvicorn 12 | import aiohttp 13 | 14 | 15 | config = BertConfig(vocab_size_or_config_json_file=parameters.BERT_CONFIG_FILE) 16 | app = Starlette() 17 | 18 | 19 | def build_model(config, state_dict, hp): 20 | model = Net(config, vocab_len = len(hp.VOCAB), bert_state_dict=None) 21 | _ = model.load_state_dict(torch.load(state_dict, map_location='cpu')) 22 | _ = model.to('cpu') # inference 23 | return model 24 | 25 | 26 | # Model loaded 27 | bc5_model = build_model(config, parameters.BC5CDR_WEIGHT, HParams('bc5cdr')) 28 | bionlp13cg_model = build_model(config, parameters.BIONLP13CG_WEIGHT, HParams('bionlp3g')) 29 | 30 | 31 | # Process Query 32 | def process_query(query, hp, model): 33 | s = query 34 | split_s = ["[CLS]"] + s.split()+["[SEP]"] 35 | x = [] # list of ids 36 | is_heads = [] # list. 
1: the token is the first piece of a word 37 | 38 | for w in split_s: 39 | tokens = hp.tokenizer.tokenize(w) if w not in ("[CLS]", "[SEP]") else [w] 40 | xx = hp.tokenizer.convert_tokens_to_ids(tokens) 41 | is_head = [1] + [0]*(len(tokens) - 1) 42 | x.extend(xx) 43 | is_heads.extend(is_head) 44 | 45 | x = torch.LongTensor(x).unsqueeze(dim=0) 46 | 47 | # Process query 48 | model.eval() 49 | _, _, y_pred = model(x, torch.Tensor([1, 2, 3])) # just a dummy y value 50 | preds = y_pred[0].cpu().numpy()[np.array(is_heads) == 1] # Get prediction where head is 1 51 | 52 | # convert to real tags and remove and tokens labels 53 | preds = [hp.idx2tag[i] for i in preds][1:-1] 54 | final_output = [] 55 | for word, label in zip(s.split(), preds): 56 | final_output.append([word, label]) 57 | return final_output 58 | 59 | 60 | def get_bc5cdr(query): 61 | hp = HParams('bc5cdr') 62 | print("bc5cdr -> ", query) 63 | out = process_query(query=query, hp=hp, model=bc5_model) 64 | return JSONResponse({'tagging': out}) 65 | 66 | 67 | def get_bionlp13cg(query): 68 | hp = HParams('bionlp3g') 69 | print("bionlp3g -> ", query) 70 | out = process_query(query=query, hp=hp, model=bionlp13cg_model) 71 | return JSONResponse({'tags': out}) 72 | 73 | 74 | @app.route("/extract-ner", methods=["GET"]) 75 | async def extract_ner(request): 76 | text = request.query_params["text"] 77 | if "bionlp3g" in request.query_params: 78 | return get_bionlp13cg(text) 79 | else: 80 | return get_bc5cdr(text) 81 | 82 | 83 | @app.route("/") 84 | def form(_): 85 | return HTMLResponse( 86 | """ 87 |

This app will find the NER!

88 |
89 |
91 | 92 | 93 |
94 |

More information can be found here https://github.com/MeRajat/SolvingAlmostAnythingWithBert
95 | Examples :-
96 | ## BIONLP13CG :-
97 | 1. Cooccurrence of reduced expression of alpha - catenin and overexpression of p53 is a predictor of lymph node metastasis in early gastric cancer .
98 | 2. In this review , the role of TSH - R gene alterations in benign and malignant thyroid neoplasia is examined .
99 | 100 | ## BC5CDR :-
101 | 1. The authors describe the case of a 56 - year - old woman with chronic , severe heart failure
102 | secondary to dilated cardiomyopathy and absence of significant ventricular arrhythmias
103 | who developed QT prolongation and torsade de pointes ventricular tachycardia during one cycle 104 |
of intermittent low dose ( 2 . 5 mcg / kg per min ) dobutamine .
105 |

106 | """) 107 | 108 | 109 | @app.route("/form") 110 | def redirect_to_homepage(_): 111 | return RedirectResponse("/") 112 | 113 | 114 | if __name__ == "__main__": 115 | # To run this app start application on server with python 116 | # python FILENAME serve 117 | # ex: python server.py server 118 | if "serve" in sys.argv: 119 | uvicorn.run(app, host="0.0.0.0", port=9000) 120 | -------------------------------------------------------------------------------- /biobert_ner/convert_to_pytorch_wt.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 30, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "import tensorflow as tf \n", 10 | "import re\n", 11 | "import torch\n", 12 | "import numpy as np" 13 | ] 14 | }, 15 | { 16 | "cell_type": "code", 17 | "execution_count": 21, 18 | "metadata": {}, 19 | "outputs": [], 20 | "source": [ 21 | "tf_path = 'weights/pubmed_pmc_470k/biobert_model.ckpt'" 22 | ] 23 | }, 24 | { 25 | "cell_type": "code", 26 | "execution_count": 22, 27 | "metadata": {}, 28 | "outputs": [], 29 | "source": [ 30 | "init_vars = tf.train.list_variables(tf_path)" 31 | ] 32 | }, 33 | { 34 | "cell_type": "code", 35 | "execution_count": 23, 36 | "metadata": {}, 37 | "outputs": [], 38 | "source": [ 39 | "excluded = ['BERTAdam','_power','global_step']\n", 40 | "init_vars = list(filter(lambda x:all([True if e not in x[0] else False for e in excluded]),init_vars))" 41 | ] 42 | }, 43 | { 44 | "cell_type": "code", 45 | "execution_count": 24, 46 | "metadata": {}, 47 | "outputs": [ 48 | { 49 | "data": { 50 | "text/plain": [ 51 | "[('bert/embeddings/LayerNorm/beta', [768]),\n", 52 | " ('bert/embeddings/LayerNorm/gamma', [768]),\n", 53 | " ('bert/embeddings/position_embeddings', [512, 768]),\n", 54 | " ('bert/embeddings/token_type_embeddings', [2, 768]),\n", 55 | " ('bert/embeddings/word_embeddings', [28996, 768]),\n", 56 | " 
('bert/encoder/layer_0/attention/output/LayerNorm/beta', [768]),\n", 57 | " ('bert/encoder/layer_0/attention/output/LayerNorm/gamma', [768]),\n", 58 | " ('bert/encoder/layer_0/attention/output/dense/bias', [768]),\n", 59 | " ('bert/encoder/layer_0/attention/output/dense/kernel', [768, 768]),\n", 60 | " ('bert/encoder/layer_0/attention/self/key/bias', [768]),\n", 61 | " ('bert/encoder/layer_0/attention/self/key/kernel', [768, 768]),\n", 62 | " ('bert/encoder/layer_0/attention/self/query/bias', [768]),\n", 63 | " ('bert/encoder/layer_0/attention/self/query/kernel', [768, 768]),\n", 64 | " ('bert/encoder/layer_0/attention/self/value/bias', [768]),\n", 65 | " ('bert/encoder/layer_0/attention/self/value/kernel', [768, 768]),\n", 66 | " ('bert/encoder/layer_0/intermediate/dense/bias', [3072]),\n", 67 | " ('bert/encoder/layer_0/intermediate/dense/kernel', [768, 3072]),\n", 68 | " ('bert/encoder/layer_0/output/LayerNorm/beta', [768]),\n", 69 | " ('bert/encoder/layer_0/output/LayerNorm/gamma', [768]),\n", 70 | " ('bert/encoder/layer_0/output/dense/bias', [768]),\n", 71 | " ('bert/encoder/layer_0/output/dense/kernel', [3072, 768]),\n", 72 | " ('bert/encoder/layer_1/attention/output/LayerNorm/beta', [768]),\n", 73 | " ('bert/encoder/layer_1/attention/output/LayerNorm/gamma', [768]),\n", 74 | " ('bert/encoder/layer_1/attention/output/dense/bias', [768]),\n", 75 | " ('bert/encoder/layer_1/attention/output/dense/kernel', [768, 768]),\n", 76 | " ('bert/encoder/layer_1/attention/self/key/bias', [768]),\n", 77 | " ('bert/encoder/layer_1/attention/self/key/kernel', [768, 768]),\n", 78 | " ('bert/encoder/layer_1/attention/self/query/bias', [768]),\n", 79 | " ('bert/encoder/layer_1/attention/self/query/kernel', [768, 768]),\n", 80 | " ('bert/encoder/layer_1/attention/self/value/bias', [768]),\n", 81 | " ('bert/encoder/layer_1/attention/self/value/kernel', [768, 768]),\n", 82 | " ('bert/encoder/layer_1/intermediate/dense/bias', [3072]),\n", 83 | " 
('bert/encoder/layer_1/intermediate/dense/kernel', [768, 3072]),\n", 84 | " ('bert/encoder/layer_1/output/LayerNorm/beta', [768]),\n", 85 | " ('bert/encoder/layer_1/output/LayerNorm/gamma', [768]),\n", 86 | " ('bert/encoder/layer_1/output/dense/bias', [768]),\n", 87 | " ('bert/encoder/layer_1/output/dense/kernel', [3072, 768]),\n", 88 | " ('bert/encoder/layer_10/attention/output/LayerNorm/beta', [768]),\n", 89 | " ('bert/encoder/layer_10/attention/output/LayerNorm/gamma', [768]),\n", 90 | " ('bert/encoder/layer_10/attention/output/dense/bias', [768]),\n", 91 | " ('bert/encoder/layer_10/attention/output/dense/kernel', [768, 768]),\n", 92 | " ('bert/encoder/layer_10/attention/self/key/bias', [768]),\n", 93 | " ('bert/encoder/layer_10/attention/self/key/kernel', [768, 768]),\n", 94 | " ('bert/encoder/layer_10/attention/self/query/bias', [768]),\n", 95 | " ('bert/encoder/layer_10/attention/self/query/kernel', [768, 768]),\n", 96 | " ('bert/encoder/layer_10/attention/self/value/bias', [768]),\n", 97 | " ('bert/encoder/layer_10/attention/self/value/kernel', [768, 768]),\n", 98 | " ('bert/encoder/layer_10/intermediate/dense/bias', [3072]),\n", 99 | " ('bert/encoder/layer_10/intermediate/dense/kernel', [768, 3072]),\n", 100 | " ('bert/encoder/layer_10/output/LayerNorm/beta', [768]),\n", 101 | " ('bert/encoder/layer_10/output/LayerNorm/gamma', [768]),\n", 102 | " ('bert/encoder/layer_10/output/dense/bias', [768]),\n", 103 | " ('bert/encoder/layer_10/output/dense/kernel', [3072, 768]),\n", 104 | " ('bert/encoder/layer_11/attention/output/LayerNorm/beta', [768]),\n", 105 | " ('bert/encoder/layer_11/attention/output/LayerNorm/gamma', [768]),\n", 106 | " ('bert/encoder/layer_11/attention/output/dense/bias', [768]),\n", 107 | " ('bert/encoder/layer_11/attention/output/dense/kernel', [768, 768]),\n", 108 | " ('bert/encoder/layer_11/attention/self/key/bias', [768]),\n", 109 | " ('bert/encoder/layer_11/attention/self/key/kernel', [768, 768]),\n", 110 | " 
('bert/encoder/layer_11/attention/self/query/bias', [768]),\n", 111 | " ('bert/encoder/layer_11/attention/self/query/kernel', [768, 768]),\n", 112 | " ('bert/encoder/layer_11/attention/self/value/bias', [768]),\n", 113 | " ('bert/encoder/layer_11/attention/self/value/kernel', [768, 768]),\n", 114 | " ('bert/encoder/layer_11/intermediate/dense/bias', [3072]),\n", 115 | " ('bert/encoder/layer_11/intermediate/dense/kernel', [768, 3072]),\n", 116 | " ('bert/encoder/layer_11/output/LayerNorm/beta', [768]),\n", 117 | " ('bert/encoder/layer_11/output/LayerNorm/gamma', [768]),\n", 118 | " ('bert/encoder/layer_11/output/dense/bias', [768]),\n", 119 | " ('bert/encoder/layer_11/output/dense/kernel', [3072, 768]),\n", 120 | " ('bert/encoder/layer_2/attention/output/LayerNorm/beta', [768]),\n", 121 | " ('bert/encoder/layer_2/attention/output/LayerNorm/gamma', [768]),\n", 122 | " ('bert/encoder/layer_2/attention/output/dense/bias', [768]),\n", 123 | " ('bert/encoder/layer_2/attention/output/dense/kernel', [768, 768]),\n", 124 | " ('bert/encoder/layer_2/attention/self/key/bias', [768]),\n", 125 | " ('bert/encoder/layer_2/attention/self/key/kernel', [768, 768]),\n", 126 | " ('bert/encoder/layer_2/attention/self/query/bias', [768]),\n", 127 | " ('bert/encoder/layer_2/attention/self/query/kernel', [768, 768]),\n", 128 | " ('bert/encoder/layer_2/attention/self/value/bias', [768]),\n", 129 | " ('bert/encoder/layer_2/attention/self/value/kernel', [768, 768]),\n", 130 | " ('bert/encoder/layer_2/intermediate/dense/bias', [3072]),\n", 131 | " ('bert/encoder/layer_2/intermediate/dense/kernel', [768, 3072]),\n", 132 | " ('bert/encoder/layer_2/output/LayerNorm/beta', [768]),\n", 133 | " ('bert/encoder/layer_2/output/LayerNorm/gamma', [768]),\n", 134 | " ('bert/encoder/layer_2/output/dense/bias', [768]),\n", 135 | " ('bert/encoder/layer_2/output/dense/kernel', [3072, 768]),\n", 136 | " ('bert/encoder/layer_3/attention/output/LayerNorm/beta', [768]),\n", 137 | " 
('bert/encoder/layer_3/attention/output/LayerNorm/gamma', [768]),\n", 138 | " ('bert/encoder/layer_3/attention/output/dense/bias', [768]),\n", 139 | " ('bert/encoder/layer_3/attention/output/dense/kernel', [768, 768]),\n", 140 | " ('bert/encoder/layer_3/attention/self/key/bias', [768]),\n", 141 | " ('bert/encoder/layer_3/attention/self/key/kernel', [768, 768]),\n", 142 | " ('bert/encoder/layer_3/attention/self/query/bias', [768]),\n", 143 | " ('bert/encoder/layer_3/attention/self/query/kernel', [768, 768]),\n", 144 | " ('bert/encoder/layer_3/attention/self/value/bias', [768]),\n", 145 | " ('bert/encoder/layer_3/attention/self/value/kernel', [768, 768]),\n", 146 | " ('bert/encoder/layer_3/intermediate/dense/bias', [3072]),\n", 147 | " ('bert/encoder/layer_3/intermediate/dense/kernel', [768, 3072]),\n", 148 | " ('bert/encoder/layer_3/output/LayerNorm/beta', [768]),\n", 149 | " ('bert/encoder/layer_3/output/LayerNorm/gamma', [768]),\n", 150 | " ('bert/encoder/layer_3/output/dense/bias', [768]),\n", 151 | " ('bert/encoder/layer_3/output/dense/kernel', [3072, 768]),\n", 152 | " ('bert/encoder/layer_4/attention/output/LayerNorm/beta', [768]),\n", 153 | " ('bert/encoder/layer_4/attention/output/LayerNorm/gamma', [768]),\n", 154 | " ('bert/encoder/layer_4/attention/output/dense/bias', [768]),\n", 155 | " ('bert/encoder/layer_4/attention/output/dense/kernel', [768, 768]),\n", 156 | " ('bert/encoder/layer_4/attention/self/key/bias', [768]),\n", 157 | " ('bert/encoder/layer_4/attention/self/key/kernel', [768, 768]),\n", 158 | " ('bert/encoder/layer_4/attention/self/query/bias', [768]),\n", 159 | " ('bert/encoder/layer_4/attention/self/query/kernel', [768, 768]),\n", 160 | " ('bert/encoder/layer_4/attention/self/value/bias', [768]),\n", 161 | " ('bert/encoder/layer_4/attention/self/value/kernel', [768, 768]),\n", 162 | " ('bert/encoder/layer_4/intermediate/dense/bias', [3072]),\n", 163 | " ('bert/encoder/layer_4/intermediate/dense/kernel', [768, 3072]),\n", 164 | " 
('bert/encoder/layer_4/output/LayerNorm/beta', [768]),\n", 165 | " ('bert/encoder/layer_4/output/LayerNorm/gamma', [768]),\n", 166 | " ('bert/encoder/layer_4/output/dense/bias', [768]),\n", 167 | " ('bert/encoder/layer_4/output/dense/kernel', [3072, 768]),\n", 168 | " ('bert/encoder/layer_5/attention/output/LayerNorm/beta', [768]),\n", 169 | " ('bert/encoder/layer_5/attention/output/LayerNorm/gamma', [768]),\n", 170 | " ('bert/encoder/layer_5/attention/output/dense/bias', [768]),\n", 171 | " ('bert/encoder/layer_5/attention/output/dense/kernel', [768, 768]),\n", 172 | " ('bert/encoder/layer_5/attention/self/key/bias', [768]),\n", 173 | " ('bert/encoder/layer_5/attention/self/key/kernel', [768, 768]),\n", 174 | " ('bert/encoder/layer_5/attention/self/query/bias', [768]),\n", 175 | " ('bert/encoder/layer_5/attention/self/query/kernel', [768, 768]),\n", 176 | " ('bert/encoder/layer_5/attention/self/value/bias', [768]),\n", 177 | " ('bert/encoder/layer_5/attention/self/value/kernel', [768, 768]),\n", 178 | " ('bert/encoder/layer_5/intermediate/dense/bias', [3072]),\n", 179 | " ('bert/encoder/layer_5/intermediate/dense/kernel', [768, 3072]),\n", 180 | " ('bert/encoder/layer_5/output/LayerNorm/beta', [768]),\n", 181 | " ('bert/encoder/layer_5/output/LayerNorm/gamma', [768]),\n", 182 | " ('bert/encoder/layer_5/output/dense/bias', [768]),\n", 183 | " ('bert/encoder/layer_5/output/dense/kernel', [3072, 768]),\n", 184 | " ('bert/encoder/layer_6/attention/output/LayerNorm/beta', [768]),\n", 185 | " ('bert/encoder/layer_6/attention/output/LayerNorm/gamma', [768]),\n", 186 | " ('bert/encoder/layer_6/attention/output/dense/bias', [768]),\n", 187 | " ('bert/encoder/layer_6/attention/output/dense/kernel', [768, 768]),\n", 188 | " ('bert/encoder/layer_6/attention/self/key/bias', [768]),\n", 189 | " ('bert/encoder/layer_6/attention/self/key/kernel', [768, 768]),\n", 190 | " ('bert/encoder/layer_6/attention/self/query/bias', [768]),\n", 191 | " 
('bert/encoder/layer_6/attention/self/query/kernel', [768, 768]),\n", 192 | " ('bert/encoder/layer_6/attention/self/value/bias', [768]),\n", 193 | " ('bert/encoder/layer_6/attention/self/value/kernel', [768, 768]),\n", 194 | " ('bert/encoder/layer_6/intermediate/dense/bias', [3072]),\n", 195 | " ('bert/encoder/layer_6/intermediate/dense/kernel', [768, 3072]),\n", 196 | " ('bert/encoder/layer_6/output/LayerNorm/beta', [768]),\n", 197 | " ('bert/encoder/layer_6/output/LayerNorm/gamma', [768]),\n", 198 | " ('bert/encoder/layer_6/output/dense/bias', [768]),\n", 199 | " ('bert/encoder/layer_6/output/dense/kernel', [3072, 768]),\n", 200 | " ('bert/encoder/layer_7/attention/output/LayerNorm/beta', [768]),\n", 201 | " ('bert/encoder/layer_7/attention/output/LayerNorm/gamma', [768]),\n", 202 | " ('bert/encoder/layer_7/attention/output/dense/bias', [768]),\n", 203 | " ('bert/encoder/layer_7/attention/output/dense/kernel', [768, 768]),\n", 204 | " ('bert/encoder/layer_7/attention/self/key/bias', [768]),\n", 205 | " ('bert/encoder/layer_7/attention/self/key/kernel', [768, 768]),\n", 206 | " ('bert/encoder/layer_7/attention/self/query/bias', [768]),\n", 207 | " ('bert/encoder/layer_7/attention/self/query/kernel', [768, 768]),\n", 208 | " ('bert/encoder/layer_7/attention/self/value/bias', [768]),\n", 209 | " ('bert/encoder/layer_7/attention/self/value/kernel', [768, 768]),\n", 210 | " ('bert/encoder/layer_7/intermediate/dense/bias', [3072]),\n", 211 | " ('bert/encoder/layer_7/intermediate/dense/kernel', [768, 3072]),\n", 212 | " ('bert/encoder/layer_7/output/LayerNorm/beta', [768]),\n", 213 | " ('bert/encoder/layer_7/output/LayerNorm/gamma', [768]),\n", 214 | " ('bert/encoder/layer_7/output/dense/bias', [768]),\n", 215 | " ('bert/encoder/layer_7/output/dense/kernel', [3072, 768]),\n", 216 | " ('bert/encoder/layer_8/attention/output/LayerNorm/beta', [768]),\n", 217 | " ('bert/encoder/layer_8/attention/output/LayerNorm/gamma', [768]),\n", 218 | " 
('bert/encoder/layer_8/attention/output/dense/bias', [768]),\n", 219 | " ('bert/encoder/layer_8/attention/output/dense/kernel', [768, 768]),\n", 220 | " ('bert/encoder/layer_8/attention/self/key/bias', [768]),\n", 221 | " ('bert/encoder/layer_8/attention/self/key/kernel', [768, 768]),\n", 222 | " ('bert/encoder/layer_8/attention/self/query/bias', [768]),\n", 223 | " ('bert/encoder/layer_8/attention/self/query/kernel', [768, 768]),\n", 224 | " ('bert/encoder/layer_8/attention/self/value/bias', [768]),\n", 225 | " ('bert/encoder/layer_8/attention/self/value/kernel', [768, 768]),\n", 226 | " ('bert/encoder/layer_8/intermediate/dense/bias', [3072]),\n", 227 | " ('bert/encoder/layer_8/intermediate/dense/kernel', [768, 3072]),\n", 228 | " ('bert/encoder/layer_8/output/LayerNorm/beta', [768]),\n", 229 | " ('bert/encoder/layer_8/output/LayerNorm/gamma', [768]),\n", 230 | " ('bert/encoder/layer_8/output/dense/bias', [768]),\n", 231 | " ('bert/encoder/layer_8/output/dense/kernel', [3072, 768]),\n", 232 | " ('bert/encoder/layer_9/attention/output/LayerNorm/beta', [768]),\n", 233 | " ('bert/encoder/layer_9/attention/output/LayerNorm/gamma', [768]),\n", 234 | " ('bert/encoder/layer_9/attention/output/dense/bias', [768]),\n", 235 | " ('bert/encoder/layer_9/attention/output/dense/kernel', [768, 768]),\n", 236 | " ('bert/encoder/layer_9/attention/self/key/bias', [768]),\n", 237 | " ('bert/encoder/layer_9/attention/self/key/kernel', [768, 768]),\n", 238 | " ('bert/encoder/layer_9/attention/self/query/bias', [768]),\n", 239 | " ('bert/encoder/layer_9/attention/self/query/kernel', [768, 768]),\n", 240 | " ('bert/encoder/layer_9/attention/self/value/bias', [768]),\n", 241 | " ('bert/encoder/layer_9/attention/self/value/kernel', [768, 768]),\n", 242 | " ('bert/encoder/layer_9/intermediate/dense/bias', [3072]),\n", 243 | " ('bert/encoder/layer_9/intermediate/dense/kernel', [768, 3072]),\n", 244 | " ('bert/encoder/layer_9/output/LayerNorm/beta', [768]),\n", 245 | " 
('bert/encoder/layer_9/output/LayerNorm/gamma', [768]),\n", 246 | " ('bert/encoder/layer_9/output/dense/bias', [768]),\n", 247 | " ('bert/encoder/layer_9/output/dense/kernel', [3072, 768]),\n", 248 | " ('bert/pooler/dense/bias', [768]),\n", 249 | " ('bert/pooler/dense/kernel', [768, 768]),\n", 250 | " ('cls/predictions/output_bias', [28996]),\n", 251 | " ('cls/predictions/transform/LayerNorm/beta', [768]),\n", 252 | " ('cls/predictions/transform/LayerNorm/gamma', [768]),\n", 253 | " ('cls/predictions/transform/dense/bias', [768]),\n", 254 | " ('cls/predictions/transform/dense/kernel', [768, 768]),\n", 255 | " ('cls/seq_relationship/output_bias', [2]),\n", 256 | " ('cls/seq_relationship/output_weights', [2, 768])]" 257 | ] 258 | }, 259 | "execution_count": 24, 260 | "metadata": {}, 261 | "output_type": "execute_result" 262 | } 263 | ], 264 | "source": [ 265 | "init_vars" 266 | ] 267 | }, 268 | { 269 | "cell_type": "code", 270 | "execution_count": 25, 271 | "metadata": {}, 272 | "outputs": [ 273 | { 274 | "name": "stdout", 275 | "output_type": "stream", 276 | "text": [ 277 | "Loading TF weight bert/embeddings/LayerNorm/beta with shape [768]\n", 278 | "Loading TF weight bert/embeddings/LayerNorm/gamma with shape [768]\n", 279 | "Loading TF weight bert/embeddings/position_embeddings with shape [512, 768]\n", 280 | "Loading TF weight bert/embeddings/token_type_embeddings with shape [2, 768]\n", 281 | "Loading TF weight bert/embeddings/word_embeddings with shape [28996, 768]\n", 282 | "Loading TF weight bert/encoder/layer_0/attention/output/LayerNorm/beta with shape [768]\n", 283 | "Loading TF weight bert/encoder/layer_0/attention/output/LayerNorm/gamma with shape [768]\n", 284 | "Loading TF weight bert/encoder/layer_0/attention/output/dense/bias with shape [768]\n", 285 | "Loading TF weight bert/encoder/layer_0/attention/output/dense/kernel with shape [768, 768]\n", 286 | "Loading TF weight bert/encoder/layer_0/attention/self/key/bias with shape [768]\n", 287 | "Loading 
TF weight bert/encoder/layer_0/attention/self/key/kernel with shape [768, 768]\n", 288 | "Loading TF weight bert/encoder/layer_0/attention/self/query/bias with shape [768]\n", 289 | "Loading TF weight bert/encoder/layer_0/attention/self/query/kernel with shape [768, 768]\n", 290 | "Loading TF weight bert/encoder/layer_0/attention/self/value/bias with shape [768]\n", 291 | "Loading TF weight bert/encoder/layer_0/attention/self/value/kernel with shape [768, 768]\n", 292 | "Loading TF weight bert/encoder/layer_0/intermediate/dense/bias with shape [3072]\n", 293 | "Loading TF weight bert/encoder/layer_0/intermediate/dense/kernel with shape [768, 3072]\n", 294 | "Loading TF weight bert/encoder/layer_0/output/LayerNorm/beta with shape [768]\n", 295 | "Loading TF weight bert/encoder/layer_0/output/LayerNorm/gamma with shape [768]\n", 296 | "Loading TF weight bert/encoder/layer_0/output/dense/bias with shape [768]\n", 297 | "Loading TF weight bert/encoder/layer_0/output/dense/kernel with shape [3072, 768]\n", 298 | "Loading TF weight bert/encoder/layer_1/attention/output/LayerNorm/beta with shape [768]\n", 299 | "Loading TF weight bert/encoder/layer_1/attention/output/LayerNorm/gamma with shape [768]\n", 300 | "Loading TF weight bert/encoder/layer_1/attention/output/dense/bias with shape [768]\n", 301 | "Loading TF weight bert/encoder/layer_1/attention/output/dense/kernel with shape [768, 768]\n", 302 | "Loading TF weight bert/encoder/layer_1/attention/self/key/bias with shape [768]\n", 303 | "Loading TF weight bert/encoder/layer_1/attention/self/key/kernel with shape [768, 768]\n", 304 | "Loading TF weight bert/encoder/layer_1/attention/self/query/bias with shape [768]\n", 305 | "Loading TF weight bert/encoder/layer_1/attention/self/query/kernel with shape [768, 768]\n", 306 | "Loading TF weight bert/encoder/layer_1/attention/self/value/bias with shape [768]\n", 307 | "Loading TF weight bert/encoder/layer_1/attention/self/value/kernel with shape [768, 768]\n", 308 | 
"Loading TF weight bert/encoder/layer_1/intermediate/dense/bias with shape [3072]\n", 309 | "Loading TF weight bert/encoder/layer_1/intermediate/dense/kernel with shape [768, 3072]\n", 310 | "Loading TF weight bert/encoder/layer_1/output/LayerNorm/beta with shape [768]\n", 311 | "Loading TF weight bert/encoder/layer_1/output/LayerNorm/gamma with shape [768]\n", 312 | "Loading TF weight bert/encoder/layer_1/output/dense/bias with shape [768]\n", 313 | "Loading TF weight bert/encoder/layer_1/output/dense/kernel with shape [3072, 768]\n", 314 | "Loading TF weight bert/encoder/layer_10/attention/output/LayerNorm/beta with shape [768]\n", 315 | "Loading TF weight bert/encoder/layer_10/attention/output/LayerNorm/gamma with shape [768]\n", 316 | "Loading TF weight bert/encoder/layer_10/attention/output/dense/bias with shape [768]\n", 317 | "Loading TF weight bert/encoder/layer_10/attention/output/dense/kernel with shape [768, 768]\n", 318 | "Loading TF weight bert/encoder/layer_10/attention/self/key/bias with shape [768]\n", 319 | "Loading TF weight bert/encoder/layer_10/attention/self/key/kernel with shape [768, 768]\n", 320 | "Loading TF weight bert/encoder/layer_10/attention/self/query/bias with shape [768]\n", 321 | "Loading TF weight bert/encoder/layer_10/attention/self/query/kernel with shape [768, 768]\n", 322 | "Loading TF weight bert/encoder/layer_10/attention/self/value/bias with shape [768]\n", 323 | "Loading TF weight bert/encoder/layer_10/attention/self/value/kernel with shape [768, 768]\n", 324 | "Loading TF weight bert/encoder/layer_10/intermediate/dense/bias with shape [3072]\n", 325 | "Loading TF weight bert/encoder/layer_10/intermediate/dense/kernel with shape [768, 3072]\n", 326 | "Loading TF weight bert/encoder/layer_10/output/LayerNorm/beta with shape [768]\n", 327 | "Loading TF weight bert/encoder/layer_10/output/LayerNorm/gamma with shape [768]\n", 328 | "Loading TF weight bert/encoder/layer_10/output/dense/bias with shape [768]\n", 329 | "Loading 
TF weight bert/encoder/layer_10/output/dense/kernel with shape [3072, 768]\n", 330 | "Loading TF weight bert/encoder/layer_11/attention/output/LayerNorm/beta with shape [768]\n", 331 | "Loading TF weight bert/encoder/layer_11/attention/output/LayerNorm/gamma with shape [768]\n", 332 | "Loading TF weight bert/encoder/layer_11/attention/output/dense/bias with shape [768]\n", 333 | "Loading TF weight bert/encoder/layer_11/attention/output/dense/kernel with shape [768, 768]\n", 334 | "Loading TF weight bert/encoder/layer_11/attention/self/key/bias with shape [768]\n", 335 | "Loading TF weight bert/encoder/layer_11/attention/self/key/kernel with shape [768, 768]\n", 336 | "Loading TF weight bert/encoder/layer_11/attention/self/query/bias with shape [768]\n", 337 | "Loading TF weight bert/encoder/layer_11/attention/self/query/kernel with shape [768, 768]\n", 338 | "Loading TF weight bert/encoder/layer_11/attention/self/value/bias with shape [768]\n", 339 | "Loading TF weight bert/encoder/layer_11/attention/self/value/kernel with shape [768, 768]\n", 340 | "Loading TF weight bert/encoder/layer_11/intermediate/dense/bias with shape [3072]\n", 341 | "Loading TF weight bert/encoder/layer_11/intermediate/dense/kernel with shape [768, 3072]\n", 342 | "Loading TF weight bert/encoder/layer_11/output/LayerNorm/beta with shape [768]\n", 343 | "Loading TF weight bert/encoder/layer_11/output/LayerNorm/gamma with shape [768]\n", 344 | "Loading TF weight bert/encoder/layer_11/output/dense/bias with shape [768]\n", 345 | "Loading TF weight bert/encoder/layer_11/output/dense/kernel with shape [3072, 768]\n", 346 | "Loading TF weight bert/encoder/layer_2/attention/output/LayerNorm/beta with shape [768]\n", 347 | "Loading TF weight bert/encoder/layer_2/attention/output/LayerNorm/gamma with shape [768]\n", 348 | "Loading TF weight bert/encoder/layer_2/attention/output/dense/bias with shape [768]\n", 349 | "Loading TF weight bert/encoder/layer_2/attention/output/dense/kernel with shape 
[768, 768]\n", 350 | "Loading TF weight bert/encoder/layer_2/attention/self/key/bias with shape [768]\n", 351 | "Loading TF weight bert/encoder/layer_2/attention/self/key/kernel with shape [768, 768]\n", 352 | "Loading TF weight bert/encoder/layer_2/attention/self/query/bias with shape [768]\n", 353 | "Loading TF weight bert/encoder/layer_2/attention/self/query/kernel with shape [768, 768]\n", 354 | "Loading TF weight bert/encoder/layer_2/attention/self/value/bias with shape [768]\n", 355 | "Loading TF weight bert/encoder/layer_2/attention/self/value/kernel with shape [768, 768]\n", 356 | "Loading TF weight bert/encoder/layer_2/intermediate/dense/bias with shape [3072]\n", 357 | "Loading TF weight bert/encoder/layer_2/intermediate/dense/kernel with shape [768, 3072]\n", 358 | "Loading TF weight bert/encoder/layer_2/output/LayerNorm/beta with shape [768]\n", 359 | "Loading TF weight bert/encoder/layer_2/output/LayerNorm/gamma with shape [768]\n", 360 | "Loading TF weight bert/encoder/layer_2/output/dense/bias with shape [768]\n", 361 | "Loading TF weight bert/encoder/layer_2/output/dense/kernel with shape [3072, 768]\n", 362 | "Loading TF weight bert/encoder/layer_3/attention/output/LayerNorm/beta with shape [768]\n", 363 | "Loading TF weight bert/encoder/layer_3/attention/output/LayerNorm/gamma with shape [768]\n", 364 | "Loading TF weight bert/encoder/layer_3/attention/output/dense/bias with shape [768]\n", 365 | "Loading TF weight bert/encoder/layer_3/attention/output/dense/kernel with shape [768, 768]\n", 366 | "Loading TF weight bert/encoder/layer_3/attention/self/key/bias with shape [768]\n", 367 | "Loading TF weight bert/encoder/layer_3/attention/self/key/kernel with shape [768, 768]\n", 368 | "Loading TF weight bert/encoder/layer_3/attention/self/query/bias with shape [768]\n", 369 | "Loading TF weight bert/encoder/layer_3/attention/self/query/kernel with shape [768, 768]\n", 370 | "Loading TF weight bert/encoder/layer_3/attention/self/value/bias with shape 
[768]\n", 371 | "Loading TF weight bert/encoder/layer_3/attention/self/value/kernel with shape [768, 768]\n", 372 | "Loading TF weight bert/encoder/layer_3/intermediate/dense/bias with shape [3072]\n", 373 | "Loading TF weight bert/encoder/layer_3/intermediate/dense/kernel with shape [768, 3072]\n", 374 | "Loading TF weight bert/encoder/layer_3/output/LayerNorm/beta with shape [768]\n", 375 | "Loading TF weight bert/encoder/layer_3/output/LayerNorm/gamma with shape [768]\n", 376 | "Loading TF weight bert/encoder/layer_3/output/dense/bias with shape [768]\n", 377 | "Loading TF weight bert/encoder/layer_3/output/dense/kernel with shape [3072, 768]\n", 378 | "Loading TF weight bert/encoder/layer_4/attention/output/LayerNorm/beta with shape [768]\n", 379 | "Loading TF weight bert/encoder/layer_4/attention/output/LayerNorm/gamma with shape [768]\n", 380 | "Loading TF weight bert/encoder/layer_4/attention/output/dense/bias with shape [768]\n", 381 | "Loading TF weight bert/encoder/layer_4/attention/output/dense/kernel with shape [768, 768]\n", 382 | "Loading TF weight bert/encoder/layer_4/attention/self/key/bias with shape [768]\n", 383 | "Loading TF weight bert/encoder/layer_4/attention/self/key/kernel with shape [768, 768]\n", 384 | "Loading TF weight bert/encoder/layer_4/attention/self/query/bias with shape [768]\n", 385 | "Loading TF weight bert/encoder/layer_4/attention/self/query/kernel with shape [768, 768]\n", 386 | "Loading TF weight bert/encoder/layer_4/attention/self/value/bias with shape [768]\n", 387 | "Loading TF weight bert/encoder/layer_4/attention/self/value/kernel with shape [768, 768]\n", 388 | "Loading TF weight bert/encoder/layer_4/intermediate/dense/bias with shape [3072]\n", 389 | "Loading TF weight bert/encoder/layer_4/intermediate/dense/kernel with shape [768, 3072]\n", 390 | "Loading TF weight bert/encoder/layer_4/output/LayerNorm/beta with shape [768]\n", 391 | "Loading TF weight bert/encoder/layer_4/output/LayerNorm/gamma with shape [768]\n", 
392 | "Loading TF weight bert/encoder/layer_4/output/dense/bias with shape [768]\n", 393 | "Loading TF weight bert/encoder/layer_4/output/dense/kernel with shape [3072, 768]\n", 394 | "Loading TF weight bert/encoder/layer_5/attention/output/LayerNorm/beta with shape [768]\n", 395 | "Loading TF weight bert/encoder/layer_5/attention/output/LayerNorm/gamma with shape [768]\n", 396 | "Loading TF weight bert/encoder/layer_5/attention/output/dense/bias with shape [768]\n", 397 | "Loading TF weight bert/encoder/layer_5/attention/output/dense/kernel with shape [768, 768]\n", 398 | "Loading TF weight bert/encoder/layer_5/attention/self/key/bias with shape [768]\n", 399 | "Loading TF weight bert/encoder/layer_5/attention/self/key/kernel with shape [768, 768]\n", 400 | "Loading TF weight bert/encoder/layer_5/attention/self/query/bias with shape [768]\n", 401 | "Loading TF weight bert/encoder/layer_5/attention/self/query/kernel with shape [768, 768]\n", 402 | "Loading TF weight bert/encoder/layer_5/attention/self/value/bias with shape [768]\n", 403 | "Loading TF weight bert/encoder/layer_5/attention/self/value/kernel with shape [768, 768]\n", 404 | "Loading TF weight bert/encoder/layer_5/intermediate/dense/bias with shape [3072]\n", 405 | "Loading TF weight bert/encoder/layer_5/intermediate/dense/kernel with shape [768, 3072]\n", 406 | "Loading TF weight bert/encoder/layer_5/output/LayerNorm/beta with shape [768]\n", 407 | "Loading TF weight bert/encoder/layer_5/output/LayerNorm/gamma with shape [768]\n", 408 | "Loading TF weight bert/encoder/layer_5/output/dense/bias with shape [768]\n" 409 | ] 410 | }, 411 | { 412 | "name": "stdout", 413 | "output_type": "stream", 414 | "text": [ 415 | "Loading TF weight bert/encoder/layer_5/output/dense/kernel with shape [3072, 768]\n", 416 | "Loading TF weight bert/encoder/layer_6/attention/output/LayerNorm/beta with shape [768]\n", 417 | "Loading TF weight bert/encoder/layer_6/attention/output/LayerNorm/gamma with shape [768]\n", 418 | 
"Loading TF weight bert/encoder/layer_6/attention/output/dense/bias with shape [768]\n", 419 | "Loading TF weight bert/encoder/layer_6/attention/output/dense/kernel with shape [768, 768]\n", 420 | "Loading TF weight bert/encoder/layer_6/attention/self/key/bias with shape [768]\n", 421 | "Loading TF weight bert/encoder/layer_6/attention/self/key/kernel with shape [768, 768]\n", 422 | "Loading TF weight bert/encoder/layer_6/attention/self/query/bias with shape [768]\n", 423 | "Loading TF weight bert/encoder/layer_6/attention/self/query/kernel with shape [768, 768]\n", 424 | "Loading TF weight bert/encoder/layer_6/attention/self/value/bias with shape [768]\n", 425 | "Loading TF weight bert/encoder/layer_6/attention/self/value/kernel with shape [768, 768]\n", 426 | "Loading TF weight bert/encoder/layer_6/intermediate/dense/bias with shape [3072]\n", 427 | "Loading TF weight bert/encoder/layer_6/intermediate/dense/kernel with shape [768, 3072]\n", 428 | "Loading TF weight bert/encoder/layer_6/output/LayerNorm/beta with shape [768]\n", 429 | "Loading TF weight bert/encoder/layer_6/output/LayerNorm/gamma with shape [768]\n", 430 | "Loading TF weight bert/encoder/layer_6/output/dense/bias with shape [768]\n", 431 | "Loading TF weight bert/encoder/layer_6/output/dense/kernel with shape [3072, 768]\n", 432 | "Loading TF weight bert/encoder/layer_7/attention/output/LayerNorm/beta with shape [768]\n", 433 | "Loading TF weight bert/encoder/layer_7/attention/output/LayerNorm/gamma with shape [768]\n", 434 | "Loading TF weight bert/encoder/layer_7/attention/output/dense/bias with shape [768]\n", 435 | "Loading TF weight bert/encoder/layer_7/attention/output/dense/kernel with shape [768, 768]\n", 436 | "Loading TF weight bert/encoder/layer_7/attention/self/key/bias with shape [768]\n", 437 | "Loading TF weight bert/encoder/layer_7/attention/self/key/kernel with shape [768, 768]\n", 438 | "Loading TF weight bert/encoder/layer_7/attention/self/query/bias with shape [768]\n", 439 | 
"Loading TF weight bert/encoder/layer_7/attention/self/query/kernel with shape [768, 768]\n", 440 | "Loading TF weight bert/encoder/layer_7/attention/self/value/bias with shape [768]\n", 441 | "Loading TF weight bert/encoder/layer_7/attention/self/value/kernel with shape [768, 768]\n", 442 | "Loading TF weight bert/encoder/layer_7/intermediate/dense/bias with shape [3072]\n", 443 | "Loading TF weight bert/encoder/layer_7/intermediate/dense/kernel with shape [768, 3072]\n", 444 | "Loading TF weight bert/encoder/layer_7/output/LayerNorm/beta with shape [768]\n", 445 | "Loading TF weight bert/encoder/layer_7/output/LayerNorm/gamma with shape [768]\n", 446 | "Loading TF weight bert/encoder/layer_7/output/dense/bias with shape [768]\n", 447 | "Loading TF weight bert/encoder/layer_7/output/dense/kernel with shape [3072, 768]\n", 448 | "Loading TF weight bert/encoder/layer_8/attention/output/LayerNorm/beta with shape [768]\n", 449 | "Loading TF weight bert/encoder/layer_8/attention/output/LayerNorm/gamma with shape [768]\n", 450 | "Loading TF weight bert/encoder/layer_8/attention/output/dense/bias with shape [768]\n", 451 | "Loading TF weight bert/encoder/layer_8/attention/output/dense/kernel with shape [768, 768]\n", 452 | "Loading TF weight bert/encoder/layer_8/attention/self/key/bias with shape [768]\n", 453 | "Loading TF weight bert/encoder/layer_8/attention/self/key/kernel with shape [768, 768]\n", 454 | "Loading TF weight bert/encoder/layer_8/attention/self/query/bias with shape [768]\n", 455 | "Loading TF weight bert/encoder/layer_8/attention/self/query/kernel with shape [768, 768]\n", 456 | "Loading TF weight bert/encoder/layer_8/attention/self/value/bias with shape [768]\n", 457 | "Loading TF weight bert/encoder/layer_8/attention/self/value/kernel with shape [768, 768]\n", 458 | "Loading TF weight bert/encoder/layer_8/intermediate/dense/bias with shape [3072]\n", 459 | "Loading TF weight bert/encoder/layer_8/intermediate/dense/kernel with shape [768, 3072]\n", 
460 | "Loading TF weight bert/encoder/layer_8/output/LayerNorm/beta with shape [768]\n", 461 | "Loading TF weight bert/encoder/layer_8/output/LayerNorm/gamma with shape [768]\n", 462 | "Loading TF weight bert/encoder/layer_8/output/dense/bias with shape [768]\n", 463 | "Loading TF weight bert/encoder/layer_8/output/dense/kernel with shape [3072, 768]\n", 464 | "Loading TF weight bert/encoder/layer_9/attention/output/LayerNorm/beta with shape [768]\n", 465 | "Loading TF weight bert/encoder/layer_9/attention/output/LayerNorm/gamma with shape [768]\n", 466 | "Loading TF weight bert/encoder/layer_9/attention/output/dense/bias with shape [768]\n", 467 | "Loading TF weight bert/encoder/layer_9/attention/output/dense/kernel with shape [768, 768]\n", 468 | "Loading TF weight bert/encoder/layer_9/attention/self/key/bias with shape [768]\n", 469 | "Loading TF weight bert/encoder/layer_9/attention/self/key/kernel with shape [768, 768]\n", 470 | "Loading TF weight bert/encoder/layer_9/attention/self/query/bias with shape [768]\n", 471 | "Loading TF weight bert/encoder/layer_9/attention/self/query/kernel with shape [768, 768]\n", 472 | "Loading TF weight bert/encoder/layer_9/attention/self/value/bias with shape [768]\n", 473 | "Loading TF weight bert/encoder/layer_9/attention/self/value/kernel with shape [768, 768]\n", 474 | "Loading TF weight bert/encoder/layer_9/intermediate/dense/bias with shape [3072]\n", 475 | "Loading TF weight bert/encoder/layer_9/intermediate/dense/kernel with shape [768, 3072]\n", 476 | "Loading TF weight bert/encoder/layer_9/output/LayerNorm/beta with shape [768]\n", 477 | "Loading TF weight bert/encoder/layer_9/output/LayerNorm/gamma with shape [768]\n", 478 | "Loading TF weight bert/encoder/layer_9/output/dense/bias with shape [768]\n", 479 | "Loading TF weight bert/encoder/layer_9/output/dense/kernel with shape [3072, 768]\n", 480 | "Loading TF weight bert/pooler/dense/bias with shape [768]\n", 481 | "Loading TF weight bert/pooler/dense/kernel with 
shape [768, 768]\n", 482 | "Loading TF weight cls/predictions/output_bias with shape [28996]\n", 483 | "Loading TF weight cls/predictions/transform/LayerNorm/beta with shape [768]\n", 484 | "Loading TF weight cls/predictions/transform/LayerNorm/gamma with shape [768]\n", 485 | "Loading TF weight cls/predictions/transform/dense/bias with shape [768]\n", 486 | "Loading TF weight cls/predictions/transform/dense/kernel with shape [768, 768]\n", 487 | "Loading TF weight cls/seq_relationship/output_bias with shape [2]\n", 488 | "Loading TF weight cls/seq_relationship/output_weights with shape [2, 768]\n" 489 | ] 490 | } 491 | ], 492 | "source": [ 493 | "names = []\n", 494 | "arrays = []\n", 495 | "for name, shape in init_vars:\n", 496 | " print(\"Loading TF weight {} with shape {}\".format(name, shape))\n", 497 | " array = tf.train.load_variable(tf_path, name)\n", 498 | " names.append(name)\n", 499 | " arrays.append(array)" 500 | ] 501 | }, 502 | { 503 | "cell_type": "code", 504 | "execution_count": 26, 505 | "metadata": {}, 506 | "outputs": [], 507 | "source": [ 508 | "from pytorch_pretrained_bert import BertConfig, BertForPreTraining" 509 | ] 510 | }, 511 | { 512 | "cell_type": "code", 513 | "execution_count": 27, 514 | "metadata": { 515 | "scrolled": true 516 | }, 517 | "outputs": [ 518 | { 519 | "name": "stdout", 520 | "output_type": "stream", 521 | "text": [ 522 | "Building PyTorch model from configuration: {\n", 523 | " \"attention_probs_dropout_prob\": 0.1,\n", 524 | " \"hidden_act\": \"gelu\",\n", 525 | " \"hidden_dropout_prob\": 0.1,\n", 526 | " \"hidden_size\": 768,\n", 527 | " \"initializer_range\": 0.02,\n", 528 | " \"intermediate_size\": 3072,\n", 529 | " \"max_position_embeddings\": 512,\n", 530 | " \"num_attention_heads\": 12,\n", 531 | " \"num_hidden_layers\": 12,\n", 532 | " \"type_vocab_size\": 2,\n", 533 | " \"vocab_size\": 28996\n", 534 | "}\n", 535 | "\n" 536 | ] 537 | } 538 | ], 539 | "source": [ 540 | "# Initialise PyTorch model\n", 541 | "config = 
BertConfig.from_json_file('weights/pubmed_pmc_470k/bert_config.json')\n", 542 | "print(\"Building PyTorch model from configuration: {}\".format(str(config)))\n", 543 | "model = BertForPreTraining(config)\n" 544 | ] 545 | }, 546 | { 547 | "cell_type": "code", 548 | "execution_count": 31, 549 | "metadata": {}, 550 | "outputs": [ 551 | { 552 | "name": "stdout", 553 | "output_type": "stream", 554 | "text": [ 555 | "Initialize PyTorch weight ['bert', 'embeddings', 'LayerNorm', 'beta']\n", 556 | "Initialize PyTorch weight ['bert', 'embeddings', 'LayerNorm', 'gamma']\n", 557 | "Initialize PyTorch weight ['bert', 'embeddings', 'position_embeddings']\n", 558 | "Initialize PyTorch weight ['bert', 'embeddings', 'token_type_embeddings']\n", 559 | "Initialize PyTorch weight ['bert', 'embeddings', 'word_embeddings']\n", 560 | "Initialize PyTorch weight ['bert', 'encoder', 'layer_0', 'attention', 'output', 'LayerNorm', 'beta']\n", 561 | "Initialize PyTorch weight ['bert', 'encoder', 'layer_0', 'attention', 'output', 'LayerNorm', 'gamma']\n", 562 | "Initialize PyTorch weight ['bert', 'encoder', 'layer_0', 'attention', 'output', 'dense', 'bias']\n", 563 | "Initialize PyTorch weight ['bert', 'encoder', 'layer_0', 'attention', 'output', 'dense', 'kernel']\n", 564 | "Initialize PyTorch weight ['bert', 'encoder', 'layer_0', 'attention', 'self', 'key', 'bias']\n", 565 | "Initialize PyTorch weight ['bert', 'encoder', 'layer_0', 'attention', 'self', 'key', 'kernel']\n", 566 | "Initialize PyTorch weight ['bert', 'encoder', 'layer_0', 'attention', 'self', 'query', 'bias']\n", 567 | "Initialize PyTorch weight ['bert', 'encoder', 'layer_0', 'attention', 'self', 'query', 'kernel']\n", 568 | "Initialize PyTorch weight ['bert', 'encoder', 'layer_0', 'attention', 'self', 'value', 'bias']\n", 569 | "Initialize PyTorch weight ['bert', 'encoder', 'layer_0', 'attention', 'self', 'value', 'kernel']\n", 570 | "Initialize PyTorch weight ['bert', 'encoder', 'layer_0', 'intermediate', 'dense', 'bias']\n", 
571 | "Initialize PyTorch weight ['bert', 'encoder', 'layer_0', 'intermediate', 'dense', 'kernel']\n", 572 | "Initialize PyTorch weight ['bert', 'encoder', 'layer_0', 'output', 'LayerNorm', 'beta']\n", 573 | "Initialize PyTorch weight ['bert', 'encoder', 'layer_0', 'output', 'LayerNorm', 'gamma']\n", 574 | "Initialize PyTorch weight ['bert', 'encoder', 'layer_0', 'output', 'dense', 'bias']\n", 575 | "Initialize PyTorch weight ['bert', 'encoder', 'layer_0', 'output', 'dense', 'kernel']\n", 576 | "Initialize PyTorch weight ['bert', 'encoder', 'layer_1', 'attention', 'output', 'LayerNorm', 'beta']\n", 577 | "Initialize PyTorch weight ['bert', 'encoder', 'layer_1', 'attention', 'output', 'LayerNorm', 'gamma']\n", 578 | "Initialize PyTorch weight ['bert', 'encoder', 'layer_1', 'attention', 'output', 'dense', 'bias']\n", 579 | "Initialize PyTorch weight ['bert', 'encoder', 'layer_1', 'attention', 'output', 'dense', 'kernel']\n", 580 | "Initialize PyTorch weight ['bert', 'encoder', 'layer_1', 'attention', 'self', 'key', 'bias']\n", 581 | "Initialize PyTorch weight ['bert', 'encoder', 'layer_1', 'attention', 'self', 'key', 'kernel']\n", 582 | "Initialize PyTorch weight ['bert', 'encoder', 'layer_1', 'attention', 'self', 'query', 'bias']\n", 583 | "Initialize PyTorch weight ['bert', 'encoder', 'layer_1', 'attention', 'self', 'query', 'kernel']\n", 584 | "Initialize PyTorch weight ['bert', 'encoder', 'layer_1', 'attention', 'self', 'value', 'bias']\n", 585 | "Initialize PyTorch weight ['bert', 'encoder', 'layer_1', 'attention', 'self', 'value', 'kernel']\n", 586 | "Initialize PyTorch weight ['bert', 'encoder', 'layer_1', 'intermediate', 'dense', 'bias']\n", 587 | "Initialize PyTorch weight ['bert', 'encoder', 'layer_1', 'intermediate', 'dense', 'kernel']\n", 588 | "Initialize PyTorch weight ['bert', 'encoder', 'layer_1', 'output', 'LayerNorm', 'beta']\n", 589 | "Initialize PyTorch weight ['bert', 'encoder', 'layer_1', 'output', 'LayerNorm', 'gamma']\n", 590 | "Initialize 
PyTorch weight ['bert', 'encoder', 'layer_1', 'output', 'dense', 'bias']\n", 591 | "Initialize PyTorch weight ['bert', 'encoder', 'layer_1', 'output', 'dense', 'kernel']\n", 592 | "Initialize PyTorch weight ['bert', 'encoder', 'layer_10', 'attention', 'output', 'LayerNorm', 'beta']\n", 593 | "Initialize PyTorch weight ['bert', 'encoder', 'layer_10', 'attention', 'output', 'LayerNorm', 'gamma']\n", 594 | "Initialize PyTorch weight ['bert', 'encoder', 'layer_10', 'attention', 'output', 'dense', 'bias']\n", 595 | "Initialize PyTorch weight ['bert', 'encoder', 'layer_10', 'attention', 'output', 'dense', 'kernel']\n", 596 | "Initialize PyTorch weight ['bert', 'encoder', 'layer_10', 'attention', 'self', 'key', 'bias']\n", 597 | "Initialize PyTorch weight ['bert', 'encoder', 'layer_10', 'attention', 'self', 'key', 'kernel']\n", 598 | "Initialize PyTorch weight ['bert', 'encoder', 'layer_10', 'attention', 'self', 'query', 'bias']\n", 599 | "Initialize PyTorch weight ['bert', 'encoder', 'layer_10', 'attention', 'self', 'query', 'kernel']\n", 600 | "Initialize PyTorch weight ['bert', 'encoder', 'layer_10', 'attention', 'self', 'value', 'bias']\n", 601 | "Initialize PyTorch weight ['bert', 'encoder', 'layer_10', 'attention', 'self', 'value', 'kernel']\n", 602 | "Initialize PyTorch weight ['bert', 'encoder', 'layer_10', 'intermediate', 'dense', 'bias']\n", 603 | "Initialize PyTorch weight ['bert', 'encoder', 'layer_10', 'intermediate', 'dense', 'kernel']\n", 604 | "Initialize PyTorch weight ['bert', 'encoder', 'layer_10', 'output', 'LayerNorm', 'beta']\n", 605 | "Initialize PyTorch weight ['bert', 'encoder', 'layer_10', 'output', 'LayerNorm', 'gamma']\n", 606 | "Initialize PyTorch weight ['bert', 'encoder', 'layer_10', 'output', 'dense', 'bias']\n", 607 | "Initialize PyTorch weight ['bert', 'encoder', 'layer_10', 'output', 'dense', 'kernel']\n", 608 | "Initialize PyTorch weight ['bert', 'encoder', 'layer_11', 'attention', 'output', 'LayerNorm', 'beta']\n", 609 | "Initialize 
PyTorch weight ['bert', 'encoder', 'layer_11', 'attention', 'output', 'LayerNorm', 'gamma']\n", 610 | "Initialize PyTorch weight ['bert', 'encoder', 'layer_11', 'attention', 'output', 'dense', 'bias']\n", 611 | "Initialize PyTorch weight ['bert', 'encoder', 'layer_11', 'attention', 'output', 'dense', 'kernel']\n", 612 | "Initialize PyTorch weight ['bert', 'encoder', 'layer_11', 'attention', 'self', 'key', 'bias']\n", 613 | "Initialize PyTorch weight ['bert', 'encoder', 'layer_11', 'attention', 'self', 'key', 'kernel']\n", 614 | "Initialize PyTorch weight ['bert', 'encoder', 'layer_11', 'attention', 'self', 'query', 'bias']\n", 615 | "Initialize PyTorch weight ['bert', 'encoder', 'layer_11', 'attention', 'self', 'query', 'kernel']\n", 616 | "Initialize PyTorch weight ['bert', 'encoder', 'layer_11', 'attention', 'self', 'value', 'bias']\n", 617 | "Initialize PyTorch weight ['bert', 'encoder', 'layer_11', 'attention', 'self', 'value', 'kernel']\n", 618 | "Initialize PyTorch weight ['bert', 'encoder', 'layer_11', 'intermediate', 'dense', 'bias']\n", 619 | "Initialize PyTorch weight ['bert', 'encoder', 'layer_11', 'intermediate', 'dense', 'kernel']\n", 620 | "Initialize PyTorch weight ['bert', 'encoder', 'layer_11', 'output', 'LayerNorm', 'beta']\n", 621 | "Initialize PyTorch weight ['bert', 'encoder', 'layer_11', 'output', 'LayerNorm', 'gamma']\n", 622 | "Initialize PyTorch weight ['bert', 'encoder', 'layer_11', 'output', 'dense', 'bias']\n", 623 | "Initialize PyTorch weight ['bert', 'encoder', 'layer_11', 'output', 'dense', 'kernel']\n", 624 | "Initialize PyTorch weight ['bert', 'encoder', 'layer_2', 'attention', 'output', 'LayerNorm', 'beta']\n", 625 | "Initialize PyTorch weight ['bert', 'encoder', 'layer_2', 'attention', 'output', 'LayerNorm', 'gamma']\n", 626 | "Initialize PyTorch weight ['bert', 'encoder', 'layer_2', 'attention', 'output', 'dense', 'bias']\n", 627 | "Initialize PyTorch weight ['bert', 'encoder', 'layer_2', 'attention', 'output', 'dense', 
'kernel']\n", 628 | "Initialize PyTorch weight ['bert', 'encoder', 'layer_2', 'attention', 'self', 'key', 'bias']\n", 629 | "Initialize PyTorch weight ['bert', 'encoder', 'layer_2', 'attention', 'self', 'key', 'kernel']\n", 630 | "Initialize PyTorch weight ['bert', 'encoder', 'layer_2', 'attention', 'self', 'query', 'bias']\n", 631 | "Initialize PyTorch weight ['bert', 'encoder', 'layer_2', 'attention', 'self', 'query', 'kernel']\n", 632 | "Initialize PyTorch weight ['bert', 'encoder', 'layer_2', 'attention', 'self', 'value', 'bias']\n", 633 | "Initialize PyTorch weight ['bert', 'encoder', 'layer_2', 'attention', 'self', 'value', 'kernel']\n", 634 | "Initialize PyTorch weight ['bert', 'encoder', 'layer_2', 'intermediate', 'dense', 'bias']\n", 635 | "Initialize PyTorch weight ['bert', 'encoder', 'layer_2', 'intermediate', 'dense', 'kernel']\n", 636 | "Initialize PyTorch weight ['bert', 'encoder', 'layer_2', 'output', 'LayerNorm', 'beta']\n", 637 | "Initialize PyTorch weight ['bert', 'encoder', 'layer_2', 'output', 'LayerNorm', 'gamma']\n", 638 | "Initialize PyTorch weight ['bert', 'encoder', 'layer_2', 'output', 'dense', 'bias']\n", 639 | "Initialize PyTorch weight ['bert', 'encoder', 'layer_2', 'output', 'dense', 'kernel']\n", 640 | "Initialize PyTorch weight ['bert', 'encoder', 'layer_3', 'attention', 'output', 'LayerNorm', 'beta']\n", 641 | "Initialize PyTorch weight ['bert', 'encoder', 'layer_3', 'attention', 'output', 'LayerNorm', 'gamma']\n", 642 | "Initialize PyTorch weight ['bert', 'encoder', 'layer_3', 'attention', 'output', 'dense', 'bias']\n", 643 | "Initialize PyTorch weight ['bert', 'encoder', 'layer_3', 'attention', 'output', 'dense', 'kernel']\n", 644 | "Initialize PyTorch weight ['bert', 'encoder', 'layer_3', 'attention', 'self', 'key', 'bias']\n", 645 | "Initialize PyTorch weight ['bert', 'encoder', 'layer_3', 'attention', 'self', 'key', 'kernel']\n", 646 | "Initialize PyTorch weight ['bert', 'encoder', 'layer_3', 'attention', 'self', 'query', 
'bias']\n", 647 | "Initialize PyTorch weight ['bert', 'encoder', 'layer_3', 'attention', 'self', 'query', 'kernel']\n", 648 | "Initialize PyTorch weight ['bert', 'encoder', 'layer_3', 'attention', 'self', 'value', 'bias']\n", 649 | "Initialize PyTorch weight ['bert', 'encoder', 'layer_3', 'attention', 'self', 'value', 'kernel']\n", 650 | "Initialize PyTorch weight ['bert', 'encoder', 'layer_3', 'intermediate', 'dense', 'bias']\n", 651 | "Initialize PyTorch weight ['bert', 'encoder', 'layer_3', 'intermediate', 'dense', 'kernel']\n", 652 | "Initialize PyTorch weight ['bert', 'encoder', 'layer_3', 'output', 'LayerNorm', 'beta']\n", 653 | "Initialize PyTorch weight ['bert', 'encoder', 'layer_3', 'output', 'LayerNorm', 'gamma']\n", 654 | "Initialize PyTorch weight ['bert', 'encoder', 'layer_3', 'output', 'dense', 'bias']\n", 655 | "Initialize PyTorch weight ['bert', 'encoder', 'layer_3', 'output', 'dense', 'kernel']\n", 656 | "Initialize PyTorch weight ['bert', 'encoder', 'layer_4', 'attention', 'output', 'LayerNorm', 'beta']\n", 657 | "Initialize PyTorch weight ['bert', 'encoder', 'layer_4', 'attention', 'output', 'LayerNorm', 'gamma']\n", 658 | "Initialize PyTorch weight ['bert', 'encoder', 'layer_4', 'attention', 'output', 'dense', 'bias']\n", 659 | "Initialize PyTorch weight ['bert', 'encoder', 'layer_4', 'attention', 'output', 'dense', 'kernel']\n", 660 | "Initialize PyTorch weight ['bert', 'encoder', 'layer_4', 'attention', 'self', 'key', 'bias']\n", 661 | "Initialize PyTorch weight ['bert', 'encoder', 'layer_4', 'attention', 'self', 'key', 'kernel']\n", 662 | "Initialize PyTorch weight ['bert', 'encoder', 'layer_4', 'attention', 'self', 'query', 'bias']\n", 663 | "Initialize PyTorch weight ['bert', 'encoder', 'layer_4', 'attention', 'self', 'query', 'kernel']\n", 664 | "Initialize PyTorch weight ['bert', 'encoder', 'layer_4', 'attention', 'self', 'value', 'bias']\n", 665 | "Initialize PyTorch weight ['bert', 'encoder', 'layer_4', 'attention', 'self', 'value', 
'kernel']\n", 666 | "Initialize PyTorch weight ['bert', 'encoder', 'layer_4', 'intermediate', 'dense', 'bias']\n", 667 | "Initialize PyTorch weight ['bert', 'encoder', 'layer_4', 'intermediate', 'dense', 'kernel']\n", 668 | "Initialize PyTorch weight ['bert', 'encoder', 'layer_4', 'output', 'LayerNorm', 'beta']\n", 669 | "Initialize PyTorch weight ['bert', 'encoder', 'layer_4', 'output', 'LayerNorm', 'gamma']\n", 670 | "Initialize PyTorch weight ['bert', 'encoder', 'layer_4', 'output', 'dense', 'bias']\n", 671 | "Initialize PyTorch weight ['bert', 'encoder', 'layer_4', 'output', 'dense', 'kernel']\n", 672 | "Initialize PyTorch weight ['bert', 'encoder', 'layer_5', 'attention', 'output', 'LayerNorm', 'beta']\n", 673 | "Initialize PyTorch weight ['bert', 'encoder', 'layer_5', 'attention', 'output', 'LayerNorm', 'gamma']\n", 674 | "Initialize PyTorch weight ['bert', 'encoder', 'layer_5', 'attention', 'output', 'dense', 'bias']\n", 675 | "Initialize PyTorch weight ['bert', 'encoder', 'layer_5', 'attention', 'output', 'dense', 'kernel']\n", 676 | "Initialize PyTorch weight ['bert', 'encoder', 'layer_5', 'attention', 'self', 'key', 'bias']\n", 677 | "Initialize PyTorch weight ['bert', 'encoder', 'layer_5', 'attention', 'self', 'key', 'kernel']\n", 678 | "Initialize PyTorch weight ['bert', 'encoder', 'layer_5', 'attention', 'self', 'query', 'bias']\n", 679 | "Initialize PyTorch weight ['bert', 'encoder', 'layer_5', 'attention', 'self', 'query', 'kernel']\n", 680 | "Initialize PyTorch weight ['bert', 'encoder', 'layer_5', 'attention', 'self', 'value', 'bias']\n", 681 | "Initialize PyTorch weight ['bert', 'encoder', 'layer_5', 'attention', 'self', 'value', 'kernel']\n", 682 | "Initialize PyTorch weight ['bert', 'encoder', 'layer_5', 'intermediate', 'dense', 'bias']\n", 683 | "Initialize PyTorch weight ['bert', 'encoder', 'layer_5', 'intermediate', 'dense', 'kernel']\n", 684 | "Initialize PyTorch weight ['bert', 'encoder', 'layer_5', 'output', 'LayerNorm', 'beta']\n", 685 | 
"Initialize PyTorch weight ['bert', 'encoder', 'layer_5', 'output', 'LayerNorm', 'gamma']\n", 686 | "Initialize PyTorch weight ['bert', 'encoder', 'layer_5', 'output', 'dense', 'bias']\n", 687 | "Initialize PyTorch weight ['bert', 'encoder', 'layer_5', 'output', 'dense', 'kernel']\n", 688 | "Initialize PyTorch weight ['bert', 'encoder', 'layer_6', 'attention', 'output', 'LayerNorm', 'beta']\n", 689 | "Initialize PyTorch weight ['bert', 'encoder', 'layer_6', 'attention', 'output', 'LayerNorm', 'gamma']\n", 690 | "Initialize PyTorch weight ['bert', 'encoder', 'layer_6', 'attention', 'output', 'dense', 'bias']\n", 691 | "Initialize PyTorch weight ['bert', 'encoder', 'layer_6', 'attention', 'output', 'dense', 'kernel']\n", 692 | "Initialize PyTorch weight ['bert', 'encoder', 'layer_6', 'attention', 'self', 'key', 'bias']\n", 693 | "Initialize PyTorch weight ['bert', 'encoder', 'layer_6', 'attention', 'self', 'key', 'kernel']\n", 694 | "Initialize PyTorch weight ['bert', 'encoder', 'layer_6', 'attention', 'self', 'query', 'bias']\n", 695 | "Initialize PyTorch weight ['bert', 'encoder', 'layer_6', 'attention', 'self', 'query', 'kernel']\n", 696 | "Initialize PyTorch weight ['bert', 'encoder', 'layer_6', 'attention', 'self', 'value', 'bias']\n", 697 | "Initialize PyTorch weight ['bert', 'encoder', 'layer_6', 'attention', 'self', 'value', 'kernel']\n", 698 | "Initialize PyTorch weight ['bert', 'encoder', 'layer_6', 'intermediate', 'dense', 'bias']\n", 699 | "Initialize PyTorch weight ['bert', 'encoder', 'layer_6', 'intermediate', 'dense', 'kernel']\n", 700 | "Initialize PyTorch weight ['bert', 'encoder', 'layer_6', 'output', 'LayerNorm', 'beta']\n", 701 | "Initialize PyTorch weight ['bert', 'encoder', 'layer_6', 'output', 'LayerNorm', 'gamma']\n", 702 | "Initialize PyTorch weight ['bert', 'encoder', 'layer_6', 'output', 'dense', 'bias']\n", 703 | "Initialize PyTorch weight ['bert', 'encoder', 'layer_6', 'output', 'dense', 'kernel']\n", 704 | "Initialize PyTorch weight 
['bert', 'encoder', 'layer_7', 'attention', 'output', 'LayerNorm', 'beta']\n", 705 | "Initialize PyTorch weight ['bert', 'encoder', 'layer_7', 'attention', 'output', 'LayerNorm', 'gamma']\n", 706 | "Initialize PyTorch weight ['bert', 'encoder', 'layer_7', 'attention', 'output', 'dense', 'bias']\n", 707 | "Initialize PyTorch weight ['bert', 'encoder', 'layer_7', 'attention', 'output', 'dense', 'kernel']\n", 708 | "Initialize PyTorch weight ['bert', 'encoder', 'layer_7', 'attention', 'self', 'key', 'bias']\n", 709 | "Initialize PyTorch weight ['bert', 'encoder', 'layer_7', 'attention', 'self', 'key', 'kernel']\n", 710 | "Initialize PyTorch weight ['bert', 'encoder', 'layer_7', 'attention', 'self', 'query', 'bias']\n", 711 | "Initialize PyTorch weight ['bert', 'encoder', 'layer_7', 'attention', 'self', 'query', 'kernel']\n", 712 | "Initialize PyTorch weight ['bert', 'encoder', 'layer_7', 'attention', 'self', 'value', 'bias']\n", 713 | "Initialize PyTorch weight ['bert', 'encoder', 'layer_7', 'attention', 'self', 'value', 'kernel']\n", 714 | "Initialize PyTorch weight ['bert', 'encoder', 'layer_7', 'intermediate', 'dense', 'bias']\n", 715 | "Initialize PyTorch weight ['bert', 'encoder', 'layer_7', 'intermediate', 'dense', 'kernel']\n", 716 | "Initialize PyTorch weight ['bert', 'encoder', 'layer_7', 'output', 'LayerNorm', 'beta']\n", 717 | "Initialize PyTorch weight ['bert', 'encoder', 'layer_7', 'output', 'LayerNorm', 'gamma']\n", 718 | "Initialize PyTorch weight ['bert', 'encoder', 'layer_7', 'output', 'dense', 'bias']\n", 719 | "Initialize PyTorch weight ['bert', 'encoder', 'layer_7', 'output', 'dense', 'kernel']\n", 720 | "Initialize PyTorch weight ['bert', 'encoder', 'layer_8', 'attention', 'output', 'LayerNorm', 'beta']\n", 721 | "Initialize PyTorch weight ['bert', 'encoder', 'layer_8', 'attention', 'output', 'LayerNorm', 'gamma']\n", 722 | "Initialize PyTorch weight ['bert', 'encoder', 'layer_8', 'attention', 'output', 'dense', 'bias']\n", 723 | "Initialize 
PyTorch weight ['bert', 'encoder', 'layer_8', 'attention', 'output', 'dense', 'kernel']\n", 724 | "Initialize PyTorch weight ['bert', 'encoder', 'layer_8', 'attention', 'self', 'key', 'bias']\n", 725 | "Initialize PyTorch weight ['bert', 'encoder', 'layer_8', 'attention', 'self', 'key', 'kernel']\n", 726 | "Initialize PyTorch weight ['bert', 'encoder', 'layer_8', 'attention', 'self', 'query', 'bias']\n", 727 | "Initialize PyTorch weight ['bert', 'encoder', 'layer_8', 'attention', 'self', 'query', 'kernel']\n", 728 | "Initialize PyTorch weight ['bert', 'encoder', 'layer_8', 'attention', 'self', 'value', 'bias']\n", 729 | "Initialize PyTorch weight ['bert', 'encoder', 'layer_8', 'attention', 'self', 'value', 'kernel']\n", 730 | "Initialize PyTorch weight ['bert', 'encoder', 'layer_8', 'intermediate', 'dense', 'bias']\n", 731 | "Initialize PyTorch weight ['bert', 'encoder', 'layer_8', 'intermediate', 'dense', 'kernel']\n", 732 | "Initialize PyTorch weight ['bert', 'encoder', 'layer_8', 'output', 'LayerNorm', 'beta']\n", 733 | "Initialize PyTorch weight ['bert', 'encoder', 'layer_8', 'output', 'LayerNorm', 'gamma']\n", 734 | "Initialize PyTorch weight ['bert', 'encoder', 'layer_8', 'output', 'dense', 'bias']\n", 735 | "Initialize PyTorch weight ['bert', 'encoder', 'layer_8', 'output', 'dense', 'kernel']\n", 736 | "Initialize PyTorch weight ['bert', 'encoder', 'layer_9', 'attention', 'output', 'LayerNorm', 'beta']\n", 737 | "Initialize PyTorch weight ['bert', 'encoder', 'layer_9', 'attention', 'output', 'LayerNorm', 'gamma']\n", 738 | "Initialize PyTorch weight ['bert', 'encoder', 'layer_9', 'attention', 'output', 'dense', 'bias']\n", 739 | "Initialize PyTorch weight ['bert', 'encoder', 'layer_9', 'attention', 'output', 'dense', 'kernel']\n", 740 | "Initialize PyTorch weight ['bert', 'encoder', 'layer_9', 'attention', 'self', 'key', 'bias']\n", 741 | "Initialize PyTorch weight ['bert', 'encoder', 'layer_9', 'attention', 'self', 'key', 'kernel']\n", 742 | "Initialize 
PyTorch weight ['bert', 'encoder', 'layer_9', 'attention', 'self', 'query', 'bias']\n", 743 | "Initialize PyTorch weight ['bert', 'encoder', 'layer_9', 'attention', 'self', 'query', 'kernel']\n", 744 | "Initialize PyTorch weight ['bert', 'encoder', 'layer_9', 'attention', 'self', 'value', 'bias']\n", 745 | "Initialize PyTorch weight ['bert', 'encoder', 'layer_9', 'attention', 'self', 'value', 'kernel']\n", 746 | "Initialize PyTorch weight ['bert', 'encoder', 'layer_9', 'intermediate', 'dense', 'bias']\n", 747 | "Initialize PyTorch weight ['bert', 'encoder', 'layer_9', 'intermediate', 'dense', 'kernel']\n", 748 | "Initialize PyTorch weight ['bert', 'encoder', 'layer_9', 'output', 'LayerNorm', 'beta']\n", 749 | "Initialize PyTorch weight ['bert', 'encoder', 'layer_9', 'output', 'LayerNorm', 'gamma']\n", 750 | "Initialize PyTorch weight ['bert', 'encoder', 'layer_9', 'output', 'dense', 'bias']\n", 751 | "Initialize PyTorch weight ['bert', 'encoder', 'layer_9', 'output', 'dense', 'kernel']\n", 752 | "Initialize PyTorch weight ['bert', 'pooler', 'dense', 'bias']\n", 753 | "Initialize PyTorch weight ['bert', 'pooler', 'dense', 'kernel']\n", 754 | "Initialize PyTorch weight ['cls', 'predictions', 'output_bias']\n", 755 | "Initialize PyTorch weight ['cls', 'predictions', 'transform', 'LayerNorm', 'beta']\n", 756 | "Initialize PyTorch weight ['cls', 'predictions', 'transform', 'LayerNorm', 'gamma']\n", 757 | "Initialize PyTorch weight ['cls', 'predictions', 'transform', 'dense', 'bias']\n", 758 | "Initialize PyTorch weight ['cls', 'predictions', 'transform', 'dense', 'kernel']\n", 759 | "Initialize PyTorch weight ['cls', 'seq_relationship', 'output_bias']\n", 760 | "Initialize PyTorch weight ['cls', 'seq_relationship', 'output_weights']\n" 761 | ] 762 | }, 763 | { 764 | "ename": "NameError", 765 | "evalue": "name 'pytorch_dump_path' is not defined", 766 | "output_type": "error", 767 | "traceback": [ 768 | 
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", 769 | "\u001b[0;31mNameError\u001b[0m Traceback (most recent call last)", 770 | "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m()\u001b[0m\n\u001b[1;32m 37\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 38\u001b[0m \u001b[0;31m# Save pytorch-model\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 39\u001b[0;31m \u001b[0mprint\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m\"Save PyTorch model to {}\"\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mformat\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mpytorch_dump_path\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 40\u001b[0m \u001b[0mtorch\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0msave\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mmodel\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mstate_dict\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mpytorch_dump_path\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", 771 | "\u001b[0;31mNameError\u001b[0m: name 'pytorch_dump_path' is not defined" 772 | ] 773 | } 774 | ], 775 | "source": [ 776 | "\n", 777 | "for name, array in zip(names, arrays):\n", 778 | " name = name.split('/')\n", 779 | " # adam_v and adam_m are variables used in AdamWeightDecayOptimizer to calculated m and v\n", 780 | " # which are not required for using pretrained model\n", 781 | " if any(n in [\"adam_v\", \"adam_m\", \"global_step\"] for n in name):\n", 782 | " print(\"Skipping {}\".format(\"/\".join(name)))\n", 783 | " continue\n", 784 | " pointer = model\n", 785 | " for m_name in name:\n", 786 | " if re.fullmatch(r'[A-Za-z]+_\\d+', m_name):\n", 787 | " l = re.split(r'_(\\d+)', m_name)\n", 788 | " else:\n", 789 | " l = [m_name]\n", 790 | " if l[0] == 'kernel' or l[0] == 'gamma':\n", 791 | " pointer = getattr(pointer, 'weight')\n", 792 
| " elif l[0] == 'output_bias' or l[0] == 'beta':\n", 793 | " pointer = getattr(pointer, 'bias')\n", 794 | " elif l[0] == 'output_weights':\n", 795 | " pointer = getattr(pointer, 'weight')\n", 796 | " else:\n", 797 | " pointer = getattr(pointer, l[0])\n", 798 | " if len(l) >= 2:\n", 799 | " num = int(l[1])\n", 800 | " pointer = pointer[num]\n", 801 | " if m_name[-11:] == '_embeddings':\n", 802 | " pointer = getattr(pointer, 'weight')\n", 803 | " elif m_name == 'kernel':\n", 804 | " array = np.transpose(array)\n", 805 | " try:\n", 806 | " assert pointer.shape == array.shape\n", 807 | " except AssertionError as e:\n", 808 | " e.args += (pointer.shape, array.shape)\n", 809 | " raise\n", 810 | " print(\"Initialize PyTorch weight {}\".format(name))\n", 811 | " pointer.data = torch.from_numpy(array)\n", 812 | "\n", 813 | "# Save pytorch-model\n" 814 | ] 815 | }, 816 | { 817 | "cell_type": "code", 818 | "execution_count": 34, 819 | "metadata": {}, 820 | "outputs": [ 821 | { 822 | "name": "stdout", 823 | "output_type": "stream", 824 | "text": [ 825 | "Save PyTorch model to weights/\n" 826 | ] 827 | } 828 | ], 829 | "source": [ 830 | "print(\"Save PyTorch model to {}\".format('weights/'))\n", 831 | "torch.save(model.state_dict(),'weights/pytorch_weight')" 832 | ] 833 | }, 834 | { 835 | "cell_type": "code", 836 | "execution_count": null, 837 | "metadata": {}, 838 | "outputs": [], 839 | "source": [] 840 | }, 841 | { 842 | "cell_type": "code", 843 | "execution_count": null, 844 | "metadata": {}, 845 | "outputs": [], 846 | "source": [ 847 | "import os\n", 848 | "import re\n", 849 | "import argparse\n", 850 | "import tensorflow as tf\n", 851 | "import torch\n", 852 | "import numpy as np\n", 853 | "\n", 854 | "from pytorch_pretrained_bert import BertConfig, BertForPreTraining\n", 855 | "\n", 856 | "def convert_tf_checkpoint_to_pytorch(tf_checkpoint_path, bert_config_file, pytorch_dump_path):\n", 857 | " config_path = os.path.abspath(bert_config_file)\n", 858 | " tf_path = 
os.path.abspath(tf_checkpoint_path)\n", 859 | " print(\"Converting TensorFlow checkpoint from {} with config at {}\".format(tf_path, config_path))\n", 860 | " # Load weights from TF model\n", 861 | " init_vars = tf.train.list_variables(tf_path)\n", 862 | " names = []\n", 863 | " arrays = []\n", 864 | " for name, shape in init_vars:\n", 865 | " print(\"Loading TF weight {} with shape {}\".format(name, shape))\n", 866 | " array = tf.train.load_variable(tf_path, name)\n", 867 | " names.append(name)\n", 868 | " arrays.append(array)\n", 869 | "\n", 870 | " # Initialise PyTorch model\n", 871 | " config = BertConfig.from_json_file(bert_config_file)\n", 872 | " print(\"Building PyTorch model from configuration: {}\".format(str(config)))\n", 873 | " model = BertForPreTraining(config)\n", 874 | "\n", 875 | " for name, array in zip(names, arrays):\n", 876 | " name = name.split('/')\n", 877 | " # adam_v and adam_m are variables used in AdamWeightDecayOptimizer to calculated m and v\n", 878 | " # which are not required for using pretrained model\n", 879 | " if any(n in [\"adam_v\", \"adam_m\", \"global_step\"] for n in name):\n", 880 | " print(\"Skipping {}\".format(\"/\".join(name)))\n", 881 | " continue\n", 882 | " pointer = model\n", 883 | " for m_name in name:\n", 884 | " if re.fullmatch(r'[A-Za-z]+_\\d+', m_name):\n", 885 | " l = re.split(r'_(\\d+)', m_name)\n", 886 | " else:\n", 887 | " l = [m_name]\n", 888 | " if l[0] == 'kernel' or l[0] == 'gamma':\n", 889 | " pointer = getattr(pointer, 'weight')\n", 890 | " elif l[0] == 'output_bias' or l[0] == 'beta':\n", 891 | " pointer = getattr(pointer, 'bias')\n", 892 | " elif l[0] == 'output_weights':\n", 893 | " pointer = getattr(pointer, 'weight')\n", 894 | " else:\n", 895 | " pointer = getattr(pointer, l[0])\n", 896 | " if len(l) >= 2:\n", 897 | " num = int(l[1])\n", 898 | " pointer = pointer[num]\n", 899 | " if m_name[-11:] == '_embeddings':\n", 900 | " pointer = getattr(pointer, 'weight')\n", 901 | " elif m_name == 
'kernel':\n", 902 | " array = np.transpose(array)\n", 903 | " try:\n", 904 | " assert pointer.shape == array.shape\n", 905 | " except AssertionError as e:\n", 906 | " e.args += (pointer.shape, array.shape)\n", 907 | " raise\n", 908 | " print(\"Initialize PyTorch weight {}\".format(name))\n", 909 | " pointer.data = torch.from_numpy(array)\n", 910 | "\n", 911 | " # Save pytorch-model\n", 912 | " print(\"Save PyTorch model to {}\".format(pytorch_dump_path))\n", 913 | " torch.save(model.state_dict(), pytorch_dump_path)\n" 914 | ] 915 | } 916 | ], 917 | "metadata": { 918 | "kernelspec": { 919 | "display_name": "Python 3", 920 | "language": "python", 921 | "name": "python3" 922 | }, 923 | "language_info": { 924 | "codemirror_mode": { 925 | "name": "ipython", 926 | "version": 3 927 | }, 928 | "file_extension": ".py", 929 | "mimetype": "text/x-python", 930 | "name": "python", 931 | "nbconvert_exporter": "python", 932 | "pygments_lexer": "ipython3", 933 | "version": "3.6.8" 934 | } 935 | }, 936 | "nbformat": 4, 937 | "nbformat_minor": 2 938 | } 939 | -------------------------------------------------------------------------------- /biobert_ner/data_load.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from torch.utils import data 3 | import parameters 4 | import torch 5 | from pytorch_pretrained_bert import BertTokenizer 6 | 7 | 8 | class HParams: 9 | def __init__(self, vocab_type): 10 | self.VOCAB_DICT = { 11 | 'bc5cdr': ('', 'B-Chemical', 'O', 'B-Disease' , 'I-Disease', 'I-Chemical'), 12 | 'bionlp3g' : ('', 'B-Amino_acid', 'B-Anatomical_system', 'B-Cancer', 'B-Cell', 13 | 'B-Cellular_component', 'B-Developing_anatomical_structure', 'B-Gene_or_gene_product', 14 | 'B-Immaterial_anatomical_entity', 'B-Multi-tissue_structure', 'B-Organ', 'B-Organism', 15 | 'B-Organism_subdivision', 'B-Organism_substance', 'B-Pathological_formation', 16 | 'B-Simple_chemical', 'B-Tissue', 'I-Amino_acid', 'I-Anatomical_system', 
class NerDataset(data.Dataset):
    """CoNLL-style NER dataset.

    The file at `path` holds blank-line-separated sentences; each token line
    has the word in column 0 and the gold tag in the last column. Sentences
    are wrapped in [CLS]/[SEP] with the padding tag ('') on those positions.
    """

    def __init__(self, path, vocab_type):
        self.hp = HParams(vocab_type)
        # Fix: close the file deterministically (the original leaked the
        # handle from a bare open(path).read()).
        with open(path) as fin:
            instances = fin.read().strip().split('\n\n')
        sents = []
        tags_li = []
        for entry in instances:
            # Split each token line once instead of twice.
            rows = [line.split() for line in entry.splitlines()]
            words = [cols[0] for cols in rows]
            tags = [cols[-1] for cols in rows]
            sents.append(["[CLS]"] + words + ["[SEP]"])
            tags_li.append([""] + tags + [""])
        self.sents, self.tags_li = sents, tags_li

    def __len__(self):
        """Number of sentences."""
        return len(self.sents)

    def __getitem__(self, idx):
        """Tokenize sentence `idx` into word-pieces.

        Returns (words_str, token_ids, is_heads, tags_str, tag_ids, seqlen),
        where is_heads marks the first word-piece of each word; only heads
        carry a real tag, continuation pieces get the no-decision tag ''.
        """
        words, tags = self.sents[idx], self.tags_li[idx]  # words, tags: string list

        # We give credits only to the first piece.
        x, y = [], []  # list of ids
        is_heads = []  # 1: the token is the first piece of a word
        for w, t in zip(words, tags):
            tokens = self.hp.tokenizer.tokenize(w) if w not in ("[CLS]", "[SEP]") else [w]
            xx = self.hp.tokenizer.convert_tokens_to_ids(tokens)

            is_head = [1] + [0] * (len(tokens) - 1)

            t = [t] + [""] * (len(tokens) - 1)  # '': no decision
            yy = [self.hp.tag2idx[each] for each in t]  # (T,)

            x.extend(xx)
            is_heads.extend(is_head)
            y.extend(yy)

        assert len(x) == len(y) == len(is_heads), f"len(x)={len(x)}, len(y)={len(y)}, len(is_heads)={len(is_heads)}"

        # seqlen
        seqlen = len(y)

        # to string
        words = " ".join(words)
        tags = " ".join(tags)
        return words, x, is_heads, tags, y, seqlen
def pad(batch):
    """Collate function: zero-pad every sample to the longest sequence.

    Each sample is (words, x, is_heads, tags, y, seqlen); x and y are padded
    with 0 (the padding id) up to the batch maximum and returned as
    LongTensors, everything else is passed through as Python lists.
    """
    words = [sample[0] for sample in batch]
    is_heads = [sample[2] for sample in batch]
    tags = [sample[3] for sample in batch]
    seqlens = [sample[-1] for sample in batch]
    maxlen = max(seqlens)

    def padded(field):
        # 0 doubles as both the token pad id and the tag pad index.
        return [sample[field] + [0] * (maxlen - len(sample[field])) for sample in batch]

    x = torch.LongTensor(padded(1))
    y = torch.LongTensor(padded(-2))
    return words, x, is_heads, tags, y, seqlens
class Net(nn.Module):
    """Frozen BERT encoder followed by a 2-layer BiLSTM and a linear tag head."""

    def __init__(self, config, bert_state_dict, vocab_len, device='cpu'):
        super().__init__()
        self.bert = BertModel(config)
        if bert_state_dict is not None:
            self.bert.load_state_dict(bert_state_dict)
        # BERT acts as a fixed feature extractor (see no_grad in forward).
        self.bert.eval()
        # Bidirectional, so hidden_size is half the 768-dim BERT output;
        # concatenated directions feed the 768 -> vocab_len classifier.
        self.rnn = nn.LSTM(input_size=768, hidden_size=768 // 2,
                           num_layers=2, bidirectional=True, batch_first=True)
        self.fc = nn.Linear(768, vocab_len)
        self.device = device

    def forward(self, x, y):
        """
        x: (N, T). int64
        y: (N, T). int64

        Returns
        enc: (N, T, VOCAB)
        """
        x = x.to(self.device)
        y = y.to(self.device)

        # Gradients are only needed for the LSTM + classifier, not BERT.
        with torch.no_grad():
            encoded_layers, _ = self.bert(x)
            features = encoded_layers[-1]
        rnn_out, _ = self.rnn(features)
        logits = self.fc(rnn_out)
        y_hat = logits.argmax(-1)
        return logits, y, y_hat
def train(model, iterator, optimizer, criterion):
    """Run one training epoch over `iterator`.

    Prints a one-batch sanity dump on the first step and the loss every
    10 steps. Relies on the module-global `hp` for the tokenizer.
    """
    model.train()
    for step, batch in enumerate(iterator):
        words, x, is_heads, tags, y, seqlens = batch
        _y = y  # untouched copy for the sanity print below
        optimizer.zero_grad()
        logits, y, _ = model(x, y)  # logits: (N, T, VOCAB), y: (N, T)

        # Flatten to (N*T, VOCAB) vs (N*T,) for cross-entropy.
        loss = criterion(logits.view(-1, logits.shape[-1]), y.view(-1))
        loss.backward()
        optimizer.step()

        if step == 0:
            print("=====sanity check======")
            print("x:", x.cpu().numpy()[0])
            print("words:", words[0])
            print("tokens:", hp.tokenizer.convert_ids_to_tokens(x.cpu().numpy()[0]))
            print("y:", _y.cpu().numpy()[0])
            print("is_heads:", is_heads[0])
            print("tags:", tags[0])
            print("seqlen:", seqlens[0])

        if step % 10 == 0:  # monitoring
            print(f"step: {step}, loss: {loss.item()}")
def eval(model, iterator, f):
    """Evaluate `model` on `iterator`, writing per-token results to file `f`.

    Writes "word gold_tag pred_tag" lines (blank line between sentences),
    computes precision/recall/F1 over non-pad, non-index-1 tags, renames the
    file with a ".P.._R.._F.." suffix and returns (precision, recall, f1).
    Relies on the module-global `hp` for the tag tables.

    NOTE: shadows the `eval` builtin — kept for caller compatibility.
    """
    model.eval()

    Words, Is_heads, Tags, Y, Y_hat = [], [], [], [], []
    with torch.no_grad():
        for i, batch in enumerate(iterator):
            words, x, is_heads, tags, y, seqlens = batch

            _, _, y_hat = model(x, y)  # y_hat: (N, T)

            Words.extend(words)
            Is_heads.extend(is_heads)
            Tags.extend(tags)
            Y.extend(y.numpy().tolist())
            Y_hat.extend(y_hat.cpu().numpy().tolist())

    ## gets results and save
    with open(f, 'w') as fout:
        for words, is_heads, tags, y_hat in zip(Words, Is_heads, Tags, Y_hat):
            # Keep predictions only for the first word-piece of each word.
            y_hat = [hat for head, hat in zip(is_heads, y_hat) if head == 1]
            preds = [hp.idx2tag[hat] for hat in y_hat]
            assert len(preds) == len(words.split()) == len(tags.split())
            for w, t, p in zip(words.split()[1:-1], tags.split()[1:-1], preds[1:-1]):
                fout.write(f"{w} {t} {p}\n")
            fout.write("\n")

    ## calc metric
    # Fix: read the result file back once (the original re-opened and fully
    # re-read it twice, leaking both handles).
    with open(f, 'r') as fin:
        lines = [line for line in fin.read().splitlines() if len(line) > 0]
    y_true = np.array([hp.tag2idx[line.split()[1]] for line in lines])
    y_pred = np.array([hp.tag2idx[line.split()[2]] for line in lines])

    # Entity counts exclude tag indices 0 and 1.
    # NOTE(review): with the bc5cdr vocab order this also excludes
    # 'B-Chemical' (index 1), not just pad/'O' — confirm this is intended.
    num_proposed = len(y_pred[y_pred > 1])
    # Fix: np.int was deprecated in NumPy 1.20 and removed in 1.24; the
    # builtin int is the documented replacement and is equivalent here.
    num_correct = (np.logical_and(y_true == y_pred, y_true > 1)).astype(int).sum()
    num_gold = len(y_true[y_true > 1])

    print(f"num_proposed:{num_proposed}")
    print(f"num_correct:{num_correct}")
    print(f"num_gold:{num_gold}")
    try:
        precision = num_correct / num_proposed
    except ZeroDivisionError:
        precision = 1.0

    try:
        recall = num_correct / num_gold
    except ZeroDivisionError:
        recall = 1.0

    try:
        f1 = 2 * precision * recall / (precision + recall)
    except ZeroDivisionError:
        # Both zero => define F1 as 1.0 (nothing to find, nothing proposed).
        if precision * recall == 0:
            f1 = 1.0
        else:
            f1 = 0

    final = f + ".P%.2f_R%.2f_F%.2f" % (precision, recall, f1)
    with open(final, 'w') as fout, open(f, "r") as fin:
        result = fin.read()
        fout.write(f"{result}\n")

        fout.write(f"precision={precision}\n")
        fout.write(f"recall={recall}\n")
        fout.write(f"f1={f1}\n")

    os.remove(f)

    print("precision=%.2f" % precision)
    print("recall=%.2f" % recall)
    print("f1=%.2f" % f1)
    return precision, recall, f1
if __name__ == "__main__":

    # bc5cdr selects the dataset's tag vocabulary (see HParams.VOCAB_DICT).
    train_dataset = NerDataset("data/train.tsv", 'bc5cdr')
    eval_dataset = NerDataset("data/test.tsv", 'bc5cdr')
    hp = HParams('bc5cdr')

    # Define model
    config = BertConfig(vocab_size_or_config_json_file=parameters.BERT_CONFIG_FILE)
    model = Net(config=config, bert_state_dict=state_dict, vocab_len=len(hp.VOCAB), device=hp.device)
    # Consistency fix: place the model on the device HParams already chose,
    # instead of a second, separate torch.cuda.is_available()/model.cuda()
    # check that could drift from hp.device.
    model.to(hp.device)
    model.train()

    train_iter = data.DataLoader(dataset=train_dataset,
                                 batch_size=hp.batch_size,
                                 shuffle=True,
                                 num_workers=4,
                                 collate_fn=pad)
    eval_iter = data.DataLoader(dataset=eval_dataset,
                                batch_size=hp.batch_size,
                                shuffle=False,
                                num_workers=4,
                                collate_fn=pad)

    optimizer = optim.Adam(model.parameters(), lr=hp.lr)
    # optimizer = optim.SGD(model.parameters(), lr=0.0001, momentum=0.9)
    # ignore_index=0 skips the padding tag (index 0 of VOCAB) in the loss.
    criterion = nn.CrossEntropyLoss(ignore_index=0)

    for epoch in range(1, hp.n_epochs + 1):
        train(model, train_iter, optimizer, criterion)
        print(f"=========eval at epoch={epoch}=========")
        # exist_ok avoids the racy exists()-then-makedirs pattern.
        os.makedirs('checkpoints', exist_ok=True)
        fname = os.path.join('checkpoints', str(epoch))
        precision, recall, f1 = eval(model, eval_iter, fname)
        torch.save(model.state_dict(), f"{fname}.pt")
-------------------------------------------------------------------------------- 1 | from pytorch_pretrained_bert import BertTokenizer, BertForMaskedLM 2 | import torch 3 | import json 4 | import re 5 | import ftfy 6 | from starlette.applications import Starlette 7 | from starlette.responses import JSONResponse, HTMLResponse, RedirectResponse 8 | import torch 9 | import uvicorn 10 | import aiohttp 11 | 12 | app = Starlette() 13 | 14 | device = 'cuda' if torch.cuda.is_available() else 'cpu' 15 | 16 | bert_model = 'bert-large-uncased' 17 | tokenizer = BertTokenizer.from_pretrained(bert_model) 18 | model = BertForMaskedLM.from_pretrained(bert_model) 19 | _ = model.eval().to(device) 20 | print("model loaded") 21 | def get_score(model, tokenizer, q_tensors, s_tensors, m_index, candidate): 22 | candidate_tokens = tokenizer.tokenize(candidate) 23 | candidate_ids = tokenizer.convert_tokens_to_ids(candidate_tokens) 24 | 25 | preds = model(q_tensors.to(device), s_tensors.to(device)) 26 | predictions_candidates = preds[0, m_index, candidate_ids].mean() 27 | return predictions_candidates.item() 28 | 29 | 30 | def get_word(row): 31 | """ 32 | 33 | """ 34 | question = re.sub('\_+', ' [MASK] ', ftfy.fix_encoding(row['question'])) 35 | question_tokens = tokenizer.tokenize(question) 36 | masked_index = question_tokens.index('[MASK]') 37 | ## Make segments 38 | segment_ids = [0] * len(question_tokens) 39 | segment_tensors = torch.tensor([segment_ids]) 40 | # Convert tokens to ids and tensors 41 | question_ids = tokenizer.convert_tokens_to_ids(question_tokens) 42 | question_tensors = torch.tensor([question_ids]).to(device) 43 | 44 | candidates = [ftfy.fix_encoding(row['1']), ftfy.fix_encoding(row['2']), ftfy.fix_encoding(row['3']), ftfy.fix_encoding(row['4'])] 45 | 46 | predict_tensor = torch.tensor([get_score(model, tokenizer, question_tensors, segment_tensors, masked_index, candidate) for candidate in candidates]) 47 | predict_idx = torch.argmax(predict_tensor).item() 48 | return 
candidates[predict_idx], predict_tensor 49 | 50 | 51 | @app.route("/fill_blank", methods = ["GET"]) 52 | async def fill_blank(request): 53 | row = {} 54 | row["question"] = request.query_params["question"] 55 | row["1"] = request.query_params["op1"] 56 | row["2"] = request.query_params["op2"] 57 | row["3"] = request.query_params["op3"] 58 | row["4"] = request.query_params["op4"] 59 | correct_word, prob_tensor = get_word(row) 60 | return JSONResponse({'word': correct_word}) 61 | 62 | 63 | 64 | 65 | @app.route("/") 66 | def form(_): 67 | return HTMLResponse(""" 68 |

Try the intelligent fill in the blanks

69 |
70 | 71 |
72 | 73 |
74 | 75 |
76 | 77 |
78 | 79 |
80 | 81 |
82 | """) 83 | 84 | @app.route("/form") 85 | def redirect_to_homepage(_): 86 | return RedirectResponse("/") 87 | 88 | 89 | 90 | if __name__ == "__main__": 91 | # To run this app start application on server with python 92 | # python FILENAME serve 93 | # ex: python server.py serve 94 | # if "serve" in sys.argv: 95 | uvicorn.run(app, host="0.0.0.0", port=9000) 96 | 97 | -------------------------------------------------------------------------------- /fill_the_blanks/fillblank.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MeRajat/SolvingAlmostAnythingWithBert/1bfb6d679a668179bbb783d1c0eb9f338cd0f1c5/fill_the_blanks/fillblank.gif --------------------------------------------------------------------------------