├── src ├── server │ ├── __init__.py │ ├── core │ │ ├── tests │ │ │ ├── __init__.py │ │ │ ├── base.py │ │ │ ├── test_config.py │ │ │ └── test_app.py │ │ ├── extensions.py │ │ ├── models │ │ │ ├── translation.py │ │ │ ├── language.py │ │ │ ├── feedback.py │ │ │ └── predict.py │ │ ├── config.py │ │ ├── utils_bucket │ │ │ ├── upload_download.py │ │ │ └── bucket.py │ │ ├── __init__.py │ │ ├── utils.py │ │ ├── resources │ │ │ └── translate.py │ │ └── model_load.py │ ├── nginx │ │ ├── Dockerfile │ │ └── nginx.conf │ ├── .env.dev │ ├── entrypoint.sh │ ├── entrypoint.prod.sh │ ├── Dockerfile │ ├── requirements.txt │ ├── Dockerfile.prod │ ├── manage.py │ └── README.md ├── client │ ├── public │ │ ├── robots.txt │ │ ├── favico.png │ │ ├── favicon.ico │ │ ├── e5b14e8b30296b86b78d06886aa5a458.png │ │ ├── manifest.json │ │ ├── bundle.js.LICENSE.txt │ │ ├── index.html │ │ └── 217.bundle.js │ ├── src │ │ ├── images │ │ │ ├── logo1.png │ │ │ ├── logo2.png │ │ │ ├── favico.png │ │ │ ├── favicon.ico │ │ │ ├── masakhane.png │ │ │ ├── masakhane_bg.png │ │ │ ├── masakhane_bg2.png │ │ │ └── masakhane-border.png │ │ ├── App.test.js │ │ ├── components │ │ │ ├── step3.test.js │ │ │ ├── multiStepForm.test.js │ │ │ ├── translateCard.test.js │ │ │ ├── step1.test.js │ │ │ ├── common │ │ │ │ ├── radioButton.js │ │ │ │ └── radioButton.test.js │ │ │ ├── step2.test.js │ │ │ ├── step3.js │ │ │ ├── multiStepForm.js │ │ │ ├── terms.js │ │ │ ├── step1.js │ │ │ ├── step2.js │ │ │ └── translateCard.js │ │ ├── setupTests.js │ │ ├── setupProxy.js │ │ ├── index.css │ │ ├── reportWebVitals.js │ │ ├── index.js │ │ ├── pages │ │ │ ├── Home.js │ │ │ ├── Faq.js │ │ │ └── About.js │ │ ├── logo.svg │ │ └── App.js │ ├── README.md │ ├── Dockerfile │ ├── package.json │ └── webpack.config.js ├── m_to_m_models │ ├── kubernetes │ │ ├── volume_claim.yaml │ │ ├── volume.yaml │ │ ├── secret.yaml │ │ ├── triton-deployment.yaml │ │ └── deployment.yaml │ ├── app.py │ ├── main.py │ ├── model_handlers.py │ ├── Dockerfile │ └── 
requirements.txt └── torchserve │ ├── setup_config.json │ ├── Download_Transformer_models.py │ └── transformer_handler.py ├── .python-version ├── .dockerignore ├── entrypoint.sh ├── kubernetes ├── ingress-def.yml └── sample-server.yaml ├── .github └── ISSUE_TEMPLATE │ └── dsfsi-standard-template.md ├── docker-compose.prod.yml ├── LICENSE ├── todo.md ├── docker-compose.yml ├── .gitignore ├── docs ├── start_app_prod_doc.md ├── debugging_setup.md ├── project_details.md └── start_app_locally_doc.md ├── requirements-python3.10.txt ├── environment.yaml ├── README.md └── Makefile /src/server/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /.python-version: -------------------------------------------------------------------------------- 1 | 3.10.7 2 | -------------------------------------------------------------------------------- /src/server/core/tests/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /src/client/public/robots.txt: -------------------------------------------------------------------------------- 1 | # https://www.robotstxt.org/robotstxt.html 2 | User-agent: * 3 | Disallow: 4 | -------------------------------------------------------------------------------- /src/client/public/favico.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dsfsi/masakhane-web/HEAD/src/client/public/favico.png -------------------------------------------------------------------------------- /src/client/public/favicon.ico: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dsfsi/masakhane-web/HEAD/src/client/public/favicon.ico 
-------------------------------------------------------------------------------- /src/client/src/images/logo1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dsfsi/masakhane-web/HEAD/src/client/src/images/logo1.png -------------------------------------------------------------------------------- /src/client/src/images/logo2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dsfsi/masakhane-web/HEAD/src/client/src/images/logo2.png -------------------------------------------------------------------------------- /src/client/src/images/favico.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dsfsi/masakhane-web/HEAD/src/client/src/images/favico.png -------------------------------------------------------------------------------- /src/client/src/images/favicon.ico: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dsfsi/masakhane-web/HEAD/src/client/src/images/favicon.ico -------------------------------------------------------------------------------- /src/client/src/images/masakhane.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dsfsi/masakhane-web/HEAD/src/client/src/images/masakhane.png -------------------------------------------------------------------------------- /src/client/src/images/masakhane_bg.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dsfsi/masakhane-web/HEAD/src/client/src/images/masakhane_bg.png -------------------------------------------------------------------------------- /src/client/src/images/masakhane_bg2.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/dsfsi/masakhane-web/HEAD/src/client/src/images/masakhane_bg2.png -------------------------------------------------------------------------------- /src/server/nginx/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM nginx:1.17-alpine 2 | 3 | RUN rm /etc/nginx/conf.d/default.conf 4 | COPY nginx.conf /etc/nginx/conf.d -------------------------------------------------------------------------------- /.dockerignore: -------------------------------------------------------------------------------- 1 | env 2 | .dockerignore 3 | Dockerfile-dev 4 | Dockerfile-prod 5 | 6 | src/server/models/joeynmt 7 | src/server/core/models/joeynmt -------------------------------------------------------------------------------- /src/client/src/images/masakhane-border.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dsfsi/masakhane-web/HEAD/src/client/src/images/masakhane-border.png -------------------------------------------------------------------------------- /src/client/public/e5b14e8b30296b86b78d06886aa5a458.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dsfsi/masakhane-web/HEAD/src/client/public/e5b14e8b30296b86b78d06886aa5a458.png -------------------------------------------------------------------------------- /src/server/core/extensions.py: -------------------------------------------------------------------------------- 1 | from flask_sqlalchemy import SQLAlchemy 2 | from flask_migrate import Migrate 3 | 4 | import os, sqlite3 5 | 6 | db = SQLAlchemy() 7 | migrate = Migrate() 8 | -------------------------------------------------------------------------------- /entrypoint.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | echo "Waiting for postgres ..." 4 | 5 | while ! 
nc -z users-db 5432; do 6 | sleep 0.1 7 | done 8 | echo "PostgreSQL started" 9 | 10 | python app.py -------------------------------------------------------------------------------- /src/client/src/App.test.js: -------------------------------------------------------------------------------- 1 | import { render, screen } from '@testing-library/react'; 2 | import App from './App'; 3 | 4 | describe('App', () => { 5 | test('renders App component', () => { 6 | render(); 7 | }); 8 | }); 9 | -------------------------------------------------------------------------------- /src/client/src/components/step3.test.js: -------------------------------------------------------------------------------- 1 | import { render, screen } from '@testing-library/react'; 2 | import Step3 from './step3'; 3 | 4 | describe('Step3', () => { 5 | test('renders Step3 component', () => { 6 | render(); 7 | }); 8 | }); -------------------------------------------------------------------------------- /src/server/.env.dev: -------------------------------------------------------------------------------- 1 | FLASK_APP=core/__init__.py 2 | FLASK_ENV=development 3 | DATABASE_URL=postgresql://masakhane:masakhane@db:5432/masakhane 4 | SQL_HOST=db 5 | SQL_PORT=5432 6 | DATABASE=postgres 7 | SECRET_KEY=secret-key 8 | MODEL=./models/joeynmt/ 9 | FLASK_DEBUG=1 10 | -------------------------------------------------------------------------------- /src/client/src/setupTests.js: -------------------------------------------------------------------------------- 1 | // jest-dom adds custom jest matchers for asserting on DOM nodes. 
2 | // allows you to do things like: 3 | // expect(element).toHaveTextContent(/react/i) 4 | // learn more: https://github.com/testing-library/jest-dom 5 | import '@testing-library/jest-dom'; 6 | -------------------------------------------------------------------------------- /src/client/src/components/multiStepForm.test.js: -------------------------------------------------------------------------------- 1 | import { render, screen } from '@testing-library/react'; 2 | import MultiStepForm from './multiStepForm'; 3 | 4 | describe('MultiStepForm', () => { 5 | test('renders MultiStepForm component', () => { 6 | render(); 7 | }); 8 | }); -------------------------------------------------------------------------------- /src/client/src/components/translateCard.test.js: -------------------------------------------------------------------------------- 1 | import { render, screen } from '@testing-library/react'; 2 | import TranslateCard from './translateCard'; 3 | 4 | describe('TranslateCard', () => { 5 | test('renders TranslateCard component', () => { 6 | render(); 7 | }); 8 | }); -------------------------------------------------------------------------------- /src/m_to_m_models/kubernetes/volume_claim.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: v1 2 | kind: PersistentVolumeClaim 3 | metadata: 4 | name: masakhane-model-cache-volume-claim 5 | namespace: masakhane 6 | spec: 7 | storageClassName: manual 8 | accessModes: 9 | - ReadWriteOnce 10 | resources: 11 | requests: 12 | storage: 8Gi 13 | -------------------------------------------------------------------------------- /src/client/src/setupProxy.js: -------------------------------------------------------------------------------- 1 | // const { createProxyMiddleware } = require('http-proxy-middleware'); 2 | 3 | // module.exports = function(app) { 4 | // app.use( 5 | // '/translate', 6 | // createProxyMiddleware({ 7 | // target: 'http://localhost:5000', 8 | // 
changeOrigin: true, 9 | // }) 10 | // ); 11 | // }; -------------------------------------------------------------------------------- /src/server/entrypoint.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | if [ "$DATABASE" = "postgres" ] 4 | then 5 | echo "Waiting for postgres..." 6 | 7 | while ! nc -z $SQL_HOST $SQL_PORT; do 8 | sleep 0.1 9 | done 10 | 11 | echo "PostgreSQL started" 12 | fi 13 | 14 | python manage.py create_db 15 | python manage.py add_language en-sw-JW300 16 | exec "$@" -------------------------------------------------------------------------------- /src/torchserve/setup_config.json: -------------------------------------------------------------------------------- 1 | { 2 | "model_name": "masakhane/m2m100_418M_en_swa_rel_news", 3 | "mode": "text_generation", 4 | "do_lower_case":false, 5 | "num_labels":"0", 6 | "save_mode":"pretrained", 7 | "max_length":"150", 8 | "captum_explanation":true, 9 | "embedding_name": "bert", 10 | "FasterTransformer":false, 11 | "BetterTransformer":false, 12 | "model_parallel":false 13 | } 14 | -------------------------------------------------------------------------------- /src/m_to_m_models/kubernetes/volume.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: v1 2 | kind: PersistentVolume 3 | metadata: 4 | name: translation-volume-storage 5 | namespace: masakhane 6 | labels: 7 | type: local 8 | spec: 9 | storageClassName: manual 10 | accessModes: 11 | - ReadWriteOnce 12 | capacity: 13 | storage: 10Gi 14 | hostPath: 15 | path: /models_datastore # the host on the minikube vm 16 | -------------------------------------------------------------------------------- /kubernetes/ingress-def.yml: -------------------------------------------------------------------------------- 1 | apiVersion: networking.k8s.io/v1 2 | kind: Ingress 3 | metadata: 4 | name: seldon-ingress 5 | namespace: seldon 6 | spec: 7 | rules: 8 | - host: 
seldon-ingress.com 9 | http: 10 | paths: 11 | - path: "/" 12 | pathType: Prefix 13 | backend: 14 | service: 15 | name: iris-model-sklearn-iris-predictor 16 | port: 17 | number: 8000 18 | -------------------------------------------------------------------------------- /src/client/src/index.css: -------------------------------------------------------------------------------- 1 | body { 2 | margin: 0; 3 | font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', 'Roboto', 'Oxygen', 4 | 'Ubuntu', 'Cantarell', 'Fira Sans', 'Droid Sans', 'Helvetica Neue', 5 | sans-serif; 6 | -webkit-font-smoothing: antialiased; 7 | -moz-osx-font-smoothing: grayscale; 8 | } 9 | 10 | code { 11 | font-family: source-code-pro, Menlo, Monaco, Consolas, 'Courier New', 12 | monospace; 13 | } 14 | -------------------------------------------------------------------------------- /src/client/src/reportWebVitals.js: -------------------------------------------------------------------------------- 1 | const reportWebVitals = onPerfEntry => { 2 | if (onPerfEntry && onPerfEntry instanceof Function) { 3 | import('web-vitals').then(({ getCLS, getFID, getFCP, getLCP, getTTFB }) => { 4 | getCLS(onPerfEntry); 5 | getFID(onPerfEntry); 6 | getFCP(onPerfEntry); 7 | getLCP(onPerfEntry); 8 | getTTFB(onPerfEntry); 9 | }); 10 | } 11 | }; 12 | 13 | export default reportWebVitals; 14 | -------------------------------------------------------------------------------- /src/m_to_m_models/kubernetes/secret.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: v1 2 | kind: Secret 3 | metadata: 4 | name: masakhane-container-secret 5 | namespace: masakhane 6 | type: Opaque 7 | stringData: 8 | RCLONE_CONFIG_S3_TYPE: s3 9 | RCLONE_CONFIG_S3_PROVIDER: minio 10 | RCLONE_CONFIG_S3_ENV_AUTH: "false" 11 | RCLONE_CONFIG_S3_ACCESS_KEY_ID: minioadmin 12 | RCLONE_CONFIG_S3_SECRET_ACCESS_KEY: minioadmin 13 | RCLONE_CONFIG_S3_ENDPOINT: http://minio.minio-system.svc.cluster.local:9000 14 
| -------------------------------------------------------------------------------- /src/client/src/components/step1.test.js: -------------------------------------------------------------------------------- 1 | import { render, screen } from '@testing-library/react'; 2 | import Step1 from './step1'; 3 | 4 | describe('Step1', () => { 5 | test('renders Step1 component', () => { 6 | const props = { 7 | src_lang: "none", 8 | tgt_lang: "none", 9 | setForm: () => {}, 10 | formData: {}, 11 | navigation: {}, 12 | handleSubmitFeedback: () => {} 13 | }; 14 | render(); 15 | }); 16 | }); 17 | -------------------------------------------------------------------------------- /src/server/core/tests/base.py: -------------------------------------------------------------------------------- 1 | from flask_testing import TestCase 2 | from core.extensions import db 3 | from core import masakhane, load_model 4 | 5 | 6 | class BaseTestCase(TestCase): 7 | def create_app(self): 8 | masakhane.config.from_object('core.config.Config') 9 | return masakhane 10 | 11 | def setUp(self): 12 | db.create_all() 13 | db.session.commit() 14 | 15 | def tearDown(self): 16 | db.session.remove() 17 | db.drop_all() -------------------------------------------------------------------------------- /src/client/public/manifest.json: -------------------------------------------------------------------------------- 1 | { 2 | "short_name": "Masakhane Web", 3 | "name": "Create React App Sample", 4 | "icons": [ 5 | { 6 | "src": "favico.png", 7 | "type": "image/png", 8 | "sizes": "192x192" 9 | }, 10 | { 11 | "src": "favico.png", 12 | "type": "image/png", 13 | "sizes": "512x512" 14 | } 15 | ], 16 | "start_url": ".", 17 | "display": "standalone", 18 | "theme_color": "#000000", 19 | "background_color": "#ffffff" 20 | } 21 | -------------------------------------------------------------------------------- /src/client/src/components/common/radioButton.js: 
-------------------------------------------------------------------------------- 1 | import { Form } from 'react-bootstrap'; 2 | import React from 'react'; 3 | 4 | const RadioButton = ({ value, label, selected, ...otherProps }) => { 5 | return( 6 |
7 | {label} 8 | 9 |
10 | ); 11 | } 12 | 13 | export default RadioButton; -------------------------------------------------------------------------------- /src/client/src/components/step2.test.js: -------------------------------------------------------------------------------- 1 | import { render, screen } from '@testing-library/react'; 2 | import Step2 from './step2'; 3 | 4 | describe('Step2', () => { 5 | test('renders Step2 component', () => { 6 | const props = { 7 | src_lang: "none", 8 | tgt_lang: "none", 9 | text: "", 10 | translation: "", 11 | setForm: () => {}, 12 | formData: {}, 13 | navigation: {}, 14 | handleSubmitFeedback: () => {} 15 | }; 16 | render(); 17 | }); 18 | }); -------------------------------------------------------------------------------- /src/server/core/models/translation.py: -------------------------------------------------------------------------------- 1 | class Translation: 2 | def __init__(self, src_lang, tgt_lang, input, output) -> None: 3 | super().__init__() 4 | self.src_lang = src_lang 5 | self.tgt_lang = tgt_lang 6 | self.input = input 7 | self.output = output 8 | 9 | @property 10 | def data(self): 11 | return { 12 | 'src_lang': self.src_lang, 13 | 'tgt_lang': self.tgt_lang, 14 | 'input': self.input, 15 | 'output': self.output 16 | } -------------------------------------------------------------------------------- /src/server/entrypoint.prod.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | if [ "$DATABASE" = "postgres" ] 4 | then 5 | echo "Waiting for postgres..." 6 | 7 | while ! nc -z $SQL_HOST $SQL_PORT; do 8 | sleep 0.1 9 | done 10 | 11 | echo "PostgreSQL started" 12 | fi 13 | 14 | if [ "$FLASK_ENV" = "development" ] 15 | then 16 | echo "Creating the database tables..." 
17 | python manage.py clean 18 | echo "Tables created" 19 | fi 20 | 21 | python manage.py create_db 22 | 23 | python manage.py add_language en-sw-JW300 24 | 25 | exec "$@" -------------------------------------------------------------------------------- /src/client/src/components/common/radioButton.test.js: -------------------------------------------------------------------------------- 1 | import { 2 | render, 3 | screen, 4 | getByRole, 5 | findByText, 6 | } from '@testing-library/react'; 7 | import RadioButton from './radioButton'; 8 | 9 | describe('RadioButton', () => { 10 | test('renders RadioButton component', () => { 11 | render(); 12 | }); 13 | 14 | // test('should have a radio button input', () => { 15 | 16 | // }) 17 | 18 | // test('should fire an onchange event', () => { 19 | 20 | // }) 21 | 22 | }); 23 | -------------------------------------------------------------------------------- /src/client/src/index.js: -------------------------------------------------------------------------------- 1 | import React from 'react'; 2 | import ReactDOM from 'react-dom'; 3 | import App from './App'; 4 | import reportWebVitals from './reportWebVitals'; 5 | import "core-js/stable"; 6 | import "regenerator-runtime/runtime"; 7 | 8 | ReactDOM.render( 9 | , 10 | document.getElementById('root') 11 | ); 12 | 13 | // If you want to start measuring performance in your app, pass a function 14 | // to log results (for example: reportWebVitals(console.log)) 15 | // or send to an analytics endpoint. 
Learn more: https://bit.ly/CRA-vitals 16 | reportWebVitals(); 17 | -------------------------------------------------------------------------------- /src/m_to_m_models/kubernetes/triton-deployment.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: machinelearning.seldon.io/v1 2 | kind: SeldonDeployment 3 | metadata: 4 | name: triton-masakhane 5 | namespace: masakhane 6 | spec: 7 | name: default 8 | predictors: 9 | - graph: 10 | implementation: TRITON_SERVER 11 | logger: 12 | mode: all 13 | modelUri: s3://language-models/onnx-m2m100/1 14 | envSecretRefName: masakhane-container-secret 15 | name: triston-masakhane-predictor 16 | type: MODEL 17 | name: default 18 | replicas: 1 19 | protocol: kfserving 20 | -------------------------------------------------------------------------------- /src/client/README.md: -------------------------------------------------------------------------------- 1 | # The Frontend 2 | 3 | **NOTE** I know next to nothing about this frontend so update needed 4 | 5 | The client is running on http://localhost:3000 6 | 7 | It consists of 8 | - ReactJS 9 | - Webpack 10 | 11 | # Available npm scripts: 12 | 13 | | Command | Executes | 14 | | ------- | -------- | 15 | | `npm run develop` | `webpack-dev-server --host 0.0.0.0` | 16 | | `npm run start-api` | `cd ../server && python app.py` | 17 | | `npm run build` | `react-scripts build` | 18 | | `npm run test` | `react-scripts test` | 19 | | `npm run eject` | `react-scripts eject` | -------------------------------------------------------------------------------- /src/client/Dockerfile: -------------------------------------------------------------------------------- 1 | # Building the application 2 | FROM node:lts-buster as build 3 | 4 | WORKDIR /app 5 | 6 | ENV PATH /app/node_modules/.bin:$PATH 7 | # Increate node max memory, the default memory limit is too low for building 8 | ENV NODE_OPTIONS --max-old-space-size=8192 9 | 10 | # add dependencies 11 | 
COPY package.json package-lock.json ./ 12 | # install dependencies 13 | RUN npm install --legacy-peer-deps 14 | RUN npm i webpack webpack-cli --legacy-peer-deps 15 | RUN npm i @babel/core @babel/preset-env @babel/preset-react babel-loader --legacy-peer-deps 16 | 17 | # add app 18 | COPY . ./ 19 | 20 | # RUN npm command 21 | CMD ["npm", "run", "develop"] 22 | -------------------------------------------------------------------------------- /src/client/src/components/step3.js: -------------------------------------------------------------------------------- 1 | import { Button } from 'react-bootstrap'; 2 | import React from 'react'; 3 | 4 | const Step3 = ({ setShow }) => { 5 | const handleShow = () => setShow(false); 6 | 7 | return ( 8 |
9 |
THANK YOU!
10 | {/*

We appreciate your feedback and your contribution which help us make translations better.

*/} 11 |
12 | 13 |
14 |
15 | ) 16 | } 17 | 18 | export default Step3; 19 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/dsfsi-standard-template.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: DSFSI Standard Template 3 | about: Describe this issue template's purpose here. 4 | title: '' 5 | labels: '' 6 | assignees: '' 7 | 8 | --- 9 | 10 | #### Description 11 | A clear and concise description of what the issue is about. 12 | 13 | #### Screenshots 14 | ![Downhill Windmills](http://i.giphy.com/KO8AG2EByqkFi.gif) 15 | 16 | #### Files 17 | A list of relevant files for this issue. This will help people navigate the project and offer some clues of where to start. 18 | 19 | #### To Reproduce 20 | If this issue is describing a bug, include some steps to reproduce the behavior. 21 | 22 | #### Tasks 23 | Include specific tasks in the order they need to be done in. Include links to specific lines of code where the task should happen at. 
24 | - [ ] Task 1 25 | - [ ] Task 2 26 | - [ ] Task 3 27 | -------------------------------------------------------------------------------- /kubernetes/sample-server.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: machinelearning.seldon.io/v1 2 | kind: SeldonDeployment 3 | metadata: 4 | name: iris-model 5 | namespace: seldon 6 | spec: 7 | name: iris 8 | annotations: 9 | prometheus.io/scrape: "false" 10 | predictors: 11 | - componentSpecs: 12 | - spec: 13 | containers: 14 | - env: 15 | - name: SELDON_LOG_LEVEL 16 | value: DEBUG 17 | - name: SELDON_DEBUG 18 | value: 'True' 19 | - name: FLASK_DEBUG 20 | value: 'True' 21 | image: seldonio/sklearn-iris:0.3 22 | imagePullPolicy: IfNotPresent 23 | name: sklearn-iris-classifier 24 | graph: 25 | endpoint: 26 | type: REST 27 | name: sklearn-iris-classifier 28 | type: MODEL 29 | name: sklearn-iris-predictor 30 | replicas: 1 31 | -------------------------------------------------------------------------------- /src/m_to_m_models/app.py: -------------------------------------------------------------------------------- 1 | from flask import Flask, jsonify, request 2 | from flask_cors import CORS 3 | import logging 4 | 5 | 6 | logger = logging.getLogger(__name__) 7 | 8 | def create_app(model_handler): 9 | app = Flask(__name__, static_url_path="") 10 | CORS(app) 11 | 12 | @app.route("/predict", methods=["GET", "POST"]) 13 | def predict(): 14 | request_data = request.get_json() 15 | logger.debug("REST Request: %s", request) 16 | response = model_handler.predict_raw(request_data) 17 | 18 | json_response = jsonify(response) 19 | if ( 20 | isinstance(response, dict) 21 | and "status" in response 22 | and "code" in response["status"] 23 | ): 24 | json_response.status_code = response["status"]["code"] 25 | 26 | logger.debug("REST Response: %s", response) 27 | return json_response 28 | 29 | return app 30 | -------------------------------------------------------------------------------- 
/src/server/nginx/nginx.conf: -------------------------------------------------------------------------------- 1 | upstream masakhane-web { 2 | server api:5000; 3 | } 4 | 5 | upstream masakhane-web-client { 6 | server client:3000; 7 | } 8 | 9 | server { 10 | 11 | listen 80; 12 | 13 | root /images/; 14 | 15 | location / { 16 | proxy_pass http://masakhane-web-client; 17 | proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for; 18 | proxy_set_header Host $host; 19 | proxy_redirect off; 20 | } 21 | 22 | location /translate { 23 | proxy_pass http://masakhane-web; 24 | proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for; 25 | proxy_set_header Host $host; 26 | proxy_redirect off; 27 | } 28 | 29 | location /save { 30 | proxy_pass http://masakhane-web; 31 | proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for; 32 | proxy_set_header Host $host; 33 | proxy_redirect off; 34 | } 35 | } -------------------------------------------------------------------------------- /docker-compose.prod.yml: -------------------------------------------------------------------------------- 1 | version: '3.7' 2 | 3 | services: 4 | api: 5 | build : 6 | context: ./src/server 7 | dockerfile: Dockerfile.prod 8 | command: gunicorn --bind 0.0.0.0:5000 manage:masakhane 9 | ports: 10 | - 5000:5000 11 | # expose: 12 | # - 5000 13 | env_file: 14 | - ./.env.prod 15 | depends_on: 16 | - db 17 | 18 | nginx: 19 | build: ./src/server/nginx 20 | ports: 21 | - 80:80 22 | depends_on: 23 | - api 24 | 25 | db: 26 | image: postgres:12-alpine 27 | volumes: 28 | - postgres_data:/var/lib/postgresql/data/ 29 | env_file: 30 | - ./.env.prod.db 31 | 32 | client: 33 | build : 34 | context: ./src/client 35 | dockerfile: Dockerfile 36 | # command: curl --location --request GET 'http://0.0.0.0:5000/update' --data-raw '' 37 | volumes: 38 | - './src/client:/usr/src/app' 39 | ports: 40 | - 3000:3000 41 | 42 | depends_on: 43 | - api 44 | 45 | volumes: 46 | postgres_data: 
-------------------------------------------------------------------------------- /src/server/core/config.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | basedir = os.path.abspath(os.path.dirname(__file__)) 4 | 5 | 6 | class Config: 7 | DEBUG = False 8 | SQLALCHEMY_DATABASE_URI = os.getenv("DATABASE_URL", "sqlite:///masakhane.db") 9 | 10 | SQLALCHEMY_TRACK_MODIFICATIONS = False 11 | MODEL = os.getenv("MODEL", "./models/joeynmt/") 12 | TEMP = "./temp/" 13 | MODEL_ALL_FILE = "./available_models.tsv" 14 | JSON = "./languages.json" 15 | 16 | 17 | class DevelopmentConfig(Config): 18 | DEBUG = True 19 | SECRET_KEY = 'super-secret-key' 20 | basedir = os.path.abspath(os.path.dirname(__file__)) 21 | FLASK_DEBUG=1 22 | 23 | 24 | class StagingConfig(Config): 25 | """ 26 | This is an imitation of the production environment for 27 | testing purpose. 28 | """ 29 | DEBUG = True 30 | TESTING = True 31 | SECRET_KEY = os.getenv('SECRET_KEY', "key_testing") 32 | # MODEL = os.getenv('MODEL', "./") 33 | 34 | 35 | class ProductionConfig(Config): 36 | SECRET_KEY = os.getenv('SECRET_KEY', "key_production") 37 | # MODEL = os.getenv('MODEL', "./") 38 | -------------------------------------------------------------------------------- /src/server/core/utils_bucket/upload_download.py: -------------------------------------------------------------------------------- 1 | from os import name, path 2 | from google.cloud.storage import Blob 3 | from google.cloud import storage 4 | 5 | 6 | client = storage.Client(project="dsfsi-232208") 7 | bucket = client.get_bucket("maskhane-web-test") 8 | encryption_key = "c7f32af42e45e85b9848a6a14dd2a8f6" 9 | 10 | # blob = Blob("secure-data", bucket, encryption_key=encryption_key) 11 | blob = Blob("secure-data", bucket) 12 | 13 | 14 | 15 | # Download 16 | # blob.upload_from_string("my secret message.") 17 | # with open("/tmp/my-secure-file", "wb") as file_obj: 18 | # client.download_to_file(blob, file_obj) 
19 | 20 | if __name__ == "__main__": 21 | path_to_file_for_upload = "../../data/external/available_models.tsv" 22 | # if (path.exists(path_to_file_for_upload)): 23 | # # Upload 24 | # with open(path_to_file_for_upload, "rb") as my_file: 25 | # print("yes") 26 | # blob.upload_from_file(my_file) 27 | 28 | where_to_download = "../../data/" 29 | with open(where_to_download, "wb") as file_obj: 30 | client.download_to_file(blob, file_obj) -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2021 Data Science for Social Impact @ University of Pretoria 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
22 | -------------------------------------------------------------------------------- /todo.md: -------------------------------------------------------------------------------- 1 | - should not put the model in a docker container, use the file storage instead and make it available as a volume to the container 2 | - use a model registry to store models, build one with mlflow. 3 | - Run different services for each model, and use a load balancer to route the requests to the right model. 4 | 5 | 6 | 7 | torch-model-archiver --model-name MasaknaneEnSwaRelNews \ 8 | --version 1.0 \ 9 | --serialized-file src/torchserve/transformer_models/masakhane/m2m100_418M_en_swa_rel_news/pytorch_model.bin \ 10 | --handler src/torchserve/transformer_handler.py \ 11 | --extra-files "src/torchserve/transformer_models/masakhane/m2m100_418M_en_swa_rel_news/config.json, 12 | src/torchserve/transformer_models/masakhane/m2m100_418M_en_swa_rel_news/special_tokens_map.json, 13 | src/torchserve/transformer_models/masakhane/m2m100_418M_en_swa_rel_news/tokenizer_config.json, 14 | src/torchserve/transformer_models/masakhane/m2m100_418M_en_swa_rel_news/vocab.json, 15 | src/torchserve/transformer_models/masakhane/m2m100_418M_en_swa_rel_news/generation_config.json, 16 | src/torchserve/transformer_models/masakhane/m2m100_418M_en_swa_rel_news/sentencepiece.bpe.model" 17 | -------------------------------------------------------------------------------- /src/server/core/utils_bucket/bucket.py: -------------------------------------------------------------------------------- 1 | from google.cloud import storage 2 | from google.oauth2 import service_account 3 | import pathlib, io, ipdb 4 | 5 | # credentials = service_account.Credentials.from_service_account_file(SERVICE_ACCOUNT_FILE) 6 | 7 | client = storage.Client( 8 | project="dsfsi-232208", 9 | # credentials=credentials 10 | ) 11 | 12 | 13 | from google.cloud import storage 14 | from zipfile import ZipFile, ZipInfo 15 | 16 | def upload(): 17 | source_dir 
= pathlib.Path("../../models/joeynmt/en-lua/") 18 | 19 | archive = io.BytesIO() 20 | with ZipFile(archive, 'w') as zip_archive: 21 | for file_path in source_dir.iterdir(): 22 | # ipdb.set_trace() 23 | with open(file_path, 'r') as file: 24 | zip_entry_name = file_path.name 25 | zip_file = ZipInfo(zip_entry_name) 26 | zip_archive.writestr(zip_file, file.read()) 27 | 28 | ipdb.set_trace() 29 | archive.seek(0) 30 | 31 | object_name = 'super-important-data-v1' 32 | bucket = client.bucket("maskhane-web-test") 33 | 34 | blob = storage.Blob(object_name, bucket) 35 | blob.upload_from_file(archive, content_type='application/zip') 36 | 37 | upload() -------------------------------------------------------------------------------- /src/client/src/pages/Home.js: -------------------------------------------------------------------------------- 1 | import { Navbar, Nav, Container, Jumbotron, Image, Row, Col } from 'react-bootstrap' 2 | import React from 'react'; 3 | import TranslateCard from '../components/translateCard'; 4 | import image from '../images/masakhane-border.png'; 5 | 6 | function Home() { 7 | return ( 8 |
9 | 10 | 11 |
12 |
13 | 14 |
15 |

This is a community research project and as such, this service is not a production system. Therefore, it should not be used for official translations. Don't see your language and interested in training one up yourself? Go here to learn how to contribute a model!

16 |

The models are powered by JoeyNMT🐨; a minimalist machine translation toolkit based on pytorch.

17 |
18 |
19 | ); 20 | } 21 | 22 | export default Home; 23 | -------------------------------------------------------------------------------- /src/server/Dockerfile: -------------------------------------------------------------------------------- 1 | # base image 2 | FROM python:3.6.9 3 | 4 | # set working directory 5 | WORKDIR /usr/src/app 6 | 7 | # set environment variables 8 | ENV PYTHONDONTWRITEBYTECODE 1 9 | ENV PYTHONUNBUFFERED 1 10 | 11 | # install system dependencies 12 | RUN apt-get update && apt-get install -y netcat 13 | RUN apt-get update 14 | RUN apt-get install -y gnupg lsb-release wget 15 | 16 | RUN lsb_release -c -s > /tmp/lsb_release 17 | RUN GCSFUSE_REPO=$(cat /tmp/lsb_release); echo "deb http://packages.cloud.google.com/apt gcsfuse-$GCSFUSE_REPO main" | tee /etc/apt/sources.list.d/gcsfuse.list 18 | RUN wget -O - https://packages.cloud.google.com/apt/doc/apt-key.gpg | apt-key add - 19 | 20 | RUN apt-get update 21 | RUN apt-get install -y gcsfuse 22 | 23 | 24 | # add and 25 | COPY ./requirements.txt /usr/src/app/requirements.txt 26 | # RUN pip install to install requirements 27 | RUN pip install --upgrade pip 28 | RUN pip install -r requirements.txt 29 | 30 | # add entrypoint.sh 31 | COPY ./entrypoint.sh /usr/src/app/entrypoint.sh 32 | 33 | # add app 34 | COPY . 
/usr/src/app 35 | 36 | 37 | # run server (https://github.com/testdrivenio/testdriven-app/issues/25) 38 | CMD ["sh","-c","chmod 777 /usr/src/app/entrypoint.sh"] 39 | ENTRYPOINT ["/usr/src/app/entrypoint.sh"] 40 | -------------------------------------------------------------------------------- /src/m_to_m_models/main.py: -------------------------------------------------------------------------------- 1 | from src.seldon_core_components.app import create_app 2 | from typing import Tuple, List 3 | from pydoc import locate 4 | import argparse 5 | 6 | def parse_args() -> Tuple[argparse.Namespace, List[str]]: 7 | """parse the following arguments 8 | --model_handler : the path to the class of the model handler 9 | --model_path : the path to the model 10 | --src_lang : the source language 11 | --trg_lang : the target language 12 | Returns: 13 | Tuple[argparse.Namespace, List[str]]: _description_ 14 | """ 15 | parser = argparse.ArgumentParser() 16 | parser.add_argument("--model_handler", type=str, required=True) 17 | parser.add_argument("--model_path", type=str, required=True) 18 | parser.add_argument("--src_lang", type=str, required=True) 19 | parser.add_argument("--trg_lang", type=str, required=True) 20 | args, unknown = parser.parse_known_args() 21 | return args, unknown 22 | 23 | 24 | def main(): 25 | args, _ = parse_args() 26 | ModelHandleClass = locate(args.model_handler) 27 | model_handler = ModelHandleClass(args.model_path, args.src_lang, args.trg_lang) 28 | app = create_app(model_handler) 29 | app.run() 30 | 31 | 32 | if __name__ == "__main__": 33 | main() 34 | -------------------------------------------------------------------------------- /src/server/core/models/language.py: -------------------------------------------------------------------------------- 1 | from enum import unique 2 | 3 | from flask_sqlalchemy import SQLAlchemy 4 | 5 | from core.extensions import db 6 | 7 | class Language(db.Model): 8 | __tablename__ = 'language' 9 | # id = db.Column(db.Integer, 
primary_key=True) 10 | src_tgt_dmn = db.Column(db.String(50), primary_key=True) 11 | source_target_domain = db.Column(db.String(50), nullable=True) 12 | 13 | created_at = db.Column(db.DateTime(), nullable=False,\ 14 | server_default=db.func.now()) 15 | update_at = db.Column(db.DateTime(), nullable=False,\ 16 | server_default=db.func.now(), onupdate=db.func.now()) 17 | 18 | def __init__(self, src_tgt_dmn, source_target_domain="") : 19 | super().__init__() 20 | self.src_tgt_dmn = src_tgt_dmn 21 | self.source_target_domain = source_target_domain 22 | 23 | def save(self): 24 | db.session.add(self) 25 | db.session.commit() 26 | 27 | def to_json(self): 28 | source, target, domain = self.src_tgt_dmn.split('-') 29 | return { 30 | 'source': source, 31 | 'target': target, 32 | 'src-tgt_domn' : self.source_target_domain, 33 | 'domain': domain 34 | } -------------------------------------------------------------------------------- /docker-compose.yml: -------------------------------------------------------------------------------- 1 | version: '3.6' 2 | 3 | services: 4 | 5 | server: 6 | # container_name: flask-api 7 | build : 8 | context: ./src/server 9 | dockerfile: Dockerfile 10 | command: python manage.py run -h 0.0.0.0 11 | volumes: 12 | - './src/server:/usr/src/app' 13 | - './models/joeynmt:/usr/src/app/models/joeynmt' 14 | ports: 15 | - 5000:5000 16 | 17 | env_file: 18 | - ./src/server/.env.dev 19 | 20 | depends_on: 21 | - db 22 | 23 | db: 24 | image: postgres:12-alpine 25 | volumes: 26 | - postgres_data:/var/lib/postgresql/data/ 27 | environment: 28 | - POSTGRES_USER=masakhane 29 | - POSTGRES_PASSWORD=masakhane 30 | - POSTGRES_DB=masakhane 31 | 32 | client: 33 | build : 34 | context: ./src/client 35 | dockerfile: Dockerfile 36 | # command: curl --location --request GET 'http://0.0.0.0:5000/update' --data-raw '' 37 | volumes: 38 | - './src/client:/usr/src/app' 39 | ports: 40 | - 3000:3000 41 | 42 | depends_on: 43 | - server 44 | 45 | # To persist the data beyond the life 
of the container 46 | # we configured a volume. This config will bind 47 | # postgres_data to the "/var/lib/postgresql/data/" directory in the container. 48 | volumes: 49 | postgres_data: 50 | -------------------------------------------------------------------------------- /src/client/public/bundle.js.LICENSE.txt: -------------------------------------------------------------------------------- 1 | /* 2 | object-assign 3 | (c) Sindre Sorhus 4 | @license MIT 5 | */ 6 | 7 | /*! 8 | Copyright (c) 2017 Jed Watson. 9 | Licensed under the MIT License (MIT), see 10 | http://jedwatson.github.io/classnames 11 | */ 12 | 13 | /** @license React v0.20.1 14 | * scheduler.production.min.js 15 | * 16 | * Copyright (c) Facebook, Inc. and its affiliates. 17 | * 18 | * This source code is licensed under the MIT license found in the 19 | * LICENSE file in the root directory of this source tree. 20 | */ 21 | 22 | /** @license React v16.13.1 23 | * react-is.production.min.js 24 | * 25 | * Copyright (c) Facebook, Inc. and its affiliates. 26 | * 27 | * This source code is licensed under the MIT license found in the 28 | * LICENSE file in the root directory of this source tree. 29 | */ 30 | 31 | /** @license React v17.0.1 32 | * react-dom.production.min.js 33 | * 34 | * Copyright (c) Facebook, Inc. and its affiliates. 35 | * 36 | * This source code is licensed under the MIT license found in the 37 | * LICENSE file in the root directory of this source tree. 38 | */ 39 | 40 | /** @license React v17.0.1 41 | * react.production.min.js 42 | * 43 | * Copyright (c) Facebook, Inc. and its affiliates. 44 | * 45 | * This source code is licensed under the MIT license found in the 46 | * LICENSE file in the root directory of this source tree. 
47 | */ 48 | -------------------------------------------------------------------------------- /src/m_to_m_models/model_handlers.py: -------------------------------------------------------------------------------- 1 | from transformers import AutoTokenizer 2 | from optimum.onnxruntime import ORTModelForSeq2SeqLM 3 | from optimum.pipelines import pipeline 4 | from pathlib import Path 5 | 6 | 7 | class OptimizedM100Model: 8 | def __init__(self, model_path, src_lang, tgt_lang): 9 | model_path = Path(model_path) 10 | assert model_path.exists(), "Model path does not exist" 11 | print("start loading the model........") 12 | self._model = ORTModelForSeq2SeqLM.from_pretrained(model_path) 13 | print("Model loaded successfully!") 14 | self._tokenizer = AutoTokenizer.from_pretrained(model_path) 15 | print("Tokenizer loaded successfully") 16 | self.pipeline = pipeline(f"translation_{src_lang}_to_{tgt_lang}", model=self._model, tokenizer=self._tokenizer) 17 | print("Pipeline created successfully") 18 | 19 | def predict_raw(self, X): 20 | data_to_translate = X.get("data") 21 | output = self.pipeline(data_to_translate) 22 | return output 23 | 24 | def health_status(self): 25 | text_to_translate = {"data": "Hello, my name is Espoir Murhabazi, I am a Software Engineer from Congo DRC but living in UK"} 26 | translation = self.predict_raw(text_to_translate) 27 | assert len(translation) == 1, "health check returning bad translation" 28 | assert translation[0].get("translation_text") is not None, "health check returning bad translation" 29 | return translation[0].get("translation_text") 30 | -------------------------------------------------------------------------------- /src/server/core/models/feedback.py: -------------------------------------------------------------------------------- 1 | from enum import unique 2 | 3 | from flask_sqlalchemy import SQLAlchemy 4 | 5 | from core.extensions import db 6 | 7 | class Feedback(db.Model): 8 | __tablename__ = 'feedback' 9 | id = 
db.Column(db.Integer, primary_key=True) 10 | 11 | src_lang = db.Column(db.String(20), nullable=False) 12 | tgt_lang = db.Column(db.String(20), nullable=False) 13 | accurate_translation = db.Column(db.String(800), nullable=False) 14 | know_src_lang = db.Column(db.String(50), nullable=False) 15 | know_tgt_lang = db.Column(db.String(50), nullable=False) 16 | own_translation = db.Column(db.String(800), nullable=True) 17 | translation = db.Column(db.String(800), nullable=False) 18 | text = db.Column(db.String(800), nullable=False) 19 | understand_translation = db.Column(db.String(50), nullable=False) 20 | feedbackToken = db.Column(db.String(100), nullable=False) 21 | 22 | 23 | created_at = db.Column(db.DateTime(), nullable=False,\ 24 | server_default=db.func.now()) 25 | update_at = db.Column(db.DateTime(), nullable=False,\ 26 | server_default=db.func.now(), onupdate=db.func.now()) 27 | 28 | # TODO We need to decide how we deal with duplicate on the review saving 29 | # __table_args__ = ( 30 | # # this can be db.PrimaryKeyConstraint if you want it to be a primary key 31 | # db.UniqueConstraint('input', 'review', 'stars'),) 32 | 33 | 34 | def save(self): 35 | db.session.add(self) 36 | db.session.commit() 37 | -------------------------------------------------------------------------------- /src/server/core/tests/test_config.py: -------------------------------------------------------------------------------- 1 | import os 2 | import unittest 3 | 4 | from flask import current_app 5 | from flask_testing import TestCase 6 | 7 | from core import masakhane 8 | 9 | 10 | class TestDevelopmentConfig(TestCase): 11 | def create_app(self): 12 | masakhane.config.from_object('core.config.DevelopmentConfig') 13 | return masakhane 14 | 15 | def test_app_is_development(self): 16 | self.assertTrue(masakhane.config['SECRET_KEY'] == "super-secret-key") 17 | self.assertFalse(current_app is None) 18 | self.assertTrue( 19 | masakhane.config['SQLALCHEMY_DATABASE_URI'] == 20 | 
os.getenv('DATABASE_TEST_URL', "sqlite:///masakhane.db") 21 | ) 22 | 23 | class TestTestingConfig(TestCase): 24 | def create_app(self): 25 | masakhane.config.from_object('core.config.StagingConfig') 26 | return masakhane 27 | 28 | def test_app_is_testing(self): 29 | self.assertTrue(masakhane.config['SECRET_KEY'] == "key_testing") 30 | self.assertTrue(masakhane.config['TESTING']) 31 | self.assertTrue( 32 | masakhane.config['SQLALCHEMY_DATABASE_URI'] == 33 | os.getenv('DATABASE_TEST_URL', "sqlite:///masakhane.db") 34 | ) 35 | 36 | class TestProductionConfig(TestCase): 37 | def create_app(self): 38 | masakhane.config.from_object('core.config.ProductionConfig') 39 | return masakhane 40 | 41 | def test_app_is_production(self): 42 | self.assertTrue(masakhane.config['SECRET_KEY'] == "key_production") 43 | self.assertFalse(masakhane.config['TESTING']) 44 | 45 | if __name__ == '__main__': 46 | unittest.main() -------------------------------------------------------------------------------- /src/m_to_m_models/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM python:3.10 as base 2 | LABEL maintainer="Espoir Murhabazi" 3 | 4 | 5 | # Never prompt the user for choices on installation/configuration of packages 6 | ENV DEBIAN_FRONTEND noninteractive 7 | ENV PYTHONUNBUFFERED=1 \ 8 | PORT=9000 \ 9 | PYTHONDONTWRITEBYTECODE=1 \ 10 | PIP_NO_CACHE_DIR=off \ 11 | PIP_DISABLE_PIP_VERSION_CHECK=on \ 12 | PIP_DEFAULT_TIMEOUT=100 13 | 14 | 15 | FROM base AS python-deps 16 | RUN apt-get update \ 17 | && apt-get install --no-install-recommends -y \ 18 | curl \ 19 | build-essential\ 20 | software-properties-common 21 | 22 | RUN python -m venv /opt/venv 23 | # Make sure we use the virtualenv: 24 | ENV PATH="/opt/venv/bin:$PATH" 25 | 26 | # Install pip 27 | COPY requirements.txt ./ 28 | RUN pip install --upgrade pip 29 | RUN pip install -r requirements.txt 30 | 31 | 32 | 33 | FROM base AS runtime 34 | # copy nltk data 35 | COPY 
--from=python-deps /opt/venv /opt/venv 36 | 37 | 38 | RUN useradd --create-home masakhane 39 | RUN usermod -aG sudo masakhane 40 | RUN mkdir /home/masakhane/translation_app/ 41 | ENV WORKING_DIR=/home/masakhane/translation_app/ 42 | ENV PATH="${WORKING_DIR}:$PATH" 43 | ENV PATH="/opt/venv/bin:$PATH" 44 | ENV PYTHONPATH="/opt/venv/bin:$PYTHONPATH" 45 | ENV PYTHONPATH="${PYTHONPATH}:${WORKING_DIR}" 46 | 47 | ENV MODEL_NAME model_handlers.OptimizedM100Model 48 | 49 | ENV SERVICE_TYPE MODEL 50 | 51 | COPY model_handlers.py ${WORKING_DIR} 52 | WORKDIR ${WORKING_DIR} 53 | RUN chown -R masakhane:masakhane ${WORKING_DIR} 54 | RUN chmod -R 777 ${WORKING_DIR} 55 | USER masakhane 56 | EXPOSE 9000 5000 57 | 58 | CMD exec seldon-core-microservice $MODEL_NAME --service-type $SERVICE_TYPE 59 | -------------------------------------------------------------------------------- /src/client/src/components/multiStepForm.js: -------------------------------------------------------------------------------- 1 | import { useForm, useStep } from "react-hooks-helper"; 2 | import React from 'react'; 3 | 4 | import Terms from "./terms"; 5 | import Step1 from "./step1"; 6 | import Step2 from "./step2"; 7 | import Step3 from "./step3"; 8 | 9 | const steps = [ 10 | { id: "terms" }, 11 | { id: "step1" }, 12 | { id: "step2" }, 13 | { id: "step3" }, 14 | ]; 15 | 16 | const defaultData = { 17 | know_src_lang: "little", 18 | know_tgt_lang: "little", 19 | understand_translation: "none", 20 | accurate_translation: "nonsense", 21 | own_translation: "" 22 | }; 23 | 24 | const MultiStepForm = ({ src_lang, tgt_lang, text, translation, setShow, submitFeedBack, setFeedbackToken, feedbackToken}) => { 25 | const [formData, setForm] = useForm({...defaultData, src_lang, tgt_lang, text, translation, feedbackToken}); 26 | const { step, navigation } = useStep({ initialStep: 0, steps }); 27 | const { id } = step; 28 | 29 | const handleSubmitFeedback = () => { 30 | console.log({formData}); 31 | // set formData to be 
feedback form 32 | submitFeedBack(formData); 33 | } 34 | 35 | const props = { src_lang, tgt_lang, text, translation, setShow, formData, setForm, navigation, handleSubmitFeedback, setFeedbackToken, feedbackToken}; 36 | 37 | switch (id) { 38 | case "terms": 39 | return ; 40 | case "step1": 41 | return ; 42 | case "step2": 43 | return ; 44 | case "step3": 45 | return ; 46 | 47 | default: 48 | return null; 49 | } 50 | } 51 | 52 | export default MultiStepForm; 53 | -------------------------------------------------------------------------------- /src/server/core/__init__.py: -------------------------------------------------------------------------------- 1 | import os 2 | # external imports 3 | from flask import Flask 4 | from flask_migrate import Migrate 5 | from flask_restful import Api 6 | from flask_cors import CORS 7 | # internal imports 8 | from core.resources.translate import TranslateResource, AddResource, SaveResource, HomeResource 9 | from core.extensions import db 10 | from core.config import Config, DevelopmentConfig, ProductionConfig, StagingConfig 11 | 12 | 13 | #application factory 14 | def create_app(saved_models): 15 | """Flask application factory to config and init app""" 16 | env = os.environ.get('ENV', 'Development') 17 | if env == 'Production': 18 | config_str = ProductionConfig() 19 | elif env == 'Staging': 20 | config_str = StagingConfig() 21 | else: 22 | config_str = DevelopmentConfig() 23 | 24 | app = Flask(__name__) 25 | CORS(app) 26 | app.config.from_object(config_str) 27 | # database init 28 | register_extensions(app) 29 | # api init 30 | register_resources(app, saved_models) 31 | 32 | return app 33 | 34 | 35 | def register_extensions(app): 36 | db.init_app(app) 37 | migrate = Migrate(app, db) 38 | 39 | 40 | def register_resources(app, saved_models): 41 | api = Api(app) 42 | api.add_resource(HomeResource, '/') 43 | api.add_resource(TranslateResource, '/translate', resource_class_kwargs={'saved_models': saved_models}) 44 | # TODO need to find a 
better way to update the current app information without exposing it to the public
29 | *.manifest 30 | *.spec 31 | 32 | # Installer logs 33 | pip-log.txt 34 | pip-delete-this-directory.txt 35 | 36 | # Unit test / coverage reports 37 | htmlcov/ 38 | .tox/ 39 | .coverage 40 | .coverage.* 41 | .cache 42 | nosetests.xml 43 | coverage.xml 44 | *.cover 45 | 46 | # Translations 47 | *.mo 48 | *.pot 49 | 50 | # Django stuff: 51 | *.log 52 | 53 | # Sphinx documentation 54 | docs/_build/ 55 | 56 | # PyBuilder 57 | target/ 58 | 59 | # DotEnv configuration 60 | .env 61 | 62 | # Database 63 | *.db 64 | *.rdb 65 | 66 | # Pycharm 67 | .idea 68 | 69 | # VS Code 70 | .vscode/ 71 | 72 | # Spyder 73 | .spyproject/ 74 | 75 | # Jupyter NB Checkpoints 76 | .ipynb_checkpoints/ 77 | 78 | # exclude data from source control by default 79 | /data/ 80 | 81 | # Mac OS-specific storage files 82 | .DS_Store 83 | 84 | # vim 85 | *.swp 86 | *.swo 87 | 88 | # Mypy cache 89 | .mypy_cache/ 90 | 91 | src/back-end/joeynmt/models/ 92 | models/joeynmt/ 93 | 94 | # node modules 95 | node_modules/ 96 | 97 | #cache 98 | .eslintcache 99 | 100 | src/server/models/joeynmt 101 | src/server/core/models/joeynmt 102 | 103 | .env.prod 104 | 105 | *.sqlite 106 | 107 | ### ignore model export 108 | 109 | onnx/ 110 | *.onnx 111 | model_store/ 112 | logs/ 113 | -------------------------------------------------------------------------------- /src/client/src/pages/Faq.js: -------------------------------------------------------------------------------- 1 | import { Container, Card } from 'react-bootstrap' 2 | import React from 'react'; 3 | 4 | export default function FAQPage() { 5 | return( 6 |
7 | 8 | 9 | 10 | FAQ 11 | {/* Enter subtitle here */} 12 |
13 | 14 | 1. I was not happy with the translation I got from the service. 15 | 16 |
17 |
18 |
19 | 20 | Thank you for trying this service. The Masakhane NLP Translation project built the models used to do the translation. 21 | This website provides a way for us to be able to test how well these models work. This service is still a work in progress and we expect the models to be improved every few months as we get more feedback from users such as yourself. 22 | Please do provide feedback by writing where there is a mistake in the translation so we can provide this information to the researchers. 23 | As such, this service is not a production system (should not be used for official translations). 24 | 25 |
26 |
27 |
28 |
29 |
30 |
31 | ) 32 | } -------------------------------------------------------------------------------- /src/server/requirements.txt: -------------------------------------------------------------------------------- 1 | absl-py==0.11.0 2 | alembic==1.5.4 3 | aniso8601==8.1.1 4 | astroid==2.4.2 5 | backcall==0.2.0 6 | cachetools==4.2.1 7 | chardet==4.0.0 8 | click==7.1.2 9 | cycler==0.10.0 10 | decorator==4.4.2 11 | Flask==1.1.2 12 | Flask-Cors==3.0.10 13 | Flask-Migrate==2.6.0 14 | Flask-RESTful==0.3.8 15 | Flask-SQLAlchemy==2.4.4 16 | future==0.18.2 17 | google-auth==1.26.1 18 | google-auth-oauthlib==0.4.2 19 | grpcio==1.35.0 20 | gdown==4.6.0 21 | idna==2.10 22 | importlib-metadata==3.4.0 23 | ipdb==0.13.4 24 | ipython==7.16.1 25 | ipython-genutils==0.2.0 26 | isort==5.7.0 27 | itsdangerous==1.1.0 28 | jedi==0.18.0 29 | Jinja2==2.11.3 30 | joeynmt==1.2 31 | kiwisolver==1.3.1 32 | lazy-object-proxy==1.4.3 33 | Mako==1.1.4 34 | Markdown==3.3.3 35 | MarkupSafe==1.1.1 36 | matplotlib==3.3.4 37 | mccabe==0.6.1 38 | Morfessor==2.0.6 39 | numpy==1.18.5 40 | oauthlib==3.1.0 41 | pandas==1.1.5 42 | parso==0.8.1 43 | pexpect==4.8.0 44 | pickleshare==0.7.5 45 | Pillow==8.1.0 46 | polyglot==16.7.4 47 | portalocker==2.2.1 48 | prompt-toolkit==3.0.16 49 | protobuf==3.14.0 50 | psycopg2-binary==2.8.6 51 | ptyprocess==0.7.0 52 | pyasn1==0.4.8 53 | pyasn1-modules==0.2.8 54 | pycld2==0.41 55 | pyglot==0.1.1 56 | Pygments==2.7.4 57 | PyICU==2.6 58 | pylint==2.6.0 59 | pyparsing==2.4.7 60 | python-dateutil==2.8.1 61 | python-editor==1.0.4 62 | pytz==2021.1 63 | PyYAML==5.4.1 64 | requests==2.25.1 65 | requests-oauthlib==1.3.0 66 | rsa==4.7 67 | sacrebleu==1.5.0 68 | scipy==1.5.4 69 | seaborn==0.11.1 70 | simplejson==3.17.2 71 | six==1.12.0 72 | SQLAlchemy==1.3.23 73 | subword-nmt==0.3.7 74 | tensorboard==2.4.1 75 | tensorboard-plugin-wit==1.8.0 76 | toml==0.10.2 77 | torch==1.7.1 78 | tqdm==4.56.2 79 | traitlets==4.3.3 80 | typed-ast==1.4.2 81 | typing-extensions==3.7.4.3 82 | urllib3==1.26.3 
83 | wcwidth==0.2.5 84 | Werkzeug==0.16.1 85 | wrapt==1.11.1 86 | zipp==3.4.0 87 | sacremoses==0.0.43 88 | # https://gunicorn.org/#deployment 89 | gunicorn==20.0.4 90 | Flask-Testing==0.6.2 -------------------------------------------------------------------------------- /src/m_to_m_models/kubernetes/deployment.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: machinelearning.seldon.io/v1 2 | kind: SeldonDeployment 3 | metadata: 4 | name: translation-deployment 5 | namespace: masakhane 6 | spec: 7 | name: translation-worker 8 | predictors: 9 | - componentSpecs: 10 | - spec: 11 | containers: 12 | - image: masakhane/translation:alpha 13 | name: translation-container 14 | imagePullPolicy: IfNotPresent 15 | env: 16 | - name: TRANSFORMERS_CACHE 17 | value: "/models_datastore/.cache" 18 | - name: GUNICORN_WORKERS 19 | value: '1' 20 | - name: GRPC_WORKERS 21 | value: '0' 22 | - name: SELDON_LOG_LEVEL 23 | value: DEBUG 24 | - name: SELDON_DEBUG 25 | value: 'True' 26 | - name: FLASK_DEBUG 27 | value: 'True' 28 | volumeMounts: 29 | - mountPath: "/models_datastore/" # mount the cache volume here 30 | name: translation-volume-storage 31 | resources: 32 | requests: 33 | memory: 8Gi 34 | cpu: 3 35 | limits: 36 | memory: 9Gi 37 | cpu: 4 38 | terminationGracePeriodSeconds: 1 39 | volumes: 40 | - name: translation-volume-storage 41 | persistentVolumeClaim: 42 | claimName: masakhane-model-cache-volume-claim 43 | graph: 44 | envSecretRefName: masakhane-container-secret 45 | children: [] 46 | endpoint: 47 | type: REST 48 | name: translation-container 49 | type: MODEL 50 | parameters: 51 | - name: model_path 52 | type: STRING 53 | value: "/models_datastore/" # this should come form volume. 
54 | - name: src_lang 55 | type: STRING 56 | value: "en" 57 | - name: tgt_lang 58 | type: STRING 59 | value: "sw" 60 | labels: 61 | version: v1 62 | name: translation-predictor 63 | replicas: 1 64 | -------------------------------------------------------------------------------- /src/server/core/utils.py: -------------------------------------------------------------------------------- 1 | from torchtext import data 2 | from torchtext.datasets import TranslationDataset 3 | 4 | 5 | from joeynmt.constants import UNK_TOKEN, EOS_TOKEN, BOS_TOKEN, PAD_TOKEN 6 | 7 | 8 | class MonoLineDataset(TranslationDataset): 9 | def __init__(self, line, field, **kwargs): 10 | examples = [] 11 | line = line.strip() 12 | fields = [('src', field)] 13 | examples.append(data.Example.fromlist([line], fields)) 14 | super(TranslationDataset, self).__init__(examples, fields, **kwargs) 15 | 16 | 17 | def load_line_as_data(line, level, lowercase, src_vocab, trg_vocab): 18 | """ 19 | Create a data set from one line. 20 | Workaround for the usual torchtext data handling. 21 | 22 | :param line: The input line to process. 23 | :param level: "char", "bpe" or "word". Determines segmentation of the input. 24 | :param lowercase: If True, lowercases inputs and outputs. 25 | :param src_vocab: Path to source vocabulary. 26 | :param trg_vocab: Path to target vocabulary. 
:return: tuple of (dataset built from the line, src_vocab, trg_vocab)
"webpack-dev-server --host 0.0.0.0", 36 | "start-api": "cd ../server && python app.py", 37 | "build": "react-scripts build", 38 | "test": "react-scripts test", 39 | "eject": "react-scripts eject" 40 | }, 41 | "eslintConfig": { 42 | "plugins": [ 43 | "testing-library" 44 | ], 45 | "rules": { 46 | "testing-library/await-async-query": "error", 47 | "testing-library/no-await-sync-query": "error", 48 | "testing-library/no-debug": "warn" 49 | } 50 | }, 51 | "browserslist": { 52 | "production": [ 53 | ">0.2%", 54 | "not dead", 55 | "not op_mini all" 56 | ], 57 | "development": [ 58 | "last 1 chrome version", 59 | "last 1 firefox version", 60 | "last 1 safari version" 61 | ] 62 | }, 63 | "devDependencies": { 64 | "eslint": "^7.18.0", 65 | "eslint-plugin-testing-library": "^3.10.1", 66 | "webpack-dev-server": "^3.11.2" 67 | } 68 | } 69 | -------------------------------------------------------------------------------- /src/client/webpack.config.js: -------------------------------------------------------------------------------- 1 | const path = require('path') 2 | 3 | module.exports = { 4 | entry: path.resolve(__dirname, 'src', 'index.js'), 5 | output: { 6 | path: path.resolve(__dirname, 'public'), 7 | filename: 'bundle.js', 8 | publicPath: '/' 9 | }, 10 | devServer: { 11 | contentBase: path.resolve(__dirname, 'public'), 12 | open: true, 13 | clientLogLevel: 'silent', 14 | host: '0.0.0.0', 15 | port: 3000, 16 | historyApiFallback: true, 17 | compress: true, 18 | public: 'translate.masakhane.io:80', 19 | // proxy: { 20 | // '/': { 21 | // target: 'http://localhost:5000', 22 | // pathRewrite: { '^/api': '' }, 23 | // }, 24 | // "changeOrigin":true 25 | // } 26 | proxy: { 27 | '/': { 28 | // target: 'http://[::1]:5000', 29 | // todo: make the ip a configuration environment variable 30 | target: 'http://45.147.99.147:5000', 31 | // target: 'http://127.0.0.1:5000', 32 | bypass: function (req, res, proxyOptions) { 33 | if (req.headers.accept.indexOf('html') !== -1) { 34 | 
console.log('Skipping proxy for browser request.'); 35 | return '/index.html'; 36 | } 37 | }, 38 | }, 39 | }, 40 | }, 41 | module: { 42 | rules: [ 43 | { 44 | test: /\.(jsx|js)$/, 45 | include: path.resolve(__dirname, 'src'), 46 | exclude: /node_modules/, 47 | use: [{ 48 | loader: 'babel-loader', 49 | options: { 50 | presets: [ 51 | ['@babel/preset-env', { 52 | "targets": "defaults" 53 | }], 54 | '@babel/preset-react' 55 | ] 56 | } 57 | }] 58 | }, 59 | { 60 | test: /\.(jpg|png|svg)$/, 61 | include: path.resolve(__dirname, 'src'), 62 | exclude: /node_modules/, 63 | loader: 'url-loader', 64 | options: { 65 | limit: 25000, 66 | performance: { 67 | hints: false, 68 | maxEntrypointSize: 512000, 69 | maxAssetSize: 512000 70 | } 71 | }, 72 | 73 | } 74 | 75 | ] 76 | } 77 | } 78 | -------------------------------------------------------------------------------- /src/server/Dockerfile.prod: -------------------------------------------------------------------------------- 1 | ########### 2 | # BUILDER # 3 | ########### 4 | 5 | # pull official base image 6 | FROM python:3.6.9 as builder 7 | 8 | 9 | # set working directory 10 | WORKDIR /usr/src/app 11 | 12 | 13 | # set environment variables 14 | ENV PYTHONDONTWRITEBYTECODE 1 15 | ENV PYTHONUNBUFFERED 1 16 | 17 | # install system dependencies 18 | RUN apt-get update && apt-get install -y netcat && \ 19 | apt-get install -y --no-install-recommends gcc 20 | 21 | 22 | RUN apt-get update 23 | RUN apt-get install -y gnupg lsb-release wget 24 | 25 | RUN lsb_release -c -s > /tmp/lsb_release 26 | RUN GCSFUSE_REPO=$(cat /tmp/lsb_release); echo "deb http://packages.cloud.google.com/apt gcsfuse-$GCSFUSE_REPO main" | tee /etc/apt/sources.list.d/gcsfuse.list 27 | RUN wget -O - https://packages.cloud.google.com/apt/doc/apt-key.gpg | apt-key add - 28 | 29 | RUN apt-get update 30 | RUN apt-get install -y gcsfuse 31 | 32 | 33 | # lint 34 | RUN pip install --upgrade pip 35 | RUN pip install flake8 36 | RUN flake8 --ignore=E501,F401 . 
37 | 38 | # add and install requirements 39 | COPY ./requirements.txt /usr/src/app/requirements.txt 40 | # RUN pip install -r requirements.txt 41 | RUN pip wheel --no-cache-dir --no-deps --wheel-dir /usr/src/app/wheels -r requirements.txt 42 | 43 | 44 | ######### 45 | # FINAL # 46 | ######### 47 | 48 | FROM python:3.6.9 49 | 50 | # create directory for the app user 51 | RUN mkdir -p /home/app 52 | 53 | # create the app user 54 | RUN addgroup --system app && adduser --system --group app 55 | 56 | # create the appropriate directories 57 | ENV HOME=/home/app 58 | ENV APP_HOME=/home/app/web 59 | RUN mkdir $APP_HOME 60 | WORKDIR $APP_HOME 61 | 62 | # install dependencies 63 | RUN apt-get update && apt-get install -y --no-install-recommends netcat 64 | COPY --from=builder /usr/src/app/wheels /wheels 65 | COPY --from=builder /usr/src/app/requirements.txt . 66 | RUN pip install --upgrade pip 67 | RUN pip install --no-cache /wheels/* 68 | 69 | # copy entrypoint-prod.sh 70 | COPY ./entrypoint.prod.sh $APP_HOME 71 | 72 | 73 | # copy project 74 | COPY . 
$APP_HOME 75 | 76 | # chown all the files to the app user 77 | RUN chown -R app:app $APP_HOME 78 | 79 | # change to the app user 80 | USER app 81 | 82 | # run entrypoint.prod.sh 83 | ENTRYPOINT ["/home/app/web/entrypoint.prod.sh"] -------------------------------------------------------------------------------- /docs/start_app_prod_doc.md: -------------------------------------------------------------------------------- 1 | # **Running the App In Production** 2 | To run the app locally, see [here](start_app_locally_doc.md) 3 | 4 | ## **Table of Contents** 5 | - [**Docker Setup**](#docker-setup) 6 | - [**Running the app**](#running-the-app) 7 | - [**Building the App**](#building-the-app) 8 | - [**Shut down the app**](#shut-down-the-app) 9 | - [**Add, Update, \& Delete Languages**](#add-update--delete-languages) 10 | - [**Running tests**](#running-tests) 11 | 12 | 13 | ## **Docker Setup** 14 | 15 | Ensure you have `docker` & `docker-compose` installed on your computer, you can check with the following commands: 16 | ```bash 17 | docker --version 18 | docker-compose --version 19 | ``` 20 | 21 | If the above commands return an error, please install [Docker](https://docs.docker.com/engine/install/) and [Docker-compose](https://docs.docker.com/compose/install/). 
22 | 23 | ## **Running the app** 24 | ### **Building the App** 25 | To build the app, from the root project directory, run the following command: 26 | ```bash 27 | docker-compose -f docker-compose.prod.yml up -d --build 28 | ``` 29 | 30 | ### **Shut down the app** 31 | To shut down the app, run the following command to remove the docker container: 32 | ```bash 33 | docker-compose -f docker-compose.prod.yml down 34 | ``` 35 | 36 | ### **Add, Update, & Delete Languages** 37 | **Add a Language** 38 | ```bash 39 | docker-compose -f docker-compose.prod.yml exec api python manage.py add_language en-sw-JW300 40 | ``` 41 | The language code parameter `en-sw-JW300` represents {src-lang}-{tgt-lang}-{shortform} 42 | So `en-sw-JW300` represents English-Swahili using JW300 shortform 43 | **Note** - A code parameter example without shortform is `en-tiv-` 44 | 45 | Download available languages csv [here](https://zenodo.org/record/7417644/files/masakhane-mt-current-models.csv) 46 | 47 | **Update Languages** 48 | ```bash 49 | curl --request GET 'http://127.0.0.1:5000/update' 50 | ``` 51 | 52 | **Check available languages** 53 | ```bash 54 | docker-compose -f docker-compose.prod.yml exec api python manage.py all_languages 55 | ``` 56 | 57 | **Remove a language** 58 | ```bash 59 | docker-compose -f docker-compose.prod.yml exec api python manage.py remove_language en-sw-JW300 60 | ``` 61 | 62 | ### **Running tests** 63 | ```bash 64 | docker-compose -f docker-compose.prod.yml exec api python manage.py tests 65 | ``` -------------------------------------------------------------------------------- /src/server/core/tests/test_app.py: -------------------------------------------------------------------------------- 1 | # test_hello.py 2 | # from app import create_app 3 | from flask import json, jsonify 4 | 5 | import os 6 | import unittest 7 | 8 | from flask import current_app 9 | from flask_testing import TestCase 10 | from core import masakhane, load_model, create_app 11 | 12 | # from core
import masakhane 13 | from core.tests.base import BaseTestCase 14 | 15 | class TestAppService(BaseTestCase): 16 | 17 | def test_home_page(self): 18 | "Test the home endpoint" 19 | app = masakhane 20 | response = app.test_client().get('/') 21 | 22 | data = response.get_json() 23 | 24 | assert response.status_code == 200 25 | 26 | assert data['message'] == "welcome Masakhane Web" 27 | 28 | # TODO We will need to have a dump database to check this 29 | # def test_translation(self): 30 | # app = masakhane 31 | # response = app.test_client().post( 32 | # '/translate', 33 | # data = json.dumps({ 34 | # "src_lang":"English", 35 | # "tgt_lang":"swahili", 36 | # "input":"My name is Salomon" 37 | # }), 38 | # content_type='application/json', 39 | # ) 40 | 41 | # data = response.get_json() 42 | 43 | # # assert response.status_code == 201 # created 44 | 45 | # # Givent that we can't know exactly the output of the translation 46 | # # we can test that some result are return 47 | # print(data) 48 | # assert data['output'] != "" 49 | 50 | # def test_save(): 51 | # """ 52 | # Test the save endpoint by checking the status code 53 | # and the responce message. 
54 | # """ 55 | # app = create_app() 56 | # response = app.test_client().post( 57 | # '/save', 58 | # data = json.dumps({ 59 | # "src_lang":"en", 60 | # "tgt_lang":"sw", 61 | # "input":"How are you doing today ?", 62 | # "review":"Test Saving", 63 | # "stars":"5", 64 | # "token":"ww2wki&idjj11yyy"}), 65 | # content_type='application/json', 66 | # ) 67 | 68 | 69 | # assert response.status_code == 201 70 | 71 | # assert b"Review saved" in response.data 72 | 73 | if __name__=='__main__': 74 | unittest.main() -------------------------------------------------------------------------------- /src/client/src/logo.svg: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /requirements-python3.10.txt: -------------------------------------------------------------------------------- 1 | aiohttp==3.8.4 2 | aiosignal==1.3.1 3 | appnope==0.1.3 4 | asttokens==2.2.1 5 | async-timeout==4.0.2 6 | attrs==22.2.0 7 | backcall==0.2.0 8 | captum==0.6.0 9 | certifi==2022.12.7 10 | cffi==1.15.1 11 | charset-normalizer==3.0.1 12 | click==8.0.4 13 | coloredlogs==15.0.1 14 | comm==0.1.2 15 | contourpy==1.0.7 16 | cryptography==3.4.8 17 | cycler==0.11.0 18 | datasets==2.10.0 19 | debugpy==1.6.6 20 | decorator==5.1.1 21 | dill==0.3.6 22 | enum-compat==0.0.3 23 | executing==1.2.0 24 | filelock==3.9.0 25 | Flask==2.2.3 26 | Flask-Cors==3.0.10 27 | Flask-OpenTracing==1.1.0 28 | flatbuffers==1.12 29 | fonttools==4.38.0 30 | frozenlist==1.3.3 31 | fsspec==2023.1.0 32 | grpcio==1.51.3 33 | grpcio-opentracing==1.1.4 34 | grpcio-reflection==1.34.1 35 | gunicorn==20.1.0 36 | huggingface-hub==0.12.1 37 | humanfriendly==10.0 38 | idna==3.4 39 | ipykernel==6.21.2 40 | ipython==8.10.0 41 | itsdangerous==2.1.2 42 | jaeger-client==4.4.0 43 | jedi==0.18.2 44 | Jinja2==3.1.2 45 | jsonschema==3.2.0 46 | jupyter_client==8.0.3 47 | jupyter_core==5.2.0 48 | kiwisolver==1.4.4 49 | MarkupSafe==2.1.2 50 | 
matplotlib==3.7.0 51 | matplotlib-inline==0.1.6 52 | mpmath==1.2.1 53 | multidict==6.0.4 54 | multiprocess==0.70.14 55 | nest-asyncio==1.5.6 56 | numpy==1.23.5 57 | onnx==1.13.1 58 | onnxruntime==1.13.1 59 | onnxruntime-tools==1.7.0 60 | opentracing==2.4.0 61 | optimum==1.6.4 62 | ort-nightly==1.11.0.dev20220320001 63 | packaging==23.0 64 | pandas==1.5.3 65 | parso==0.8.3 66 | pexpect==4.8.0 67 | pickleshare==0.7.5 68 | Pillow==9.4.0 69 | platformdirs==3.0.0 70 | prometheus-client==0.8.0 71 | prompt-toolkit==3.0.37 72 | protobuf==3.20.3 73 | psutil==5.9.4 74 | ptyprocess==0.7.0 75 | pure-eval==0.2.2 76 | py-cpuinfo==9.0.0 77 | py3nvml==0.2.7 78 | pyarrow==11.0.0 79 | pycodestyle==2.10.0 80 | pycparser==2.21 81 | Pygments==2.14.0 82 | pyparsing==3.0.9 83 | pyrsistent==0.19.3 84 | python-dateutil==2.8.2 85 | pytz==2022.7.1 86 | PyYAML==5.4.1 87 | pyzmq==25.0.0 88 | regex==2022.10.31 89 | requests==2.28.2 90 | responses==0.18.0 91 | seldon-core==1.15.0 92 | sentencepiece==0.1.97 93 | six==1.16.0 94 | stack-data==0.6.2 95 | sympy==1.11.1 96 | threadloop==1.0.2 97 | thrift==0.16.0 98 | tokenizers==0.13.2 99 | torch==1.13.1 100 | torch-model-archiver==0.7.1 101 | torch-workflow-archiver==0.2.7 102 | torchserve==0.7.1 103 | tornado==6.2 104 | tqdm==4.64.1 105 | traitlets==5.9.0 106 | transformers==4.26.1 107 | typing_extensions==4.5.0 108 | urllib3==1.26.14 109 | wcwidth==0.2.6 110 | Werkzeug==2.2.3 111 | xmltodict==0.13.0 112 | xxhash==3.2.0 113 | yarl==1.8.2 114 | -------------------------------------------------------------------------------- /src/m_to_m_models/requirements.txt: -------------------------------------------------------------------------------- 1 | aiohttp==3.8.4 2 | aiosignal==1.3.1 3 | appnope==0.1.3 4 | asttokens==2.2.1 5 | async-timeout==4.0.2 6 | attrs==22.2.0 7 | backcall==0.2.0 8 | captum==0.6.0 9 | certifi==2022.12.7 10 | cffi==1.15.1 11 | charset-normalizer==3.0.1 12 | click==8.0.4 13 | coloredlogs==15.0.1 14 | comm==0.1.2 15 | contourpy==1.0.7 
16 | cryptography==3.4.8 17 | cycler==0.11.0 18 | datasets==2.10.0 19 | debugpy==1.6.6 20 | decorator==5.1.1 21 | dill==0.3.6 22 | enum-compat==0.0.3 23 | executing==1.2.0 24 | filelock==3.9.0 25 | Flask==2.2.3 26 | Flask-Cors==3.0.10 27 | Flask-OpenTracing==1.1.0 28 | flatbuffers==1.12 29 | fonttools==4.38.0 30 | frozenlist==1.3.3 31 | fsspec==2023.1.0 32 | grpcio==1.51.3 33 | grpcio-opentracing==1.1.4 34 | grpcio-reflection==1.34.1 35 | gunicorn==20.1.0 36 | huggingface-hub==0.12.1 37 | humanfriendly==10.0 38 | idna==3.4 39 | ipykernel==6.21.2 40 | ipython==8.10.0 41 | itsdangerous==2.1.2 42 | jaeger-client==4.4.0 43 | jedi==0.18.2 44 | Jinja2==3.1.2 45 | jsonschema==3.2.0 46 | jupyter_client==8.0.3 47 | jupyter_core==5.2.0 48 | kiwisolver==1.4.4 49 | MarkupSafe==2.1.2 50 | matplotlib==3.7.0 51 | matplotlib-inline==0.1.6 52 | mpmath==1.2.1 53 | multidict==6.0.4 54 | multiprocess==0.70.14 55 | nest-asyncio==1.5.6 56 | numpy==1.23.5 57 | onnx==1.13.1 58 | onnxruntime==1.13.1 59 | onnxruntime-tools==1.7.0 60 | opentracing==2.4.0 61 | optimum==1.6.4 62 | ort-nightly==1.11.0.dev20220320001 63 | packaging==23.0 64 | pandas==1.5.3 65 | parso==0.8.3 66 | pexpect==4.8.0 67 | pickleshare==0.7.5 68 | Pillow==9.4.0 69 | platformdirs==3.0.0 70 | prometheus-client==0.8.0 71 | prompt-toolkit==3.0.37 72 | protobuf 73 | psutil==5.9.4 74 | ptyprocess==0.7.0 75 | pure-eval==0.2.2 76 | py-cpuinfo==9.0.0 77 | py3nvml==0.2.7 78 | pyarrow==11.0.0 79 | pycodestyle==2.10.0 80 | pycparser==2.21 81 | Pygments==2.14.0 82 | pyparsing==3.0.9 83 | pyrsistent==0.19.3 84 | python-dateutil==2.8.2 85 | pytz==2022.7.1 86 | PyYAML==5.4.1 87 | pyzmq==25.0.0 88 | regex==2022.10.31 89 | requests==2.28.2 90 | responses==0.18.0 91 | seldon-core==1.15.0 92 | sentencepiece==0.1.97 93 | six==1.16.0 94 | stack-data==0.6.2 95 | sympy==1.11.1 96 | threadloop==1.0.2 97 | thrift==0.16.0 98 | tokenizers==0.13.2 99 | torch==1.13.1 100 | torch-model-archiver==0.7.1 101 | torch-workflow-archiver==0.2.7 102 | 
torchserve==0.7.1 103 | tornado==6.2 104 | tqdm==4.64.1 105 | traitlets==5.9.0 106 | transformers==4.26.1 107 | typing_extensions==4.5.0 108 | urllib3==1.26.14 109 | wcwidth==0.2.6 110 | Werkzeug==2.2.3 111 | xmltodict==0.13.0 112 | xxhash==3.2.0 113 | yarl==1.8.2 114 | -------------------------------------------------------------------------------- /src/client/src/App.js: -------------------------------------------------------------------------------- 1 | import React from 'react'; 2 | import { 3 | BrowserRouter as Router, 4 | Switch, 5 | Route 6 | } from "react-router-dom"; 7 | import { Navbar, Nav, Container, Jumbotron, Image, Row, Col } from 'react-bootstrap' 8 | 9 | import Home from './pages/Home'; 10 | import About from './pages/About'; 11 | import FAQPage from './pages/Faq'; 12 | import image from './images/masakhane-border.png'; 13 | 14 | 15 | function App() { 16 | return ( 17 | 18 |
19 | 20 | Masakhane 21 | 22 | 23 | 28 | 29 | 30 | 31 | 32 | 33 | 34 |

Masakhane

35 |

Machine translation service for African languages

36 |
37 |
38 |
39 | 40 | 41 | 42 | 43 | 44 | 45 | 46 | 47 | 48 | 49 | 50 | 51 | {/* 52 |
53 |
54 | 55 |
56 |

This is a community research project. Read more about it here

57 |
*/} 58 |
59 |
60 | ); 61 | } 62 | 63 | export default App; 64 | -------------------------------------------------------------------------------- /src/client/public/index.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 13 | 14 | 15 | 16 | 17 | 18 | 22 | 23 | 24 | 30 | 31 | 32 | 36 | 37 | 46 | Masakhane Web 47 | 48 | 49 | 50 |
51 | 61 | 62 | 63 | 64 | -------------------------------------------------------------------------------- /src/client/public/217.bundle.js: -------------------------------------------------------------------------------- 1 | (self.webpackChunkmasakhane=self.webpackChunkmasakhane||[]).push([[217],{217:function(t,n,e){"use strict";e.r(n),e.d(n,{getCLS:function(){return m},getFCP:function(){return g},getFID:function(){return h},getLCP:function(){return y},getTTFB:function(){return F}});var i,a,r=function(){return"".concat(Date.now(),"-").concat(Math.floor(8999999999999*Math.random())+1e12)},o=function(t){var n=arguments.length>1&&void 0!==arguments[1]?arguments[1]:-1;return{name:t,value:n,delta:0,entries:[],id:r(),isFinal:!1}},u=function(t,n){try{if(PerformanceObserver.supportedEntryTypes.includes(t)){var e=new PerformanceObserver((function(t){return t.getEntries().map(n)}));return e.observe({type:t,buffered:!0}),e}}catch(t){}},s=!1,c=!1,f=function(t){s=!t.persisted},l=function(){addEventListener("pagehide",f),addEventListener("beforeunload",(function(){}))},p=function(t){var n=arguments.length>1&&void 0!==arguments[1]&&arguments[1];c||(l(),c=!0),addEventListener("visibilitychange",(function(n){var e=n.timeStamp;"hidden"===document.visibilityState&&t({timeStamp:e,isUnloading:s})}),{capture:!0,once:n})},d=function(t,n,e,i){var a;return function(){e&&n.isFinal&&e.disconnect(),n.value>=0&&(i||n.isFinal||"hidden"===document.visibilityState)&&(n.delta=n.value-(a||0),(n.delta||n.isFinal||void 0===a)&&(t(n),a=n.value))}},m=function(t){var n,e=arguments.length>1&&void 0!==arguments[1]&&arguments[1],i=o("CLS",0),a=function(t){t.hadRecentInput||(i.value+=t.value,i.entries.push(t),n())},r=u("layout-shift",a);r&&(n=d(t,i,r,e),p((function(t){var e=t.isUnloading;r.takeRecords().map(a),e&&(i.isFinal=!0),n()})))},v=function(){return void 0===i&&(i="hidden"===document.visibilityState?0:1/0,p((function(t){var n=t.timeStamp;return i=n}),!0)),{get timeStamp(){return 
i}}},g=function(t){var n,e=o("FCP"),i=v(),a=u("paint",(function(t){"first-contentful-paint"===t.name&&t.startTime1&&void 0!==arguments[1]&&arguments[1],i=o("LCP"),a=v(),r=function(t){var e=t.startTime;e 25 | 26 | Endpoint Description Returns (on success) 27 | 28 | 29 | 30 | `/` 31 | 32 | The base endpoint 33 | 34 | 35 | ```json 36 | { 37 | "message": "welcome Masakhane Web" 38 | } 39 | ``` 40 | 41 | 42 | 43 | 44 | 45 | `/translate` 46 | 47 | Lists the saved models 48 | 49 | 50 | ```json 51 | [ 52 | { 53 | "type": "source", 54 | "name": "English", 55 | "value": "en", 56 | "targets": [ 57 | { 58 | "name": "Swahili", 59 | "value": "sw" 60 | } 61 | ] 62 | } 63 | ] 64 | ``` 65 | 66 | 67 | 68 | 69 | 70 | 71 | `/update` 72 | 73 | Updates the local database with the newly loaded models 74 | 75 | 76 | ```json 77 | { 78 | "message": "models updated" 79 | } 80 | ``` 81 | 82 | 83 | 84 | 85 | 86 | ### **POST** 87 | 88 | 89 | 90 | 91 | 92 | 93 | 98 | 109 | 121 | 122 | 123 | 128 | 142 | 151 | 152 | 153 |
Endpoint Description Example Body Returns (on success)
94 | 95 | `/translate` 96 | 97 | Returns the translated text 99 | 100 | ```json 101 | { 102 | "src_lang": "english", 103 | "tgt_lang": "swahili", 104 | "input": "how are you?" 105 | } 106 | ``` 107 | 108 | 110 | 111 | ```json 112 | { 113 | "src_lang": "english", 114 | "tgt_lang": "swahili", 115 | "input": "Hello, how are you?", 116 | "output": "kwa ukunjufu" 117 | } 118 | ``` 119 | 120 |
124 | 125 | `/save` 126 | 127 | Saves the translation feedback 129 | 130 | ```json 131 | { 132 | "srcX_lang": "english", 133 | "tgt_lang": "swahili", 134 | "input": "Hello, how are you?", 135 | "review": "translation correction", 136 | "stars": "translation confidence", 137 | "token": "user auth (bool)" 138 | } 139 | ``` 140 | 141 | 143 | 144 | ```json 145 | { 146 | "message": "Review saved", 147 | } 148 | ``` 149 | 150 |
154 | 155 | # Manage CLI 156 | There is a cli program for managing the server - it is in [src/server/manage.py]() 157 | 158 | The command format is: 159 | ```bash 160 | python manage.py command optional_parameter 161 | ``` 162 | 163 | | Command | Parameter | Description | 164 | | ------- | --------- | ----------- | 165 | | `create_db` | none | Creates database tables for the db models Language & Feedback 166 | | `all_languages` | none | Lists the model info stored in the Language table 167 | | `add_language` | `name_tag` | Adds a language with a given name_tag, ie - `en-sw-JW300 OR en-tiv-`| 168 | | `remove_language` | `name_tag`| Removes a language with a given name_tag | 169 | | `clean` | none | Deletes and recreates an empty database | 170 | | `tests` | none | Runs the backend tests | 171 | 172 | # Tests 173 | 174 | **TODO** -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Masakhane WEB - A Machine Translation Web Platform for African Languages 2 | 3 |
4 | 5 |
6 | 7 | 8 | [**Masakhane**](https://www.masakhane.io/) meaning ‘we build together’, is a research effort for machine translation for African languages which is open source and online. So far, the community has built translation models based on [Joey NMT](https://github.com/joeynmt/joeynmt) for over 38 African languages. As such, **Masakhane Web** is a platform that aims to host the already trained models from the community and allow contributions from users to create new data for retraining. The objective of this web application is to provide access to an open-source platform that makes available relatively accurate translations for languages across Africa. If you can't find your language and/or would like to train your own machine translation model in your language, see https://github.com/masakhane-io/masakhane-mt on how you can contribute. 9 | 10 | 11 | **Disclaimer:** This system is for research purposes only and should be taken as work in progress. None of the trained models are suitable for production usage. 
12 | 13 | ## Table of contents 14 | - [Running The App](#running-the-app) 15 | - [Contributing](#contributing) 16 | - [Options](#options) 17 | - [Submitting Changes\[Pull Request\]](#submitting-changespull-request) 18 | - [Contributors](#contributors) 19 | - [Contact Us](#contact-us) 20 | - [License](#license) 21 | - [Citing the project](#citing-the-project) 22 | - [Acknowledgements](#acknowledgements) 23 | 24 | 25 | # Running The App 26 | To run the app locally, see [here](/docs/start_app_locally_doc.md#running-the-app-locally) 27 | To run the app in production, see [here](/docs/start_app_prod_doc.md#running-the-app-in-production) 28 | 29 | # Contributing 30 | 31 | 32 | ## Options 33 | 34 | - *Can't see your language as one of the supported languages: Visit [Masakhane:Building your first machine translation model](https://github.com/masakhane-io/masakhane-mt#building-your-first-machine-translation-model) to learn more about how you can train a model for your language.* 35 | 36 | - *I have an idea or a new feature: Create a new issue first, assign it to yourself and then fork the repo* 37 | 38 | - *I want to help in improving the accuracy of the models: Check out below on how you can reach out to us* 39 | 40 | 41 | 42 | ## Submitting Changes[Pull Request] 43 | 44 | - See [https://opensource.com/article/19/7/create-pull-request-github](https://opensource.com/article/19/7/create-pull-request-github) 45 | 46 | 47 | 48 | # Contributors 49 | 50 | 51 | 52 | 53 | 54 | 55 | 56 | 57 | 58 | Made with [contributors-img](https://contrib.rocks). 
59 | 60 | 61 | 62 | 63 | # Contact Us 64 | 65 | - Vukosi Marivate - vukosi.marivate@cs.up.ac.za 66 | 67 | - Abiodun Modupe - abiodun.modupe@cs.up.ac.za 68 | 69 | - Salomon Kabongo - skabenamualu@aimsammi.org 70 | 71 | - Catherine Gitau - cgitau@aimsammi.org 72 | 73 | 74 | 75 | # License 76 | 77 | [MIT](https://mit-license.org/) 78 | 79 | 80 | 81 | ## Citing the project 82 | 83 | **On a visualisation/notebook/webapp:** 84 | 85 | > Data Science for Social Impact Research Group @ University of Pretoria, Masakhane NLP, *Masakhane WEB - A Machine Translation Web Platform for African Languages* Available on: [https://github.com/dsfsi/masakhane-web](https://github.com/dsfsi/masakhane-web). 86 | 87 | **In a publication** 88 | Software 89 | 90 | > @software { marivate_vukosi_2021_4745501, 91 | > author = {Marivate, Vukosi and Gitau, Catherine and Kabenamualu, Salomon and Modupe, Abiodun and Masakhane NLP}, 92 | > title = {{Masakhane WEB - A Machine Translation Web Platform for African Languages}}, 93 | > month = may, year = 2021, 94 | > publisher = {Zenodo}, 95 | > version = {0.9}, 96 | > doi = {10.5281/zenodo.4745501}, 97 | > url = {[https://doi.org/10.5281/zenodo.4745501](https://doi.org/10.5281/zenodo.4745501)} 98 | > } 99 | 100 | 101 | 102 | # Acknowledgements 103 | 104 | 105 | 106 | We want to acknowledge support from the following organisations 107 | 108 | - [Mozilla](https://www.mozilla.org/en-US/moss/) 109 | 110 | - [Google Cloud Platform](https://cloud.google.com/) -------------------------------------------------------------------------------- /src/client/src/components/terms.js: -------------------------------------------------------------------------------- 1 | import { Row, Card, Button } from 'react-bootstrap'; 2 | import { v4 as uuidv4 } from 'uuid'; 3 | import React from 'react'; 4 | 5 | const Terms = ({ setShow, navigation, setFeedbackToken, feedbackToken}) => { 6 | const { next } = navigation; 7 | 8 | const accept = () => { 9 | if(feedbackToken !== '') { 10
| next(); 11 | } else { 12 | // generate token 13 | const token = uuidv4(); 14 | // set token 15 | localStorage.setItem('feedbackToken', token); 16 | setFeedbackToken(token); 17 | // proceed 18 | next(); 19 | } 20 | } 21 | 22 | const handleDecline = () => { 23 | // close modal 24 | setShow(false); 25 | } 26 | 27 | return ( 28 |
29 | 30 | 31 | Terms & Conditions 32 | Dear Sir/Madam, 33 |
34 | 35 | I am Dr Vukosi Marivate, principal investigator of the Data Science for Social Impact research group at the Department of Computer Science at the University of Pretoria. 36 | The research project is titled Masakhane Web Feedback Analysis for African Language Task Models. 37 | The study aims to understand the challenges in automated translation models for African languages. 38 | The models themselves are sourced from the Masakhane project (our collaborators) and are all a work in progress. By better providing feedback to model designers, we can work to improve the models and conduct research on African Language Natural Language Processing. 39 | The purpose of this questionnaire/feedback form is to collect information on the quality of the translations that are on the Masakhane Web system currently. 40 | The user participation is voluntary, and you can withdraw at any time without penalty. 41 | 42 |
43 |
44 |
45 | 46 | Throughout the feedback from the participants, their privacy remains confidential. 47 | Hence, we only collect the following information: 48 | 49 | 1. The user has the option to accept or reject to participate in the feedback survey, 50 | 51 | 52 | 2. The participants are required to indicate their level of proficiencies of the languages translated by the model, 53 | 54 | 55 | 3. and your submitted feedback to the translations is stored on our server. No personal information is collected. 56 | 57 | 58 |
59 |
60 |
61 | 62 | If you agree to participate, please complete the survey that follows this cover letter. 63 | It should take about 5 minutes of your time at the most for feedback on each translation. 64 | By completing the survey, you indicate your willingness to participate in this research. 65 | 66 | If you have any concerns, please contact me with the details provided below. 67 |
68 | Dr. Vukosi Marivate 69 |
70 | vukosi.marivate@cs.up.ac.za 71 |
72 |
73 |
74 |
75 | 76 |
77 | 78 |
79 |
80 | 81 |
82 |
83 |
84 | ) 85 | } 86 | 87 | export default Terms; 88 | -------------------------------------------------------------------------------- /src/client/src/pages/About.js: -------------------------------------------------------------------------------- 1 | import { Container, Card } from 'react-bootstrap' 2 | import React from 'react'; 3 | 4 | export default function About() { 5 | return( 6 |
7 | 8 | 9 | 10 | About 11 | Masakhane Web 12 |
13 | 14 | Masakhane Web is an open source online machine translation service for solely African languages. 15 | This project is in line with the works of the Masakhane community . Masakhane meaning ‘we build together’, 16 | is a research effort whose mission is to strengthen and spur NLP research for African languages which is open source and online. 17 | So far, the community has trained translation models for over 38 African languages. As such, this platform aims at hosting the already trained machine translation models from the Masakhane community and allows contributions 18 | from users to create new data for retraining and improving the models.
19 |
20 |
21 |
22 | 23 |
24 | The Masakhane Web project is led by Data Science for Social Impact research group at the Department of Computer Science, University of Pretoria, South Africa. 25 |
26 |
27 |
28 |
29 | 30 | The feedback mechanism of this project has been approved by the University of Pretoria Faculty of Engineering, Built Environment and Information Technology(EBIT) Research Ethics Committee. 31 | 32 |
33 | 34 |
35 |
36 | 37 | If you would like to contribute to this project, train a model in your language or want to collaborate and work with Masakhane, find out how in https://github.com/dsfsi/masakhane-web or reach out to any of the Masakhane Web contributors in the following ways: 38 | 39 |
40 |
41 | 42 |
43 |
44 | Dr. Vukosi Marivate 45 |
46 | vukosi.marivate@cs.up.ac.za 47 |
48 | @vukosi 49 |
50 |
51 | Abiodun Modupe 52 |
53 | abiodun.modupe@cs.up.ac.za 54 |
55 |
56 | Salomon Kabongo 57 |
58 | skabenamualu@aimsammi.org 59 |
60 | @SalomonKabongo 61 |
62 |
63 | Catherine Gitau 64 |
65 | cgitau@aimsammi.org 66 |
67 | @categitau_ 68 |
69 | 70 | 71 |
72 |
73 |
74 |
75 |
76 |
77 |
.PHONY: clean data lint requirements sync_data_to_s3 sync_data_from_s3

#################################################################################
# GLOBALS                                                                       #
#################################################################################

PROJECT_DIR := $(shell dirname $(realpath $(lastword $(MAKEFILE_LIST))))
BUCKET = [OPTIONAL] your-bucket-for-syncing-data (do not include 's3://')
PROFILE = default
PROJECT_NAME = mit-808-starter
PYTHON_INTERPRETER = python3

# Detect whether a conda executable is available on PATH.
ifeq (,$(shell which conda))
HAS_CONDA=False
else
HAS_CONDA=True
endif

#################################################################################
# COMMANDS                                                                      #
#################################################################################

## Install Python Dependencies
requirements: test_environment
	$(PYTHON_INTERPRETER) -m pip install -U pip setuptools wheel
	$(PYTHON_INTERPRETER) -m pip install -r requirements.txt

## Make Dataset
data: requirements
	$(PYTHON_INTERPRETER) src/data/make_dataset.py data/raw data/processed

## Delete all compiled Python files
clean:
	find . -type f -name "*.py[co]" -delete
	find . -type d -name "__pycache__" -delete

## Lint using flake8
lint:
	flake8 src

## Upload Data to S3
sync_data_to_s3:
ifeq (default,$(PROFILE))
	aws s3 sync data/ s3://$(BUCKET)/data/
else
	aws s3 sync data/ s3://$(BUCKET)/data/ --profile $(PROFILE)
endif

## Download Data from S3
sync_data_from_s3:
ifeq (default,$(PROFILE))
	aws s3 sync s3://$(BUCKET)/data/ data/
else
	aws s3 sync s3://$(BUCKET)/data/ data/ --profile $(PROFILE)
endif

## Set up python interpreter environment
create_environment:
ifeq (True,$(HAS_CONDA))
	@echo ">>> Detected conda, creating conda environment."
ifeq (3,$(findstring 3,$(PYTHON_INTERPRETER)))
	conda create --name $(PROJECT_NAME) python=3
else
	conda create --name $(PROJECT_NAME) python=2.7
endif
	@echo ">>> New conda env created. Activate with:\nsource activate $(PROJECT_NAME)"
else
	$(PYTHON_INTERPRETER) -m pip install -q virtualenv virtualenvwrapper
	@echo ">>> Installing virtualenvwrapper if not already installed.\nMake sure the following lines are in shell startup file\n\
export WORKON_HOME=$$HOME/.virtualenvs\nexport PROJECT_HOME=$$HOME/Devel\nsource /usr/local/bin/virtualenvwrapper.sh\n"
	@bash -c "source `which virtualenvwrapper.sh`;mkvirtualenv $(PROJECT_NAME) --python=$(PYTHON_INTERPRETER)"
	@echo ">>> New virtualenv created. Activate with:\nworkon $(PROJECT_NAME)"
endif

## Test python environment is setup correctly
test_environment:
	$(PYTHON_INTERPRETER) test_environment.py

#################################################################################
# PROJECT RULES                                                                 #
#################################################################################



#################################################################################
# Self Documenting Commands                                                     #
#################################################################################

.DEFAULT_GOAL := help

# The sed program below pairs each "## " doc comment with the target name that
# follows it ("target---description"); awk then prints the target in a fixed
# cyan column and word-wraps the description to the terminal width.
# Separate -e expressions are necessary because sed labels cannot be delimited
# by a semicolon.
.PHONY: help
help:
	@echo "$$(tput bold)Available rules:$$(tput sgr0)"
	@echo
	@sed -n -e "/^## / { \
		h; \
		s/.*//; \
		:doc" \
		-e "H; \
		n; \
		s/^## //; \
		t doc" \
		-e "s/:.*//; \
		G; \
		s/\\n## /---/; \
		s/\\n/ /g; \
		p; \
	}" ${MAKEFILE_LIST} \
	| LC_ALL='C' sort --ignore-case \
	| awk -F '---' \
		-v ncol=$$(tput cols) \
		-v indent=19 \
		-v col_on="$$(tput setaf 6)" \
		-v col_off="$$(tput sgr0)" \
	'{ \
		printf "%s%*s%s ", col_on, -indent, $$1, col_off; \
		n = split($$2, words, " "); \
		line_length = ncol - indent; \
		for (i = 1; i <= n; i++) { \
			line_length -= length(words[i]) + 1; \
			if (line_length <= 0) { \
				line_length = ncol - indent - length(words[i]) - 1; \
				printf "\n%*s ", -indent, " "; \
			} \
			printf "%s ", words[i]; \
		} \
		printf "\n"; \
	}' \
	| more $(shell test $(shell uname) = Darwin && echo '--no-init --raw-control-chars')
25 |
26 |
Part 1/2
27 |
28 | 29 |
30 |

How well do you know {src_lang}?

31 |
32 | 33 | 34 | 41 | 42 | 43 | 50 | 51 | 52 | 59 | 60 | 61 | 68 | 69 | 70 |
71 |
72 | 73 |
74 |
75 |
76 | 77 |
78 |

How well do you know {tgt_lang}?

79 |
80 | 81 | 82 | 89 | 90 | 91 | 98 | 99 | 100 | 107 | 108 | 109 | 116 | 117 | 118 |
119 |
120 | 121 |
122 |
123 |
124 | 125 |
126 | 127 |
128 |
"""Download Hugging Face transformer models and save them for TorchServe.

Reads a JSON settings file (``setup_config.json`` by default, or the file
named by the first CLI argument) and saves either the pretrained weights +
tokenizer or a TorchScript trace under ``transformer_models/<model_name>/``.
"""
import json
import os
import sys
from pathlib import Path

import torch
import transformers
from transformers import (
    AutoConfig,
    AutoModelForCausalLM,
    AutoModelForQuestionAnswering,
    AutoModelForSequenceClassification,
    AutoModelForTokenClassification,
    AutoTokenizer,
    AutoModelForSeq2SeqLM,
    set_seed,
)

print("Transformers version", transformers.__version__)
set_seed(1)
# Trace/run on GPU when one is available; otherwise fall back to CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")


def transformers_model_downloader(
    mode,
    pretrained_model_name,
    num_labels,
    do_lower_case,
    max_length,
    torchscript,
    save_mode="pretrained",
):
    """Download a transformer model and save it for serving.

    Saves the checkpoint and config file along with the tokenizer config and
    vocab files of the chosen transformer model.

    Args:
        mode: Task type; one of "sequence_classification",
            "question_answering", "token_classification", "text_generation"
            or "translation" (the mode added for the Masakhane models).
        pretrained_model_name: Hugging Face model-hub name or local path.
        num_labels: Label count for classification heads (ignored otherwise).
        do_lower_case: Tokenizer lower-casing flag.
        max_length: Sequence length used for the TorchScript trace input.
        torchscript: Whether the model config should enable torchscript.
        save_mode: "pretrained" to save weights + tokenizer, or
            "torchscript" to save a traced model. FIX: this used to be read
            from a global defined only under ``__main__``, so importing and
            calling this function raised NameError; it is now an explicit,
            backward-compatible keyword parameter.

    Raises:
        ValueError: If ``mode`` is not one of the supported task types.
    """
    print("Download model and tokenizer", pretrained_model_name)
    # Load the pre-trained model and tokenizer for the requested task type.
    if mode == "sequence_classification":
        config = AutoConfig.from_pretrained(
            pretrained_model_name, num_labels=num_labels, torchscript=torchscript
        )
        model = AutoModelForSequenceClassification.from_pretrained(
            pretrained_model_name, config=config
        )
        tokenizer = AutoTokenizer.from_pretrained(
            pretrained_model_name, do_lower_case=do_lower_case
        )
    elif mode == "question_answering":
        config = AutoConfig.from_pretrained(
            pretrained_model_name, torchscript=torchscript
        )
        model = AutoModelForQuestionAnswering.from_pretrained(
            pretrained_model_name, config=config
        )
        tokenizer = AutoTokenizer.from_pretrained(
            pretrained_model_name, do_lower_case=do_lower_case
        )
    elif mode == "token_classification":
        config = AutoConfig.from_pretrained(
            pretrained_model_name, num_labels=num_labels, torchscript=torchscript
        )
        model = AutoModelForTokenClassification.from_pretrained(
            pretrained_model_name, config=config
        )
        tokenizer = AutoTokenizer.from_pretrained(
            pretrained_model_name, do_lower_case=do_lower_case
        )
    elif mode == "text_generation":
        config = AutoConfig.from_pretrained(
            pretrained_model_name, num_labels=num_labels, torchscript=torchscript
        )
        model = AutoModelForCausalLM.from_pretrained(
            pretrained_model_name, config=config
        )
        tokenizer = AutoTokenizer.from_pretrained(
            pretrained_model_name, do_lower_case=do_lower_case
        )
    elif mode == "translation":
        # New mode created to handle the Masakhane translation models.
        tokenizer = AutoTokenizer.from_pretrained(pretrained_model_name)
        model = AutoModelForSeq2SeqLM.from_pretrained(pretrained_model_name)
    else:
        # FIX: an unrecognised mode previously fell through and crashed later
        # with NameError on `model`/`tokenizer`; fail fast with a clear error.
        raise ValueError(f"Unsupported mode: {mode!r}")

    # NOTE: for demonstration purposes, we do not go through fine-tuning here.
    # A fine-tuning process based on your needs can be added; an example of a
    # fine-tuned model has been provided in the README.

    NEW_DIR = Path(__file__).parent.joinpath("transformer_models", pretrained_model_name)
    NEW_DIR.mkdir(parents=True, exist_ok=True)
    print(f"Successfully created directory {NEW_DIR.__str__()} ")

    print(
        "Save model and tokenizer/ Torchscript model based on the setting from setup_config",
        pretrained_model_name,
        "in directory",
        NEW_DIR,
    )
    if save_mode == "pretrained":
        model.save_pretrained(NEW_DIR)
        tokenizer.save_pretrained(NEW_DIR)
    elif save_mode == "torchscript":
        # Trace the model on a dummy padded input and save the TorchScript
        # artifact next to the pretrained files.
        dummy_input = "This is a dummy input for torch jit trace"
        inputs = tokenizer.encode_plus(
            dummy_input,
            max_length=int(max_length),
            pad_to_max_length=True,
            add_special_tokens=True,
            return_tensors="pt",
        )
        input_ids = inputs["input_ids"].to(device)
        attention_mask = inputs["attention_mask"].to(device)
        model.to(device).eval()
        traced_model = torch.jit.trace(model, (input_ids, attention_mask))
        torch.jit.save(traced_model, os.path.join(NEW_DIR, "traced_model.pt"))
    return


if __name__ == "__main__":
    dirname = os.path.dirname(__file__)
    if len(sys.argv) > 1:
        filename = os.path.join(dirname, sys.argv[1])
    else:
        filename = os.path.join(dirname, "setup_config.json")
    # FIX: use a context manager; the original left the settings file open.
    with open(filename) as f:
        settings = json.load(f)
    mode = settings["mode"]
    model_name = settings["model_name"]
    num_labels = int(settings["num_labels"])
    do_lower_case = settings["do_lower_case"]
    max_length = settings["max_length"]
    save_mode = settings["save_mode"]
    torchscript = save_mode == "torchscript"

    transformers_model_downloader(
        mode,
        model_name,
        num_labels,
        do_lower_case,
        max_length,
        torchscript,
        save_mode=save_mode,
    )
-------------------------------------------------------------------------------- /docs/debugging_setup.md: -------------------------------------------------------------------------------- 1 | # Common SetUp errors and Debugging 2 | 3 | ## Table of Contents 4 | - [**Errors during setup**](#errors-during-setup) 5 | - [**Errors with Docker**](#errors-with-docker) 6 | - [**gcsfuse** - Noted on Mac M1 (Dec 2022)](#gcsfuse---noted-on-mac-m1-dec-2022) 7 | - [**failed to solve** - Noted on Mac M1 (Dec 2022)](#failed-to-solve---noted-on-mac-m1-dec-2022) 8 | - [**Errors with stand alone setup**](#errors-with-stand-alone-setup) 9 | - [**PyICU/Polyglot** - Noted on Linux/Ubuntu (Jun 2022)](#pyicupolyglot---noted-on-linuxubuntu-jun-2022) 10 | - [**Checking the client, server/api \& database**](#checking-the-client-serverapi--database) 11 | - [**Check the client**](#check-the-client) 12 | - [**Check the api**](#check-the-api) 13 | - [**Notable API endpoints to test using GET:**](#notable-api-endpoints-to-test-using-get) 14 | - [**Notable API endpoints to test using POST:**](#notable-api-endpoints-to-test-using-post) 15 | - [**Check the database**](#check-the-database) 16 | - [**With Docker**](#with-docker) 17 | - [**With Stand alone backend**](#with-stand-alone-backend) 18 | 19 | 20 | # **Errors during setup** 21 | 22 | ## **Errors with Docker** 23 | ### **gcsfuse** - Noted on Mac M1 (Dec 2022) 24 | Seems to be a architecture issue, resolved by running the command: 25 | ```bash 26 | export DOCKER_DEFAULT_PLATFORM=linux/amd64 27 | ``` 28 | [solution reference](https://github.com/GoogleCloudPlatform/gcsfuse/issues/586) 29 | 30 | ### **failed to solve** - Noted on Mac M1 (Dec 2022) 31 | Full err message: 32 | ``` 33 | failed to solve: rpc error: code = Unknown desc = failed to solve with frontend dockerfile.v0: failed to create LLB definition: failed to authorize: rpc error: code = Unknown desc = failed to fetch anonymous token: Get 
"https://auth.docker.io/token?scope=repository%3Alibrary%2Fnode%3Apull&service=registry.docker.io": dial tcp: lookup auth.docker.io on 192.168.0.1:53: no such host 34 | ``` 35 | 36 | This is a ad-hoc error, possible solutions: 37 | - Sign in to docker hub and docker cli ```docker signin``` 38 | - Within `Docker hub>Settings>Docker Engine`,set `buildkit` to `false` 39 | - Instead of `docker-compose`, try `docker compose` 40 | - Lost all hope? Go make a cup of coffee, sometimes it works if you just give it a minute... 41 | 42 | [solution signin reference](https://stackoverflow.com/questions/65361083/docker-build-failed-to-fetch-oauth-token-for-openjdk) | [solution buildkit reference](https://stackoverflow.com/questions/64221861/an-error-failed-to-solve-with-frontend-dockerfile-v0) 43 | 44 | **Note** Running these commands is not advisable: 45 | ```bash 46 | export DOCKER_BUILDKIT=0 47 | export COMPOSE_DOCKER_CLI_BUILD=0 48 | ``` 49 | This will invalidate the GCSFuse fix for Mac M1. 50 | 51 | ## **Errors with stand alone setup** 52 | 53 | ### **PyICU/Polyglot** - Noted on Linux/Ubuntu (Jun 2022) 54 | 55 | Resolved by running the commands: 56 | ```bash 57 | apt-get update 58 | ``` 59 | 60 | Then either - from apt directly : https://packages.debian.org/source/stable/pyicu: 61 | ```bash 62 | apt-get install python3-icu 63 | ``` 64 | OR - from source: 65 | ```bash 66 | apt-get install pkg-config libicu-dev 67 | pip install --no-binary=:pyicu: pyicu 68 | ``` 69 | 70 | # **Checking the client, server/api & database** 71 | ## **Check the client** 72 | The client should be running on http://localhost:3000. 73 | 74 | Check the terminal (standalone), inspect the webpage or view the docker logs for error output. 75 | ## **Check the api** 76 | The API should be running on http://localhost:5000 and return the following output: 77 | ```json 78 | { 79 | "message": "welcome Masakhane Web" 80 | } 81 | ``` 82 | Check the terminal (standalone) or view the docker logs for error output. 
83 | 84 | ### **Notable API endpoints to test using GET:** 85 | Make get requests by going to the web endpoint in your browser 86 | | Endpoint | Description | 87 | | -------- | ----------- | 88 | | http://localhost:5000/update | Updates the local database with the newly loaded models | 89 | | http://localhost:5000/translate | Lists the saved models | 90 | 91 | 92 | 93 | ### **Notable API endpoints to test using POST:** 94 | Use a developer tool such as [Postman](https://www.postman.com/) to make POST requests 95 | | Endpoint | Description | Example Body | 96 | | ------ | --------- | --------- | 97 | | http://localhost:5000/translate | Returns the translated text |
{
"src_lang": "english",
"tgt_lang": "swahili",
"input": "Hello, how are you?"
}
| 98 | 99 | ## **Check the database** 100 | Docker makes use of a postgreSQL database 101 | The stand alone app uses sqlite, so there is an different method for access. 102 | 103 | ### **With Docker** 104 | The 'db-1' image in docker contains the database using PostgreSQL, you can access the DB system running on the image with the command: 105 | ``` 106 | docker-compose -f docker-compose.yml exec db psql --username=masakhane --dbname=masakhane 107 | ``` 108 | 109 | List all databases: 110 | ``` 111 | \l 112 | ``` 113 | 114 | Connect to the masakhane database: 115 | ``` 116 | \c masakhane 117 | ``` 118 | 119 | List relations 120 | ``` 121 | \dt 122 | ``` 123 | 124 | See saved information in a relation: 125 | ``` 126 | select * from language; 127 | ``` 128 | 129 | Quit the database: 130 | ``` 131 | \q 132 | ``` 133 | 134 | ### **With Stand alone backend** 135 | 136 | Within the `src/server/core/` directory, run this command to start the python interpreter: 137 | ``` 138 | python 139 | ``` 140 | 141 | Use the code below to check what is saved in the database 142 | 143 | ```python 144 | import sqlite3, os 145 | 146 | conn = sqlite3.connect("masakhane.db") 147 | c = conn.cursor() 148 | 149 | for row in c.execute('SELECT * FROM feedback'): 150 | print(row) 151 | 152 | for row in c.execute('SELECT * FROM language'): 153 | print(row) 154 | ``` 155 | -------------------------------------------------------------------------------- /docs/project_details.md: -------------------------------------------------------------------------------- 1 | # **Project Details** 2 | The requirements of Masakhane Web is to faciliate translations for African languages using different machine translation models. There is also an feauture to provide feedback and correction to inaccurate translations. 
3 | 4 | ## **Table of Contents** 5 | - [**Tech Stack**](#tech-stack) 6 | - [**Frontend**](#frontend) 7 | - [**React**](#react) 8 | - [**Webpack**](#webpack) 9 | - [**Backend**](#backend) 10 | - [**Python**](#python) 11 | - [**Database**](#database) 12 | - [**Flask**](#flask) 13 | - [**File Structure**](#file-structure) 14 | 15 | 16 | 17 | # **Tech Stack** 18 | 19 | ## **Frontend** 20 | Review the [client readme](../../src/client/README.md) for more information. 21 | 22 | ### **React** 23 | The frontend is written using [React](https://reactjs.org/). 24 | 25 | ### **Webpack** 26 | The frontend also makes use of [Webpack](https://webpack.js.org/), a static module bundler for modern JavaScript applications. 27 | 28 | - **Webpack DevServer & Proxy** 29 | The [devServer](https://webpack.js.org/configuration/dev-server/) runs on http://translate.masakhane.io:80. 30 | The [proxy](https://webpack.js.org/configuration/dev-server/#devserverproxy) allows you to send requests to http://translate.masakhane.io/translate and have it hit the backend at http://localhost:5000/translate. 31 | 32 | 33 | ## **Backend** 34 | Review the [server readme](../../src/server/README.md) for more information 35 | 36 | ### **Python** 37 | The backend is written using [Python](https://www.python.org/) 38 | 39 | ### **Database** 40 | The backend database is predominantly PostgreSQL on Docker, but there is an option to use SQLite when running a stand-alone backend. 41 | 42 | ### **Flask** 43 | The backend also makes use of [Flask](https://flask.palletsprojects.com/en/2.2.x/), which is for web development in Python. 44 | 45 | - **App** 46 | Masakhane Web makes use of the Flask [application factory](https://flask.palletsprojects.com/en/2.2.x/patterns/appfactories/) pattern in `src/core/__init__.py` 47 | 48 | - **API** 49 | The API uses [flask_restful](https://flask-restful.readthedocs.io/en/latest/quickstart.html#resourceful-routing) and is defined in `src/core/resources/translate.py`. 
50 | It is initialised along with the app in `src/core/__init__.py`. 51 | 52 | - **Database** 53 | The application interacts with the database using [flask_sqlalchemy](https://flask-sqlalchemy.palletsprojects.com/en/3.0.x/) and is defined in `src/core/extensions.py`. 54 | It is initialised along with the app in `src/core/__init__.py`. (Note the `.env.dev` for database config) 55 | 56 | # **File Structure** 57 | 58 | ``` 59 | .masakhane-web 60 | |-- docker-compose.yml # Docker compose for local instance 61 | |-- docker-compose.prod.yml # Docker compose for production instance 62 | |-- entrypoint.sh 63 | |-- environment.yaml 64 | `-- src 65 | |-- client # IDK much about the frontend, update required 66 | | |-- Dockerfile 67 | | |-- package-lock.json 68 | | |-- package.json 69 | | |-- public 70 | | |-- src 71 | | | |-- App.js 72 | | | |-- App.test.js 73 | | | |-- components 74 | | | | |-- translateCard.js 75 | | | | `-- *others* 76 | | | |-- images 77 | | | |-- index.css 78 | | | |-- index.js 79 | | | |-- logo.svg 80 | | | |-- pages 81 | | | | |-- About.js 82 | | | | |-- Faq.js 83 | | | | `-- Home.js 84 | | | |-- reportWebVitals.js 85 | | | |-- setupProxy.js 86 | | | `-- setupTests.js 87 | | `-- webpack.config.js 88 | `-- server 89 | |-- __init__.py 90 | |-- available_models.tsv # TSV file containing available models 91 | |-- languages.json # JSON file containing language information (names, etc) 92 | |-- Dockerfile 93 | |-- entrypoint.sh # Docker entrypoint for Dockerfile 94 | |-- Dockerfile.prod 95 | |-- entrypoint.prod.sh # Docker entrypoint for Dockerfile.prod 96 | |-- requirements.txt # Python dependencies 97 | |-- manage.py # Manage CLI 98 | |-- core 99 | | |-- __init__.py # Flask app factory & init 100 | | |-- resources 101 | | | `-- translate.py # Flask API 102 | | |-- extensions.py # Flask_SQLAlchemy init 103 | | |-- models 104 | | | |-- feedback.py # Feedback DB Model 105 | | | |-- language.py # Language DB Model 106 | | | |-- predict.py # I think this is in 
the wrong place, does the translation 107 | | | `-- translation.py # Translation object 108 | | |-- model_load.py # Class to manage the download and loading of different translation models 109 | | |-- config.py # Different config states for dev enviroments 110 | | |-- languages.json # Duplicate of ../languages.json 111 | | |-- tests 112 | | | |-- __init__.py 113 | | | |-- base.py # Test create app 114 | | | |-- test_app.py # Test API 115 | | | `-- test_config.py # Dev tests 116 | | |-- utils.py 117 | | `-- utils_bucket 118 | | |-- bucket.py 119 | | `-- upload_download.py 120 | |-- models # Translation models are stored here 121 | | `-- joeynmt 122 | | |-- en-sw-JW300 # File struct of a complete model for English to Swahili 123 | | | |-- config.yaml 124 | | | |-- config_orig.yaml 125 | | | |-- model.ckpt 126 | | | |-- src.bpe.model 127 | | | |-- src_vocab.txt 128 | | | |-- trg.bpe.model 129 | | | `-- trg_vocab.txt 130 | `-- nginx 131 | |-- Dockerfile 132 | `-- nginx.conf 133 | ``` -------------------------------------------------------------------------------- /docs/start_app_locally_doc.md: -------------------------------------------------------------------------------- 1 | # **Running the App Locally** 2 | 3 | The app can be run as a standalone or using Docker, unless you are working on an machine running linux/ubuntu, it is adviseable to use Docker. 4 | 5 | To run the app in production, see [here](start_app_prod_doc.md). 6 | 7 | For any errors during setup, please see the [debugging doc](debugging_setup.md). 8 | 9 | Review the [project details doc](project_details.md) for more information on the technology stack. 10 | Take note of the [Client](../../src/client/README.md) and [Server](../../src/server/README.md) README's. 
11 | 12 | ## **Table of Contents** 13 | - [**Using Docker ( Preferred )**](#using-docker--preferred-) 14 | - [**Docker Setup**](#docker-setup) 15 | - [**Running the app**](#running-the-app) 16 | - [**Building the App**](#building-the-app) 17 | - [**Shut down the app**](#shut-down-the-app) 18 | - [**Add, Update, \& Delete Languages**](#add-update--delete-languages) 19 | - [**Running tests**](#running-tests) 20 | - [**The Database**](#the-database) 21 | - [**As a stand-alone app**](#as-a-stand-alone-app) 22 | - [**Backend Setup**](#backend-setup) 23 | - [**Run the server:**](#run-the-server) 24 | - [**The Database**](#the-database-1) 25 | - [**Add, Update, \& Delete Languages**](#add-update--delete-languages-1) 26 | - [**Running tests**](#running-tests-1) 27 | - [**Frontend Setup**](#frontend-setup) 28 | - [**Run the client:**](#run-the-client) 29 | - [**Errors during setup**](#errors-during-setup) 30 | 31 | 32 | # **Using Docker ( Preferred )** 33 | 34 | The better/easier way to run the app is to use Docker, which will build both the frontend and the backend with the correct enviroment setup. 35 | 36 | ## **Docker Setup** 37 | 38 | Ensure you have `docker` & `docker-compose` installed on your computer, you can check with the following commands: 39 | ```bash 40 | docker --version 41 | docker-compose --version 42 | ``` 43 | 44 | If the above commands return an error, please install [Docker](https://docs.docker.com/engine/install/) and [Docker-compose](https://docs.docker.com/compose/install/). 45 | 46 | ## **Running the app** 47 | ### **Building the App** 48 | To build the app, from the root project directory, run the following command: 49 | ```bash 50 | docker-compose -f docker-compose.yml up -d --build 51 | ``` 52 | 53 | Docker should create a container named 'masakhane-web' with the images 'db-1', 'server-1', and 'client-1'. 
54 | The server should be active on http://localhost:5000 and the client on http://localhost:3000 55 | Look [here](debugging_setup.md#checking-the-client-serverapi--database) for checking these services manually. 56 | 57 | ### **Shut down the app** 58 | To shut down the app, run the following command to remove the docker container: 59 | ```bash 60 | docker-compose -f docker-compose.yml down 61 | ``` 62 | 63 | ### **Add, Update, & Delete Languages** 64 | **Add a Language** 65 | ```bash 66 | docker-compose -f docker-compose.yml exec server python manage.py add_language en-sw-JW300 67 | ``` 68 | The language code parameter `en-sw-JW300` represents {src-lang}-{tgt-lang}-{shortform} 69 | So `en-sw-JW300` represents English-Swahili using JW300 shortform 70 | **Note** - A code parameter example without shortform is `en-tiv-` 71 | 72 | Download available languages csv [here](https://zenodo.org/record/7417644/files/masakhane-mt-current-models.csv) 73 | 74 | **Update Langugaes** 75 | ```bash 76 | curl --request GET 'http://127.0.0.1:5000/update' 77 | ``` 78 | 79 | **Check available languages** 80 | ```bash 81 | docker-compose -f docker-compose.yml exec server python manage.py all_languages 82 | ``` 83 | 84 | **Remove a language** 85 | ```bash 86 | docker-compose -f docker-compose.yml exec server python manage.py remove_language en-sw-JW300 87 | ``` 88 | 89 | ### **Running tests** 90 | ```bash 91 | docker-compose -f docker-compose.yml exec server python manage.py tests 92 | ``` 93 | 94 | ### **The Database** 95 | Look [here](debugging_setup.md#with-docker) for more information about accessing the database 96 | 97 | # **As a stand-alone app** 98 | In order to run the app, we need to set up the backend and frontend seperately. 99 | **Note** It is advisable to be working on an linux/ubuntu machine. 
100 | 101 | ## **Backend Setup** 102 | 103 | First, ensure you are running [Python 3.6.9](https://www.python.org/downloads/release/python-369/) 104 | 105 | Within the `src/server` directory of the project 106 | 107 | **Install required packages:** 108 | ```bash 109 | pip install -r requirements.txt 110 | ``` 111 | 112 | **Run the following commands:** 113 | ```bash 114 | export FLASK_APP=core/__init__.py 115 | export FLASK_ENV=development 116 | ``` 117 | 118 | ## **Run the server:** 119 | To start the API and database services, run the command: 120 | ```bash 121 | python manage.py run 122 | ``` 123 | 124 | ### **The Database** 125 | Look [here](debugging_setup.md#with-stand-alone-backend) for more information about accessing the database 126 | 127 | ### **Add, Update, & Delete Languages** 128 | **Add a Language** 129 | ```bash 130 | python manage.py add_language en-sw-JW300 131 | ``` 132 | The language code parameter `en-sw-JW300` represents {src-lang}-{tgt-lang}-{shortform} 133 | So `en-sw-JW300` represents English-Swahili using JW300 shortform 134 | **Note** - A code parameter example without shortform is `en-tiv-` 135 | 136 | Download available languages csv [here](https://zenodo.org/record/7417644/files/masakhane-mt-current-models.csv) 137 | 138 | **Update Langugaes** 139 | ```bash 140 | curl --request GET 'http://127.0.0.1:5000/update' 141 | ``` 142 | **Check available languages** 143 | ```bash 144 | python manage.py all_languages 145 | ``` 146 | 147 | **Remove a language** 148 | ```bash 149 | python manage.py remove_language en-sw-JW300 150 | ``` 151 | 152 | ### **Running tests** 153 | ```bash 154 | python manage.py tests 155 | ``` 156 | 157 | The API is available at `http://localhost:5000`, see notable API endpoints [here](debugging_setup.md#check-the-api) 158 | 159 | ## **Frontend Setup** 160 | 161 | Ensure you have [node.js](https://nodejs.org/en/) and [yarn](https://classic.yarnpkg.com/en/docs/install) installed 162 | 163 | Within the `src/client/` 
directory of the project: 164 | **Install required packages:** 165 | ```bash 166 | npm install --legacy-peer-deps 167 | ``` 168 | 169 | **Run the following commands:** 170 | ```bash 171 | npm i webpack webpack-cli --legacy-peer-deps 172 | npm i @babel/core @babel/preset-env @babel/preset-react babel-loader --legacy-peer-deps 173 | ``` 174 | 175 | ## **Run the client:** 176 | To start the client , run the command: 177 | ```bash 178 | npm run develop 179 | ``` 180 | 181 | The client is available at `http://localhost:3000` 182 | 183 | # **Errors during setup** 184 | If there was a problem during setup, review [this doc](debugging_setup.md) for possible errors and solutions. 185 | 186 | -------------------------------------------------------------------------------- /src/client/src/components/step2.js: -------------------------------------------------------------------------------- 1 | import { Row, Col, Form, Button } from 'react-bootstrap'; 2 | import RadioButton from './common/radioButton'; 3 | import React from 'react'; 4 | 5 | const Step2 = ({ src_lang, tgt_lang, text, translation, setForm, formData, navigation, handleSubmitFeedback }) => { 6 | 7 | const { understand_translation, accurate_translation, own_translation } = formData; 8 | const { next } = navigation; 9 | 10 | const handleSubmit = () => { 11 | // submit form 12 | handleSubmitFeedback(); 13 | // then navigate to next page 14 | next(); 15 | } 16 | return ( 17 |
18 |
19 |
Part 2/2
20 |
21 | 22 |
23 | 24 | 25 |

{!!src_lang && src_lang.toUpperCase()}

26 |

{text}

27 | 28 | 29 | 30 |

{!!tgt_lang && tgt_lang.toUpperCase()}

31 |

{!!translation && translation}

32 | 33 |
34 |
35 | 36 |
37 |
38 |
39 | 40 |
41 |

Did you understand the translation? / Did it make sense?

42 |
43 | 44 | 45 | 52 | 53 | 54 | 61 | 62 | 63 | 70 | 71 | 72 | 79 | 80 | 81 |
82 |
83 | 84 |
85 |
86 |
87 | 88 |
89 |

How accurate was the translation?

90 |
91 | 92 | 93 | 100 | 101 | 102 | 109 | 110 | 111 | 118 | 119 | 120 | 127 | 128 | 129 |
130 |
131 | 132 |
133 |
134 |
135 | 136 |
137 |

How would you have translated this? (Optional)

138 |
139 |
140 | 141 | 150 | 151 |
152 |
153 |
154 | 155 |
156 |
157 |
158 | 159 |
160 | 161 |
162 |
#External modules
from flask_restful import Resource
from flask import request
from http import HTTPStatus
from collections import defaultdict
import os, json
#Internal modules
from core.model_load import MasakhaneModelLoader
from core.models.predict import Predicter
from core.models.feedback import Feedback
from core.models.language import Language
from core.models.translation import Translation

from pathlib import Path


class TranslateResource(Resource):
    """ TranslateResource
    -----------------
    #### User-Defined Flask API Resource accepting GET & POST\n
    GET - Lists available models\\
    POST - Performs translation from src lang to tgt lang; review the server ReadMe for more info.
    """
    def __init__(self, saved_models):
        # saved_models maps "<src>-<tgt>" short-code pairs to loaded model bundles.
        self.models = saved_models

        # languages.json binds short codes ('sw') to full English names
        # ('swahili'); the path is overridable via the JSON env var.
        json_file = os.environ.get('JSON', './languages.json')
        with open(json_file, 'r') as f:
            distros_dict = json.load(f)

        # Bidirectional lookup tables, keys/values all lowercase.
        # Example: languages_short_to_full['sw'] == 'swahili'
        # Example: languages_full_to_short['swahili'] == 'sw'
        self.languages_short_to_full = {}
        self.languages_full_to_short = {}
        for distro in distros_dict:
            short_name = distro['language_short'].lower()
            full_name = distro['language_en'].lower()
            self.languages_short_to_full[short_name] = full_name
            self.languages_full_to_short[full_name] = short_name

    def post(self):
        """POST method to translate a given input
        ---

        ### Request Body
        ```json
        {
            "src_lang" : "src_lang_full",
            "tgt_lang" : "tgt_lang_full",
            "input": "input_text",
        }
        ```
        ### Returns a Translation Object defined in `src/server/core/models/translation.py`
        ```json
        {
            "src_lang" : "src_lang_full",
            "tgt_lang" : "tgt_lang_full",
            "input": "input_text",
            "output": "translation_result"
        }
        ```
        Returns 404 with `{"message": "model not found"}` when the language
        pair is unknown or its model is not loaded.
        """
        # Get req body
        data = request.get_json()
        source_language = data['src_lang'].lower()
        target_language = data['tgt_lang'].lower()

        # FIX: unknown language names previously raised KeyError (HTTP 500);
        # report them as a clean 404 instead.
        source_language_short = self.languages_full_to_short.get(source_language)
        target_language_short = self.languages_full_to_short.get(target_language)
        if source_language_short is None or target_language_short is None:
            return {'message': 'model not found'}, HTTPStatus.NOT_FOUND

        # Model key providing the translation, e.g. "en-sw".
        input_model = source_language_short + '-' + target_language_short
        if input_model not in self.models:
            return {'message': 'model not found'}, HTTPStatus.NOT_FOUND

        # Hoist the bundle lookup instead of re-indexing self.models per kwarg.
        bundle = self.models[input_model]
        translation_result = Predicter().translate(
            data['input'],
            model=bundle['model'],
            src_vocab=bundle['src_vocab'],
            trg_vocab=bundle['trg_vocab'],
            preprocess=bundle['preprocess'],
            postprocess=bundle['postprocess'],
            logger=bundle['logger'],
            beam_size=bundle['beam_size'],
            beam_alpha=bundle['beam_alpha'],
            level=bundle['level'],
            lowercase=bundle['lowercase'],
            max_output_length=bundle['max_output_length'],
            use_cuda=bundle['use_cuda'],
        )

        trans = Translation(src_lang=data['src_lang'],
                            tgt_lang=data['tgt_lang'],
                            input=data['input'],
                            output=translation_result)

        return trans.data, HTTPStatus.CREATED

    def get(self):
        """GET Method to list available models in memory
        ---

        Returns a json list, ie
        ```json
        [
            {
                "type": "source",
                "name": "src_lang_full",
                "value": "src_lang_short",
                "targets": [
                    {
                        "name": "tgt_lang_full",
                        "value": "tgt_lang_short"
                    }
                ]
            }
        ]
        ```
        """
        # Group loaded "<src>-<tgt>" pairs by their source language.
        dict_output = defaultdict(list)
        for couple in list(self.models.keys()):
            src, tgt = couple.split("-")
            dict_output[src].append(
                {
                    'name': self.languages_short_to_full[tgt].capitalize(),
                    'value': tgt
                }
            )

        output = []
        for source in dict_output:
            output.append(
                {
                    "type": "source",
                    "name": self.languages_short_to_full[source].capitalize(),
                    "value": source,
                    'targets': dict_output[source]
                }
            )

        return output, HTTPStatus.OK
stored in the Language table 154 | """ 155 | def __init__(self, saved_models): 156 | self.models = saved_models 157 | # Load file path to avialable_models.tsv which has all the github & google drive links that store the model files 158 | self.selected_models_file = os.environ.get('MODEL_ALL_FILE', 159 | "./available_models.tsv") 160 | 161 | def get(self): 162 | """GET Method to update the available models 163 | --- 164 | Returns a json Object, ie 165 | ```json 166 | { 167 | "message": "Models updated" 168 | } 169 | ``` 170 | """ 171 | model_loader = MasakhaneModelLoader(available_models_file=os.environ.get('MODEL_ALL_FILE', 172 | './available_models.tsv')) 173 | db_pairs = [] 174 | model_directory = Path.cwd().joinpath('models', 'joeynmt') 175 | downloaded_models = list(model_directory.iterdir()) 176 | #loads model info from the Language table 177 | for lan in Language.query.all(): 178 | language_pair = lan.to_json() 179 | src_language =language_pair['source'] 180 | tgt_language = language_pair['target'] 181 | domain = language_pair['domain'] 182 | db_pair = f"{language_pair['source']}-{language_pair['target']}" 183 | # check if the model is not already loaded in memory 184 | if db_pair not in list(self.models.keys()): 185 | name_tag = src_language+"-"+tgt_language+"-"+domain 186 | # check if the model is not already downloaded 187 | if name_tag not in downloaded_models: 188 | print("Downloading model for "+name_tag) 189 | model_loader.download_model(src_language, tgt_language, domain) 190 | # Attempts to download model and store in self.models 191 | self.models[db_pair] = model_loader.load_model(src_language, tgt_language, domain) 192 | print(f"db_pair : {db_pair} \n now : {list(self.models.keys())}") 193 | 194 | # keep all the pairs in the db 195 | db_pairs.append(db_pair) 196 | 197 | # Remove models from memory that are not listed in the DB Language table 198 | for pair in list(self.models.keys()): 199 | if pair not in db_pairs: 200 | del self.models[pair] 201 | 
202 | return {'message': "Models updated"}, HTTPStatus.OK 203 | 204 | 205 | class SaveResource(Resource): 206 | """ SaveResource 207 | ------------ 208 | #### User-Defined Flask API Resource accepting POST\n 209 | POST - saves feedback/correction information into the Feedback database 210 | """ 211 | def __init__(self): 212 | super().__init__() 213 | 214 | def post(self): 215 | """POST Method to save feeback into the DB Feedback table 216 | --- 217 | ### Request Body 218 | ```json 219 | { 220 | "src_lang" : "src_lang_full", 221 | "tgt_lang" : "tgt_lang_full", 222 | "input": "input_text", 223 | "review": "translation_correction", 224 | "stars": "translation_confidence", 225 | "token": "user_auth(bool)", 226 | } 227 | ``` 228 | ### Returns a Translation Object defined in `src/server/core/models/translation.py 229 | ```json 230 | { 231 | "message": "Review saved" 232 | } 233 | """ 234 | 235 | data = request.get_json() 236 | 237 | feedback = Feedback( 238 | src_lang=data['src_lang'], 239 | tgt_lang=data['tgt_lang'], 240 | accurate_translation=data['accurate_translation'], 241 | know_src_lang=data['know_src_lang'], 242 | know_tgt_lang=data['know_tgt_lang'], 243 | own_translation=data['own_translation'], 244 | text=data['text'], 245 | translation=data['translation'], 246 | understand_translation=data['understand_translation'], 247 | feedbackToken=data['feedbackToken']) 248 | 249 | feedback.save() 250 | 251 | return {'message': "Review saved"}, HTTPStatus.CREATED 252 | 253 | 254 | class HomeResource(Resource): 255 | """ HomeResource 256 | ------------ 257 | User-Defined Flask API Resource accepting GET\n 258 | GET - returns {'message': "welcome Masakhane Web"} 259 | """ 260 | def __init__(self): 261 | super().__init__() 262 | 263 | def get(self): 264 | return {'message': "welcome Masakhane Web"}, HTTPStatus.OK 265 | -------------------------------------------------------------------------------- /src/torchserve/transformer_handler.py: 
import ast
import json
import logging
import os
from abc import ABC

import torch
import transformers
from captum.attr import LayerIntegratedGradients
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM

from ts.torch_handler.base_handler import BaseHandler

logger = logging.getLogger(__name__)
logger.info("Transformers version %s", transformers.__version__)

# Fallback tokenization length when the tokenizer does not define one.
DEFAULT_MAX_LENGTH = 1024


class M2MTranslatorHandler(BaseHandler, ABC):
    """
    Transformer handler for machine translation task using the m2m_100 model.
    """

    def __init__(self):
        super(M2MTranslatorHandler, self).__init__()
        self.initialized = False

    def initialize(self, ctx):
        """Load the seq2seq model and tokenizer from the model archive.

        Args:
            ctx (context): JSON Object containing information pertaining to
                the model artefact parameters.
        """
        self.manifest = ctx.manifest
        properties = ctx.system_properties
        model_dir = properties.get("model_dir")
        serialized_file = self.manifest["model"]["serializedFile"]
        # not used directly — from_pretrained loads from model_dir — but kept
        # to document which serialized file the manifest points at
        model_pt_path = os.path.join(model_dir, serialized_file)

        self.device = torch.device(
            "cuda:" + str(properties.get("gpu_id"))
            if torch.cuda.is_available() and properties.get("gpu_id") is not None
            else "cpu"
        )
        self.tokenizer = AutoTokenizer.from_pretrained(model_dir)
        # BUG FIX: preprocess() previously referenced an undefined bare name
        # `max_length`, raising NameError on the first request.  Derive it from
        # the tokenizer, with a sane fallback.
        self.max_length = getattr(self.tokenizer, "model_max_length", None) \
            or DEFAULT_MAX_LENGTH

        self.model = AutoModelForSeq2SeqLM.from_pretrained(model_dir)
        self.model.eval()

        logger.info("Transformer model from path %s loaded successfully", model_dir)

        self.initialized = True

    def preprocess(self, requests):
        """Tokenize the incoming requests into one batched tensor pair.

        Args:
            requests (list): list of request dicts; the text is under "data"
                or "body", possibly as raw bytes.
        Returns:
            tuple: (input_ids_batch, attention_mask_batch) tensors on
            self.device, one row per request.
        """
        input_ids_batch = None
        attention_mask_batch = None
        for idx, data in enumerate(requests):
            input_text = data.get("data")
            if input_text is None:
                input_text = data.get("body")
            if isinstance(input_text, (bytes, bytearray)):
                input_text = input_text.decode("utf-8")
            logger.info("Received text: '%s'", input_text)

            inputs = self.tokenizer.encode_plus(
                input_text,
                max_length=int(self.max_length),
                # `pad_to_max_length` is deprecated in modern transformers;
                # padding + explicit truncation is the supported spelling.
                padding="max_length",
                truncation=True,
                add_special_tokens=True,
                return_tensors="pt",
            )
            input_ids = inputs["input_ids"].to(self.device)
            attention_mask = inputs["attention_mask"].to(self.device)
            # making a batch out of the received requests
            # attention masks are passed for cases where input tokens are padded.
            if input_ids.shape is not None:
                if input_ids_batch is None:
                    input_ids_batch = input_ids
                    attention_mask_batch = attention_mask
                else:
                    input_ids_batch = torch.cat((input_ids_batch, input_ids), 0)
                    attention_mask_batch = torch.cat(
                        (attention_mask_batch, attention_mask), 0
                    )
        return (input_ids_batch, attention_mask_batch)

    def inference(self, input_batch):
        """Generate translations for a batch of tokenized inputs.

        Args:
            input_batch (tuple): (input_ids_batch, attention_mask_batch)
                produced by preprocess().
        Returns:
            list: one decoded output string per input row.
        """
        input_ids_batch, attention_mask_batch = input_batch
        inferences = []
        # TODO: move these generation parameters into a generation configuration.
        # BUG FIX: the attention mask was built in preprocess() but never
        # passed to generate(), so padded positions were attended to.
        outputs = self.model.generate(
            input_ids_batch,
            attention_mask=attention_mask_batch,
            max_length=50,
            do_sample=True,
            top_p=0.95,
            top_k=60,
        )
        for output in outputs:
            inferences.append(
                self.tokenizer.decode(output, skip_special_tokens=True)
            )

        # BUG FIX: the original `logger.info("Generated text", inferences)`
        # passed an argument with no %s placeholder, which triggers a logging
        # formatting error.  Log once, lazily.
        logger.info("Generated text: '%s'", inferences)
        return inferences

    def postprocess(self, inference_output):
        """Pass the decoded strings through unchanged (already JSON-serializable).

        Args:
            inference_output (list): predicted responses from inference().
        Returns:
            list: the same list, in TorchServe-readable format.
        """
        return inference_output

    def get_insights(self, input_batch, text, target):
        """Initialize and call Layer Integrated Gradients for word importances.

        NOTE(review): `self.setup_config` is never assigned anywhere in this
        handler (initialize() does not set it), so calling this method would
        raise AttributeError — confirm whether a setup_config.json loader was
        meant to be ported from the stock HuggingFace handler.

        Args:
            input_batch (int): batches of token IDs of text.
            text (str): the text specified in the input request.
            target (int): target label, at the user's discretion.
        Returns:
            (list): a list with one dict containing the word tokens.
        """
        if self.setup_config["captum_explanation"]:
            embedding_layer = getattr(self.model, self.setup_config["embedding_name"])
            embeddings = embedding_layer.embeddings
            self.lig = LayerIntegratedGradients(captum_sequence_forward, embeddings)
        else:
            logger.warning("Captum Explanation is not chosen and will not be available")

        if isinstance(text, (bytes, bytearray)):
            text = text.decode("utf-8")
        text_target = ast.literal_eval(text)

        if not self.setup_config["mode"] == "question_answering":
            text = text_target["text"]
        self.target = text_target["target"]

        input_ids, ref_input_ids, attention_mask = construct_input_ref(
            text, self.tokenizer, self.device, self.setup_config["mode"]
        )
        all_tokens = get_word_token(input_ids, self.tokenizer)
        response = {}
        response["words"] = all_tokens
        return [response]


def construct_input_ref(text, tokenizer, device, mode):
    """For a given text, create token ids, reference ids and an attention
    mask based on encode, which is faster for captum insights.

    Args:
        text (str): the text specified in the input request.
        tokenizer (AutoTokenizer): word-tokenizes the input text.
        device (cpu or gpu): environment the server runs on.
        mode: unused here; kept for call-site compatibility.
    Returns:
        input_ids (Tensor): tensor of the tokenized input.
        ref_input_ids (Tensor): baseline ids for the attributions
            (pad tokens between cls/sep).
        attention_mask (Tensor): binary tensor marking non-padded positions.
    """
    # NOTE(review): cls_token_id/sep_token_id can be None for some seq2seq
    # tokenizers (e.g. M2M100) — confirm against the deployed tokenizer.
    text_ids = tokenizer.encode(text, add_special_tokens=False)
    # construct input token ids
    logger.info("text_ids %s", text_ids)
    logger.info("[tokenizer.cls_token_id] %s", [tokenizer.cls_token_id])
    input_ids = [tokenizer.cls_token_id] + text_ids + [tokenizer.sep_token_id]
    logger.info("input_ids %s", input_ids)

    input_ids = torch.tensor([input_ids], device=device)
    # construct reference token ids: pad everywhere the real text was
    ref_input_ids = (
        [tokenizer.cls_token_id]
        + [tokenizer.pad_token_id] * len(text_ids)
        + [tokenizer.sep_token_id]
    )
    ref_input_ids = torch.tensor([ref_input_ids], device=device)
    # construct attention mask (all ones: nothing here is padding)
    attention_mask = torch.ones_like(input_ids)
    return input_ids, ref_input_ids, attention_mask


def captum_sequence_forward(inputs, attention_mask=None, position=0, model=None):
    """Forward function used by captum to get model predictions.

    Args:
        inputs (list): input for predictions.
        attention_mask (list, optional): binary mask for padded positions.
        position (int, optional): which element of the model output to return.
        model ([type], optional): the model to run; defaults to None.
    Returns:
        list: prediction outcome at the requested position.
    """
    model.eval()
    model.zero_grad()
    pred = model(inputs, attention_mask=attention_mask)
    pred = pred[position]
    return pred
def summarize_attributions(attributions):
    """Collapse per-embedding attributions and L2-normalise the result.

    Args:
        attributions: attribution tensor from Layer Integrated Gradients,
            shape (1, tokens, embedding_dim).
    Returns:
        1-D tensor of normalised per-token attributions.
    """
    summed = attributions.sum(dim=-1).squeeze(0)
    return summed / torch.norm(summed)


def get_word_token(input_ids, tokenizer):
    """Turn the first row of token ids back into cleaned word tokens.

    Args:
        input_ids (Tensor): batch of token ids; only row 0 is used.
        tokenizer: pre-trained AutoTokenizer object.
    Returns:
        list[str]: word tokens with the BPE space marker removed.
    """
    id_list = input_ids[0].detach().tolist()
    # Strip the unicode space character introduced by the BPE tokenizer.
    return [tok.replace("Ġ", "") for tok in tokenizer.convert_ids_to_tokens(id_list)]
self.load_available_models(available_models_file) 29 | 30 | def load_available_models(self, available_models_file): 31 | """Load a dictonary with available models to download""" 32 | models = {} 33 | with open(available_models_file, 'r') as ofile: 34 | # iterate over file entries 35 | for i, line in enumerate(ofile): 36 | entries = line.strip().split("\t") 37 | # extract headers 38 | if i == 0: 39 | header_keys = [h.__str__() for h in entries] 40 | continue 41 | 42 | # build available model dictionary from the headers & entries: 43 | # https://www.geeksforgeeks.org/python-dictionary-comprehension/ 44 | model = {key:value for key,value in zip(header_keys, entries)} 45 | # don't add incomplete models 46 | if model['complete'] != 'yes': 47 | continue 48 | 49 | models[f"{model['src_language']}-{model['tgt_language']}-{model['domain']}"] = model 50 | 51 | print('Found {} Masakhane models.'.format(len(models))) 52 | 53 | return models 54 | 55 | def download_model(self, src_language, tgt_language, domain): 56 | """ Download model for given trg language. 
""" 57 | model_dir = f"{self._model_dir_prefix}{src_language}-{tgt_language}-{domain}" 58 | 59 | if not os.path.exists(model_dir): 60 | os.system(f'mkdir -p {model_dir}') 61 | 62 | model_files = self.models[f"{src_language}-{tgt_language}-{domain}"] 63 | 64 | # Check if files exist 65 | ckpt_path = os.path.join(model_dir, 'model.ckpt') 66 | src_vocab_path = os.path.join(model_dir, 'src_vocab.txt') 67 | trg_vocab_path = os.path.join(model_dir, 'trg_vocab.txt') 68 | config_path = os.path.join(model_dir, 'config_orig.yaml') 69 | src_bpe_path = os.path.join(model_dir, 'src.bpe.model') 70 | trg_bpe_path = os.path.join(model_dir, 'trg.bpe.model') 71 | 72 | if not os.path.exists in [ckpt_path, src_vocab_path, trg_vocab_path, config_path, src_bpe_path, trg_bpe_path]: 73 | URL = "https://zenodo.org/record/7636723/files/" + \ 74 | src_language + "-" + tgt_language 75 | if domain == "": 76 | URL += "-baseline.zip?download=1" 77 | else: 78 | URL += "-" + domain + "-baseline.zip?download=1" 79 | 80 | http_response = urlopen(URL) 81 | zipfile = ZipFile(BytesIO(http_response.read())) 82 | zipfile.extractall(path=model_dir) 83 | 84 | # Rename config file to config_orig.yaml. 85 | os.rename(os.path.join(model_dir, 'config.yaml'), config_path) 86 | 87 | # Adjust config. 88 | config = load_config(config_path) 89 | new_config_file = os.path.join(model_dir, 'config.yaml') 90 | config = self._update_config(config, src_vocab_path, trg_vocab_path, 91 | model_dir, ckpt_path) 92 | with open(new_config_file, 'w') as cfile: 93 | yaml.dump(config, cfile) 94 | 95 | print('Downloaded model for {}-{}.'.format(src_language, tgt_language)) 96 | 97 | def load_model(self, src_language, tgt_language, domain, bpe_src_code=None, tokenize=None): 98 | """ Load model for given trg language. 
""" 99 | model_dir = f"{self._model_dir_prefix}{src_language}-{tgt_language}-{domain}" 100 | 101 | ckpt_path = os.path.join(model_dir, 'model.ckpt') 102 | src_vocab_path = os.path.join(model_dir, 'src_vocab.txt') 103 | trg_vocab_path = os.path.join(model_dir, 'trg_vocab.txt') 104 | config_path = os.path.join(model_dir, 'config_orig.yaml') 105 | 106 | # Adjust config. 107 | config = load_config(config_path) 108 | new_config_file = os.path.join(model_dir, 'config.yaml') 109 | config = self._update_config(config, src_vocab_path, trg_vocab_path, 110 | model_dir, ckpt_path) 111 | with open(new_config_file, 'w') as cfile: 112 | yaml.dump(config, cfile) 113 | 114 | print('Loaded model for {}-{}.'.format(src_language, tgt_language)) 115 | 116 | conf = {} 117 | 118 | logger = logging.getLogger(__name__) 119 | conf["logger"] = logger 120 | # load the Joey configuration 121 | cfg = load_config(new_config_file) 122 | # load the checkpoint 123 | if "load_model" in cfg['training'].keys(): 124 | ckpt = cfg['training']["load_model"] 125 | else: 126 | ckpt = get_latest_checkpoint(model_dir) 127 | if ckpt is None: 128 | raise FileNotFoundError("No checkpoint found in directory {}." 
129 | .format(model_dir)) 130 | 131 | # prediction parameters from config 132 | conf["use_cuda"] = cfg["training"].get( 133 | "use_cuda", False) if torch.cuda.is_available() else False 134 | 135 | conf["level"] = cfg["data"]["level"] 136 | conf["max_output_length"] = cfg["training"].get( 137 | "max_output_length", None) 138 | conf["lowercase"] = cfg["data"].get("lowercase", False) 139 | 140 | # load the vocabularies 141 | src_vocab_file = cfg["training"]["model_dir"] + "/src_vocab.txt" 142 | trg_vocab_file = cfg["training"]["model_dir"] + "/trg_vocab.txt" 143 | 144 | conf["src_vocab"] = build_vocab(field="src", vocab_file=src_vocab_file, 145 | dataset=None, max_size=-1, min_freq=0) 146 | conf["trg_vocab"] = build_vocab(field="trg", vocab_file=trg_vocab_file, 147 | dataset=None, max_size=-1, min_freq=0) 148 | 149 | # whether to use beam search for decoding, 0: greedy decoding 150 | if "testing" in cfg.keys(): 151 | conf["beam_size"] = cfg["testing"].get("beam_size", 0) 152 | conf["beam_alpha"] = cfg["testing"].get("alpha", -1) 153 | else: 154 | conf["beam_size"] = 1 155 | conf["beam_alpha"] = -1 156 | 157 | # pre-processing 158 | if tokenize is not None: 159 | src_tokenizer = MosesTokenizer(lang=cfg["data"]["src"]) 160 | trg_tokenizer = MosesDetokenizer(lang=cfg["data"]["trg"]) 161 | # tokenize input 162 | def tokenizer(x): return src_tokenizer.tokenize(x, return_str=True) 163 | def detokenizer(x): return trg_tokenizer.detokenize( 164 | x.split(), return_str=True) 165 | else: 166 | def tokenizer(x): return x 167 | def detokenizer(x): return x 168 | 169 | if bpe_src_code is not None and level == "bpe": 170 | # load bpe merge file 171 | merge_file = open(bpe_src_code, "r") 172 | bpe = apply_bpe.BPE(codes=merge_file) 173 | def segmenter(x): return bpe.process_line(x.strip()) 174 | elif conf["level"] == "char": 175 | # split to chars 176 | def segmenter(x): return list(x.strip()) 177 | else: 178 | def segmenter(x): return x.strip() 179 | 180 | conf["preprocess"] = 
[tokenizer, segmenter] 181 | conf["postprocess"] = [detokenizer] 182 | # build model and load parameters into it 183 | model_checkpoint = load_checkpoint(ckpt, conf["use_cuda"]) 184 | model = build_model( 185 | cfg["model"], src_vocab=conf["src_vocab"], trg_vocab=conf["trg_vocab"]) 186 | model.load_state_dict(model_checkpoint["model_state"]) 187 | if conf["use_cuda"]: 188 | model.cuda() 189 | conf["model"] = model 190 | print("Joey NMT model loaded successfully.") 191 | 192 | return conf 193 | 194 | def _update_config(self, config, new_src_vocab_path, new_trg_vocab_path, 195 | new_model_dir, new_ckpt_path): 196 | """Overwrite the settings in the given config.""" 197 | config['data']['src_vocab'] = new_src_vocab_path 198 | if config['model'].get('tied_embeddings', False): 199 | config['data']['trg_vocab'] = new_src_vocab_path 200 | else: 201 | config['data']['trg_vocab'] = new_trg_vocab_path 202 | config['training']['model_dir'] = new_model_dir 203 | config['training']['load_model'] = new_ckpt_path 204 | return config 205 | 206 | def _is_lowercase(self, src_vocab_path): 207 | # Infer whether the model is built on lowercased data. 208 | lowercase = True 209 | with open(src_vocab_path, 'r') as ofile: 210 | for line in ofile: 211 | if line != line.lower(): 212 | lowercase = False 213 | break 214 | return lowercase 215 | 216 | # Doesn't look like these functions are ever called... 
217 | 218 | def _download_gdrive_file(self, file_id, destination): 219 | """Download a file from Google Drive and store in local file.""" 220 | download_link = 'https://drive.google.com/uc?id={}'.format(file_id) 221 | os.system(f'gdown -q -O {destination} {download_link}') 222 | 223 | def _download_github_file(self, github_raw_path, destination): 224 | """Download a file from GitHub.""" 225 | os.system(f'wget -q -O {destination} {github_raw_path}') 226 | 227 | def _download(self, url, destination): 228 | """Download file from Github or Googledrive.""" 229 | try: 230 | if 'drive.google.com' in url: 231 | if url.startswith('https://drive.google.com/file'): 232 | file_id = url.split("/")[-1] 233 | elif url.startswith('https://drive.google.com/open?'): 234 | file_id = url.split('id=')[-1] 235 | self._download_gdrive_file(file_id, destination) 236 | else: 237 | self._download_github_file(url, destination) 238 | except: 239 | print("Download failed, didn't recognize url {}.".format(url)) 240 | 241 | -------------------------------------------------------------------------------- /src/server/core/models/predict.py: -------------------------------------------------------------------------------- 1 | import os 2 | import ipdb 3 | import logging 4 | import re 5 | 6 | import pandas as pd 7 | from subword_nmt import apply_bpe 8 | from polyglot.text import Text 9 | from flask import current_app 10 | from subword_nmt import apply_bpe 11 | from sacremoses import MosesTokenizer, MosesDetokenizer 12 | from core.utils import load_line_as_data 13 | from joeynmt.helpers import load_config, get_latest_checkpoint, \ 14 | load_checkpoint 15 | from joeynmt.vocabulary import build_vocab 16 | from joeynmt.model import build_model 17 | from joeynmt.prediction import validate_on_data 18 | 19 | 20 | 21 | 22 | def load_model(model_dir, bpe_src_code=None, tokenize=None): 23 | """ 24 | Start the bot. This means loading the model according to the config file. 
25 | 26 | :param model_dir: Model directory of trained Joey NMT model. 27 | :param bpe_src_code: BPE codes for source side processing (optional). 28 | :param tokenize: If True, tokenize inputs with Moses tokenizer. 29 | :return: 30 | """ 31 | conf = {} 32 | cfg_file = model_dir+"/config.yaml" 33 | 34 | logger = logging.getLogger(__name__) 35 | conf["logger"] = logger 36 | # load the Joey configuration 37 | cfg = load_config(cfg_file) 38 | 39 | # load the checkpoint 40 | if "load_model" in cfg['training'].keys(): 41 | ckpt = cfg['training']["load_model"] 42 | else: 43 | ckpt = get_latest_checkpoint(model_dir) 44 | if ckpt is None: 45 | raise FileNotFoundError("No checkpoint found in directory {}." 46 | .format(model_dir)) 47 | 48 | # prediction parameters from config 49 | conf["use_cuda"] = cfg["training"].get("use_cuda", False) 50 | conf["level"] = cfg["data"]["level"] 51 | conf["max_output_length"] = cfg["training"].get("max_output_length", None) 52 | conf["lowercase"] = cfg["data"].get("lowercase", False) 53 | 54 | # load the vocabularies 55 | src_vocab_file = cfg["training"]["model_dir"] + "/src_vocab.txt" 56 | trg_vocab_file = cfg["training"]["model_dir"] + "/trg_vocab.txt" 57 | 58 | conf["src_vocab"] = build_vocab(field="src", vocab_file=src_vocab_file, 59 | dataset=None, max_size=-1, min_freq=0) 60 | conf["trg_vocab"] = build_vocab(field="trg", vocab_file=trg_vocab_file, 61 | dataset=None, max_size=-1, min_freq=0) 62 | 63 | # whether to use beam search for decoding, 0: greedy decoding 64 | if "testing" in cfg.keys(): 65 | conf["beam_size"] = cfg["testing"].get("beam_size", 0) 66 | conf["beam_alpha"] = cfg["testing"].get("alpha", -1) 67 | else: 68 | conf["beam_size"] = 1 69 | conf["beam_alpha"] = -1 70 | 71 | # pre-processing 72 | if tokenize is not None: 73 | src_tokenizer = MosesTokenizer(lang=cfg["data"]["src"]) 74 | trg_tokenizer = MosesDetokenizer(lang=cfg["data"]["trg"]) 75 | # tokenize input 76 | tokenizer = lambda x: src_tokenizer.tokenize(x, 
return_str=True) 77 | detokenizer = lambda x: trg_tokenizer.detokenize( 78 | x.split(), return_str=True) 79 | else: 80 | tokenizer = lambda x: x 81 | detokenizer = lambda x: x 82 | 83 | if bpe_src_code is not None and level == "bpe": 84 | # load bpe merge file 85 | merge_file = open(bpe_src_code, "r") 86 | bpe = apply_bpe.BPE(codes=merge_file) 87 | segmenter = lambda x: bpe.process_line(x.strip()) 88 | elif conf["level"] == "char": 89 | # split to chars 90 | segmenter = lambda x: list(x.strip()) 91 | else: 92 | segmenter = lambda x: x.strip() 93 | 94 | conf["preprocess"] = [tokenizer, segmenter] 95 | conf["postprocess"] = [detokenizer] 96 | # build model and load parameters into it 97 | model_checkpoint = load_checkpoint(ckpt, conf["use_cuda"]) 98 | model = build_model(cfg["model"], src_vocab=conf["src_vocab"], trg_vocab=conf["trg_vocab"]) 99 | model.load_state_dict(model_checkpoint["model_state"]) 100 | 101 | if conf["use_cuda"]: 102 | model.cuda() 103 | conf["model"] = model 104 | print("Joey NMT model loaded successfully.") 105 | return conf 106 | 107 | 108 | class Predicter(): 109 | # def __init__(self): 110 | # pass 111 | 112 | def translate(self, message_text, model, src_vocab, trg_vocab, preprocess, postprocess, 113 | logger, beam_size, beam_alpha, level, lowercase, 114 | max_output_length, use_cuda): 115 | """ 116 | Describes how to translate a text message. 117 | 118 | :param message_text: Slack command, could be text. 119 | :param model: The Joey NMT model. 120 | :param src_vocab: Source vocabulary. 121 | :param trg_vocab: Target vocabulary. 122 | :param preprocess: Preprocessing pipeline (a list). 123 | :param postprocess: Postprocessing pipeline (a list). 124 | :param beam_size: Beam size for decoding. 125 | :param beam_alpha: Beam alpha for decoding. 126 | :param level: Segmentation level. 127 | :param lowercase: Lowercasing. 128 | :param max_output_length: Maximum output length. 129 | :param use_cuda: Using CUDA or not. 
130 | :return: 131 | """ 132 | # ipdb.set_trace() 133 | sentence = message_text.strip() 134 | # remove emojis 135 | emoji_pattern = re.compile("\:[a-zA-Z]+\:") 136 | sentence = re.sub(emoji_pattern, "", sentence) 137 | sentence = sentence.strip() 138 | if lowercase: 139 | sentence = sentence.lower() 140 | for p in preprocess: 141 | sentence = p(sentence) 142 | 143 | # load the data which consists only of this sentence 144 | test_data, src_vocab, trg_vocab = load_line_as_data(lowercase=lowercase, 145 | line=sentence, src_vocab=src_vocab, trg_vocab=trg_vocab, level=level) 146 | 147 | # generate outputs 148 | score, loss, ppl, sources, sources_raw, references, hypotheses, \ 149 | hypotheses_raw, attention_scores = validate_on_data( 150 | model, data=test_data, batch_size=1, level=level, 151 | max_output_length=max_output_length, eval_metric=None, 152 | use_cuda=use_cuda, beam_size=beam_size, 153 | beam_alpha=beam_alpha, n_gpu=0) 154 | 155 | # validate_on_data(model: Model, data: Dataset, 156 | # batch_size: int, 157 | # use_cuda: bool, max_output_length: int, 158 | # level: str, eval_metric: Optional[str], 159 | # n_gpu: int, 160 | # batch_class: Batch = Batch, 161 | # compute_loss: bool = False, 162 | # beam_size: int = 1, beam_alpha: int = -1, 163 | # batch_type: str = "sentence", 164 | # postprocess: bool = True, 165 | # bpe_type: str = "subword-nmt", 166 | # sacrebleu: dict = None) \ 167 | 168 | # post-process 169 | if level == "char": 170 | response = "".join(hypotheses) 171 | else: 172 | response = " ".join(hypotheses) 173 | 174 | for p in postprocess: 175 | response = p(response) 176 | 177 | return response 178 | 179 | 180 | def predict_translation(self, source, model_dir, lc): 181 | new_config_path = os.path.join(model_dir, 'config.yaml') 182 | 183 | # joenmt takes as input a file, so for the moment 184 | # I made the code to write the input into a file, ... 
185 | 186 | if not os.path.exists(current_app.config['TEMP']): 187 | os.mkdir(current_app.config['TEMP']) 188 | 189 | path_to_temp = current_app.config['TEMP'] 190 | 191 | # if not os.path.exists("../../data/temps/"): 192 | # os.mkdir("../../data/temps/") 193 | # path_to_temp = "../../data/temps/" 194 | 195 | if not os.path.exists(path_to_temp): 196 | os.mkdir(path_to_temp) 197 | 198 | 199 | src_input_file = 'src_input.bpe.txt' 200 | # src_bpe_path = os.path.join(model_dir, 'src.bpe.model') 201 | 202 | # ted_link = 'https://raw.githubusercontent.com/juliakreutzer/masakhane-eval/master/data/multitarget-ted-filt.en.tsv' 203 | os.system(f'echo {source} > {path_to_temp}input.tsv') 204 | # src_data = SourceData(path_to_temp+'input.tsv', lc, \ 205 | # bpe_path=src_bpe_path, out_file=path_to_temp+src_input_file) 206 | # sources = src_data.get_sources() 207 | # ted_df = src_data.get_df() 208 | 209 | os.system(f"sed 's/@@ //g' {path_to_temp}{src_input_file} > {path_to_temp}src_input.txt") 210 | 211 | # os.system(f'echo {source} > input.txt') 212 | os.system(f'python -m joeynmt translate {new_config_path} < {path_to_temp}src_input.txt > {path_to_temp}trg_output_file') 213 | 214 | targets = post_process(path_to_temp+'trg_output_file', lc) 215 | # 216 | # with open('output.txt', 'r') as file: 217 | # output = file.read().replace('\n', '') 218 | 219 | # with open('trg_output_file', 'r') as file: 220 | # output = file.read().replace('\n', '') 221 | 222 | # return output 223 | 224 | return targets[0] if len(targets)>0 else "" 225 | 226 | 227 | class SourceData(): 228 | def __init__(self, data_link, lc, bpe_path, out_file): 229 | self._src_df = pd.read_csv(data_link, sep='\t', header=None, 230 | names=['source']) 231 | print("Loaded {} lines.".format(len(self._src_df))) 232 | self._bpe_model = self.load_bpe(bpe_path) 233 | self._src_df, self._sources = self.preprocess(out_file, lc) 234 | self.lc = lc 235 | 236 | def get_df(self): 237 | return self._src_df 238 | 239 | def 
get_sources(self): 240 | return self._sources 241 | 242 | def preprocess(self, out_file, lc): 243 | """Tokenize, (lowercase,) sub-word split. 244 | 245 | Using Polyglot since it was used for JW300. 246 | Preprocess the source column of a dataframe object and write to file. 247 | 248 | Pipeline: 249 | - tokenize 250 | - split into sub-words 251 | 252 | Append pre-processed sources to dataframe.""" 253 | tokenized_sentences = [] 254 | bped_sentences = [] 255 | sources = [] 256 | with open(out_file, 'w') as ofile: 257 | for i, row in self._src_df.iterrows(): 258 | sentence_i = Text(row[0]).sentences[0] 259 | tokenized_sentence = "" 260 | bped_sentence = "" 261 | tokenized = " ".join(sentence_i.words) 262 | sources.append(str(sentence_i)) 263 | if lc: 264 | tokenized = tokenized.lower() 265 | tokenized_sentence = tokenized 266 | bped = self._bpe_model.process_line(tokenized) 267 | bped_sentence = bped 268 | ofile.write("{}\n".format(bped)) 269 | tokenized_sentences.append(tokenized_sentence) 270 | bped_sentences.append(bped_sentence) 271 | data = self._src_df.assign( 272 | tokenized_sentences=tokenized_sentences) 273 | data = data.assign( 274 | bped_sentences=bped_sentences) 275 | return data, sources 276 | 277 | def load_bpe(self, bpe_path): 278 | with open(bpe_path, 'r') as ofile: 279 | bpe_model = apply_bpe.BPE(codes=ofile) 280 | return bpe_model 281 | 282 | # Post-processing 283 | def post_process(output_file, lc): 284 | """Load and detokenize translations. 285 | 286 | There is no given Polyglot detokenizer, so we do it by heuristics. 287 | """ 288 | targets = [] 289 | with open(output_file, 'r') as ofile: 290 | for line in ofile: 291 | sent = line.strip() 292 | sent = sent.replace('', '') 293 | sent = re.sub(r'\s+([?.!"-,:’])', r'\1', sent) 294 | sent = sent.replace('( ', '(').replace(' - ', '-').replace(' / ', '/').replace(' /', '/') 295 | if lc: 296 | # Cheap casing restoration... only first character but better than nothing. 
297 | sent = sent[0].upper() + sent[1:] 298 | targets.append(sent) 299 | return targets -------------------------------------------------------------------------------- /src/client/src/components/translateCard.js: -------------------------------------------------------------------------------- 1 | import React from 'react'; 2 | import { useState, useLayoutEffect,useRef, useEffect} from 'react'; 3 | import { Container, Row, Col, Form, Button, Modal, Toast, OverlayTrigger, Tooltip } from 'react-bootstrap'; 4 | import {CopyToClipboard} from 'react-copy-to-clipboard'; 5 | 6 | import MultiStepForm from './multiStepForm'; 7 | 8 | const MIN_TEXTAREA_HEIGHT = 200; 9 | 10 | export default function TranslateCard() { 11 | const [input, setText] = useState(""); 12 | const [translation, setTranslation] = useState('...'); 13 | const [srcLanguages, setSrcLanguages] = useState([]); 14 | const [tgtLanguages, setTgtLanguages] = useState([]); 15 | const [show, setShow] = useState(false); 16 | const [src_lang, setSrc_Lang] = useState('English'); 17 | const [tgt_lang, setTgt_Lang] = useState('Swahili'); 18 | const [feedBackForm, setFeedBackForm] = useState({}); 19 | const textareaRef = useRef(null); 20 | const textareaRef2= useRef(null); 21 | const [feedbackToken, setFeedbackToken] = useState( 22 | localStorage.getItem('feedbackToken') || '' 23 | ); 24 | 25 | const [copySuccess, setCopySuccess] = useState(''); 26 | const [showToast, setShowToast] = useState(''); 27 | 28 | const handleClose = () => setShow(false); 29 | const handleShow = () => setShow(true); 30 | 31 | const copyToClipboard = () => { 32 | setCopySuccess('Translation Copied!'); 33 | setShowToast(true); 34 | }; 35 | 36 | const handleChangeSrc_Lang= (e) => { 37 | //localstorage 38 | const name = e.target.value 39 | localStorage.setItem('src_lang', name); 40 | 41 | //set state 42 | setSrc_Lang(name); 43 | //get target languages 44 | const target = srcLanguages.filter(x => x.name === name) 45 | const target_languages = 
target[0].targets 46 | setTgtLanguages(target_languages) 47 | setTgt_Lang(target_languages[0].name) 48 | }; 49 | 50 | const handleChangeTgt_Lang = (e) => { 51 | //localstorage 52 | localStorage.setItem('tgt_lang', e.target.value); 53 | 54 | //set state 55 | setTgt_Lang(e.target.value); 56 | 57 | // console.log(e.target.value) 58 | 59 | }; 60 | 61 | const handleTranslate = (e) => { 62 | console.log('translating ..') 63 | console.log(src_lang) 64 | console.log(tgt_lang) 65 | e.preventDefault() 66 | 67 | fetch( 68 | '/translate', 69 | { 70 | method: 'post', 71 | // mode: 'no-cors', 72 | body: JSON.stringify({input, src_lang, tgt_lang}), 73 | headers: { 74 | 'Content-Type': 'application/json' 75 | }, 76 | // credentials: 'same-origin', 77 | }) 78 | .then(res => res.json()) 79 | .then(data => { 80 | console.log({ data }) 81 | // do something here 82 | setTranslation(data.output) 83 | }) 84 | }; 85 | 86 | const submitFeedBack = (formData) => { 87 | // first set state of feedback Form 88 | setFeedBackForm({...formData}); 89 | // then submit feedback form to db here 90 | // here's where you write the function to push feedback to backend 91 | 92 | console.log({formData}) 93 | 94 | fetch( 95 | '/save', 96 | { 97 | method: 'post', 98 | // mode: 'no-cors', 99 | body: JSON.stringify({ 100 | src_lang: formData.src_lang, 101 | tgt_lang: formData.tgt_lang, 102 | accurate_translation: formData.accurate_translation, 103 | know_src_lang: formData.know_src_lang, 104 | know_tgt_lang: formData.know_tgt_lang, 105 | own_translation: formData.own_translation, 106 | text: formData.text, 107 | translation: formData.translation, 108 | understand_translation: formData.understand_translation, 109 | feedbackToken: formData.feedbackToken 110 | }), 111 | headers: { 112 | 'Content-Type': 'application/json' 113 | }, 114 | // credentials: 'same-origin', 115 | }) 116 | .then(res => res.json()) 117 | .then(data => { 118 | //console.log({data}) 119 | // do something here 120 | handleClear() 121 | }) 122 
| 123 | } 124 | 125 | 126 | const handleClear = () => { 127 | // clears text part 128 | setText(''); 129 | // clear translation 130 | setTranslation('...'); 131 | } 132 | 133 | useLayoutEffect(() => { 134 | // Reset height - important to shrink on delete 135 | textareaRef.current.style.height = "inherit"; 136 | // Set height 137 | textareaRef.current.style.height = `${Math.max( 138 | textareaRef.current.scrollHeight, 139 | MIN_TEXTAREA_HEIGHT 140 | )}px`; 141 | }, [input]); 142 | 143 | useLayoutEffect(() => { 144 | // Reset height - important to shrink on delete 145 | textareaRef2.current.style.height = "inherit"; 146 | // Set height 147 | textareaRef2.current.style.height = `${Math.max( 148 | textareaRef2.current.scrollHeight, 149 | MIN_TEXTAREA_HEIGHT 150 | )}px`; 151 | }, [input]); 152 | 153 | // console.log({feedbackToken}); 154 | // console.log({tgt_lang}); 155 | 156 | // console.log({feedbackToken}); 157 | 158 | let srcLang = []; 159 | let tgtLang = []; 160 | 161 | useEffect(()=> { 162 | // define fetch function 163 | let src = []; 164 | let tgt = []; 165 | const fetchLanguages = async ()=> { 166 | await fetch( 167 | '/update', 168 | { 169 | method: 'get', 170 | headers: { 171 | 'Content-Type': 'application/json' 172 | }, 173 | }) 174 | await fetch( 175 | '/translate', 176 | { 177 | method: 'get', 178 | headers: { 179 | 'Content-Type': 'application/json' 180 | }, 181 | // credentials: 'same-origin', 182 | }) 183 | .then(res => res.json()) 184 | .then(data => { 185 | console.log({ data}) 186 | // do something here 187 | setSrcLanguages(data) 188 | setTgtLanguages(data[0].targets) 189 | 190 | }) 191 | 192 | 193 | } 194 | // call fetch function 195 | fetchLanguages() 196 | 197 | }, []) 198 | // console.log(srcLanguages) 199 | // console.log(tgtLanguages) 200 | // console.log(tgt_lang) 201 | 202 | return ( 203 | 204 | 205 | 212 | 213 | 214 |

GIVE FEEDBACK

215 |

We appreciate your feedback and your contribution will help make our translation better.

216 | 217 |
218 | 219 | 229 | 230 |
231 | 232 | 233 | 234 | 235 | 236 |
237 | 238 | From: 239 | 240 | { 241 | srcLanguages && srcLanguages.map((option, index) => { 242 | return () 243 | }) 244 | } 245 | 246 | 247 |
248 | 249 | {/* 250 | 251 | { 252 | srcLanguages.length > 1 && srcLanguages 253 | .filter(x => x.value !== src_lang) 254 | .slice(0, 2) 255 | .map((option, index) => { 256 | return ( 257 | ) 258 | }) 259 | } 260 | 261 | */} 262 |
263 | 264 | 265 | 266 | 267 |
268 | 269 | To: 270 | 271 | 272 | { 273 | tgtLanguages.map((option, index) => { 274 | return () 275 | }) 276 | } 277 | 278 | 279 | 280 |
281 | 282 | {/* 283 | 284 | { 285 | tgtLanguages.length > 1 && tgtLanguages 286 | .filter(x => x.value !== tgt_lang) 287 | .slice(0, 2) 288 | .map((option, index) => { 289 | return ( 290 | ) 291 | }) 292 | } 293 | 294 | */} 295 |
296 | 297 |
298 | 299 | 300 |
301 | 302 | setText(e.target.value)} 311 | /> 312 | 313 |
314 | 315 | 316 | 317 | 318 | 319 | 320 | {' '} 321 | 322 | 323 | 324 | 325 |
326 | 327 | setText(e.target.value)} 339 | // autoFocus={showToast} 340 | /> 341 | {!translation && ( 342 | 343 | Sorry, there’s no translation for that phrase. 344 | 345 | )} 346 | 347 |
348 | 349 | 350 | 351 | {/* */} 352 | 353 | 354 | 358 | Copy Translation. 359 | 360 | } 361 | > 362 | 363 | 364 | 365 | 366 | 367 | 368 | 369 |
370 | 371 |
372 | setShowToast(false)} 374 | show={showToast} 375 | delay={3000} 376 | autohide 377 | style={{ 378 | position: 'absolute', 379 | bottom: 0, 380 | left: 0 381 | }} 382 | > 383 | {copySuccess} 384 | 385 |
386 |
387 | ) 388 | } 389 | --------------------------------------------------------------------------------