├── app ├── __init__.py ├── UPLOADS │ └── .gitkeep ├── utils │ ├── __init__.py │ └── utils.py ├── services │ ├── __init__.py │ ├── db.py │ ├── speech2text.py │ ├── infox.py │ └── create_embedding.py ├── config.py └── app.py ├── .gitignore └── requirements.txt /app/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /app/UPLOADS/.gitkeep: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /app/utils/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /app/services/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /app/services/db.py: -------------------------------------------------------------------------------- 1 | from flask_pymongo import PyMongo 2 | 3 | mongo = PyMongo() 4 | 5 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | __pycache__/ 2 | INFOX_venv/ 3 | infox_env/ 4 | venv/ 5 | latest_silero_models.yml 6 | .env -------------------------------------------------------------------------------- /app/config.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | class Config: 4 | UPLOAD_FOLDER = os.environ.get('UPLOAD_FOLDER') 5 | MONGO_URI = os.environ.get('MONGO_URI') -------------------------------------------------------------------------------- /app/services/speech2text.py: -------------------------------------------------------------------------------- 1 | from app.utils.utils import load_S2T, device 2 | from glob import glob 3 | from loguru import logger 4 | 5 | def speech2text(wav_file: str) -> str: 6 | """ 7 | Converts audio to text 8 | 9 | Args: 10 | wav_file: filepath for the saved wav file 11 | 12 | Returns: 13 | Transcribed text 14 | """ 15 | model, decoder, read_batch, split_into_batches, prepare_model_input = load_S2T() 16 | logger.info("Loaded S2T model") 17 | wav_files = glob(wav_file) 18 | batches = split_into_batches(wav_files, batch_size=10) 19 | processed_input = prepare_model_input(read_batch(batches[0]), device=device) 20 | output = model(processed_input) 21 | return decoder(output[0].cpu()) -------------------------------------------------------------------------------- /app/services/infox.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import json 3 | from .db import mongo 4 | from .speech2text import speech2text 5 | from app.utils.utils import cosine_sim 6 | from loguru import logger 7 | 8 | def infox( wav_filepath: str, qa_name: str) -> str: 9 | """ 10 | Converts the audio file to text and returns the corresponding answer to the most matched question 11 | 12 | Args: 13 | wav_filepath: filepath of the saved audio file 14 | qa_name: name of QA stored in the database 15 | 16 | Returns: 17 | Answer of the most matched question 18 | """ 19 | transcribed_text = speech2text(wav_filepath) 20 | logger.info(f"Transcribed text: {transcribed_text}") 21 | 22 | data = mongo.db.embeddings.find_one({"QA_NAME": qa_name}) 23 | QA = data['QA'] 24 | QA_EMBEDDINGS = np.array(data['QA_embeddings'], dtype=np.float32) 25 | 26 | QUESTIONS = [key for key, value in QA.items()] 27 | idx = cosine_sim(QA_EMBEDDINGS, transcribed_text) 28 | 29 | question = QUESTIONS[idx] 30 | logger.info(f"The most matched question is: {question}") 31 | text = QA[question] 32 | return text 33 | 34 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | antlr4-python3-runtime==4.8 2 | appdirs==1.4.4 3 | audioread==2.1.9 4 | bcrypt==3.2.0 5 | Bcrypt-Flask==1.0.2 6 | certifi==2021.10.8 7 | cffi==1.15.0 8 | charset-normalizer==2.0.12 9 | click==8.0.4 10 | decorator==5.1.1 11 | dnspython==2.2.1 12 | filelock==3.6.0 13 | Flask==2.0.3 14 | Flask-Cors==3.0.10 15 | Flask-PyMongo==2.3.0 16 | Flask-JWT==0.3.2 17 | huggingface-hub==0.4.0 18 | idna==3.3 19 | itsdangerous==2.1.1 20 | Jinja2==3.0.3 21 | joblib==1.1.0 22 | librosa==0.9.1 23 | llvmlite==0.38.0 24 | loguru==0.6.0 25 | MarkupSafe==2.1.1 26 | nltk==3.7 27 | numba==0.55.1 28 | numpy==1.20.1 29 | omegaconf==2.1.1 30 | packaging==21.3 31 | Pillow==9.0.1 32 | pooch==1.6.0 33 | pycparser==2.21 34 | pymongo==4.0.2 35 | pyparsing==3.0.7 36 | python-dotenv==0.20.0 37 | PyYAML==6.0 38 | regex==2022.3.15 39 | requests==2.27.1 40 | resampy==0.2.2 41 | sacremoses==0.0.49 42 | scikit-learn==1.0.2 43 | scipy==1.8.0 44 | sentence-transformers==2.2.0 45 | sentencepiece==0.1.96 46 | six==1.16.0 47 | SoundFile==0.10.3.post1 48 | sox==1.4.1 49 | threadpoolctl==3.1.0 50 | tokenizers==0.11.6 51 | torch==1.11.0 52 | torchaudio==0.11.0 53 | torchvision==0.12.0 54 | tqdm==4.63.0 55 | transformers==4.17.0 56 | typing-extensions==4.1.1 57 | urllib3==1.26.9 58 | Werkzeug==2.0.3 59 | -------------------------------------------------------------------------------- /app/services/create_embedding.py: -------------------------------------------------------------------------------- 1 | from app.utils.utils import load_embedding_model 2 | from app.services.db import mongo 3 | from typing import Dict 4 | from loguru import logger 5 | 6 | def create_embeddings(username:str, QA_NAME:str, QA: Dict, TITLE:str, DESCRIPTION:str, IMAGE): 7 | """ 8 | Saving the embedding of the question in the QA pairs 9 | 10 | Args: 11 | username: username of the user 12 | QA_NAME: name of QA provided 13 | QA: A dict containing question-answer 14 | TITLE: Title for QA 15 | DESCRIPTION: Description of the chatbot 16 | 17 | Returns: 18 | None 19 | """ 20 | embedding_model = load_embedding_model() 21 | logger.info("Embedding model loaded") 22 | 23 | QUESTIONS = [key for key, value in QA.items()] 24 | QA_embeddings = embedding_model.encode(QUESTIONS).tolist() 25 | logger.info("Embeddings created") 26 | 27 | mongo.db.embeddings.insert_one({"username": username, 28 | "QA_NAME": QA_NAME, 29 | "QA": QA, 30 | "QA_embeddings": QA_embeddings, 31 | "Title": TITLE, 32 | "Description": DESCRIPTION, 33 | "Image": IMAGE}) 34 | 35 | logger.info("Embeddings saved") -------------------------------------------------------------------------------- /app/utils/utils.py: -------------------------------------------------------------------------------- 1 | from flask import request 2 | from sentence_transformers import SentenceTransformer 3 | from sentence_transformers.util import cos_sim 4 | import torch 5 | import librosa 6 | import soundfile as sf 7 | from functools import wraps 8 | 9 | device = torch.device('cpu') 10 | 11 | def load_embedding_model(model_name='all-MiniLM-L6-v2'): 12 | """Loading embedding model from sentence-transformers""" 13 | 14 | return SentenceTransformer(model_name) 15 | 16 | def load_S2T(repo_dir='snakers4/silero-models', model_name='silero_stt'): 17 | """Loading speech2text model""" 18 | 19 | model, decoder, utils = torch.hub.load(repo_or_dir=repo_dir, 20 | model=model_name, 21 | language='en', 22 | device=device) 23 | 24 | (read_batch, split_into_batches, read_audio, prepare_model_input) = utils 25 | return model, decoder, read_batch, split_into_batches, prepare_model_input 26 | 27 | def cosine_sim(QA_embeddings, text): 28 | """Finding the cosine similarity between the provided arrays""" 29 | 30 | text_emb = load_embedding_model().encode(text) 31 | return torch.argmax(cos_sim(QA_embeddings, text_emb)) 32 | 33 | def convert(inputfile): 34 | """Converting the sample rate of uploaded audio file""" 35 | 36 | y, sr = librosa.load(inputfile, sr=16000) 37 | sf.write(inputfile, y, sr) 38 | 39 | 40 | #decorator for token authorization 41 | def token_required(f): 42 | @wraps(f) 43 | def decorated(*args, **kwargs): 44 | token = None 45 | 46 | if 'x-access-token' in request.headers: 47 | token = request.headers['x-access-token'] 48 | 49 | if not token: 50 | return jsonify({'message':'Token is missing'}), 401 51 | 52 | try: 53 | data = jwt.decode(token, app.config['SECRET_KEY']) 54 | current_user = mongo.db.users.find_one({'username': data['username']}) 55 | except: 56 | return jsonify({'message': 'Token is invalid'}), 401 57 | 58 | return f(current_user, *args, **kwargs) 59 | 60 | return decorated 61 | -------------------------------------------------------------------------------- /app/app.py: -------------------------------------------------------------------------------- 1 | from flask import Flask, after_this_request, jsonify, request 2 | from flask_cors import CORS 3 | from flask_bcrypt import Bcrypt 4 | 5 | import os 6 | 7 | from .services.db import mongo 8 | from .services.create_embedding import create_embeddings 9 | from .services.infox import infox 10 | from .utils.utils import convert, token_required 11 | from .config import Config 12 | 13 | app = Flask(__name__) 14 | app.config.from_object(Config) 15 | bcrypt = Bcrypt(app) 16 | CORS(app) 17 | mongo.init_app(app) 18 | 19 | 20 | @app.route("/api/register/", methods=['POST']) 21 | def register(): 22 | """Register a user""" 23 | 24 | first_name = request.json.get('firstname') 25 | last_name = request.json.get('lastname') 26 | username = request.json.get('username') 27 | email = request.json.get('email') 28 | password = request.json.get('password') 29 | 30 | user_in_db = mongo.db.users.find_one({'username': username}) or mongo.db.users.find_one({'email': email}) 31 | if user_in_db: 32 | return jsonify({"message": "User with provided username/email already exists, please try with another one."}) 33 | 34 | mongo.db.users.insert_one({'first_name': first_name, 'last_name': last_name, 'username': username, 'email': email, 'password': password}) 35 | return jsonify({"message": "User created"}) 36 | 37 | 38 | @app.route("/api/login/", methods=['POST']) 39 | def login(): 40 | """LogIn the user""" 41 | 42 | username = request.json.get('username') 43 | user_in_db = mongo.db.users.find_one({'username': username}) 44 | logger.info(user_in_db['password']) 45 | 46 | if not user_in_db: 47 | return make_response('No user with this username', 401, {'WWW-Authenticate': 'Basic realm="Login Required"'}) 48 | 49 | if user_in_db['password'] == request.json.get("password"): 50 | token = jwt.encode({'username' : user_in_db['username'], 'email' : user_in_db['email']}, app.config['SECRET_KEY']) 51 | return jsonify({"Token": token.decode('UTF-8')}) 52 | 53 | else: 54 | return make_response('Could not verify!', 401, {'WWW-Authenticate': 'Basic realm="Login Required"'}) 55 | 56 | @app.route("/api/getUser/", methods=["GET"]) 57 | @token_required 58 | def get_user(current_user): 59 | """Get all the embeddings for the QA provided""" 60 | 61 | data = mongo.db.users.find({"username": current_user['username']}) 62 | list_data = list(data) 63 | json_data = dumps(list_data) 64 | return jsonify({"User Details": json_data}) 65 | 66 | @app.route("/api/createEmbeddings/", methods=["POST"]) 67 | @token_required 68 | def create_embedding(): 69 | """Create and save the embeddings for the QA provided""" 70 | 71 | username = current_user['username'] 72 | # username = request.json.get('username') 73 | QA_NAME = request.json.get('qa_name') 74 | QA = request.json.get("QA") 75 | TITLE = request.json.get("title") 76 | DESCRIPTION = request.json.get("description") 77 | IMAGE = request.json.get("image") 78 | 79 | create_embeddings(username, QA_NAME, QA, TITLE, DESCRIPTION, IMAGE) 80 | return jsonify({"message": "Embeddings created successfully"}) 81 | 82 | 83 | @app.route("/api/getEmbeddings//", methods=["GET"]) 84 | @token_required 85 | def get_embedding(current_user, qa_name): 86 | """Get all the embeddings for the QA provided""" 87 | 88 | data = mongo.db.embeddings.find({"username": current_user['username'], "QA_NAME": qa_name}) 89 | list_data = list(data) 90 | json_data = dumps(list_data) 91 | return jsonify({"Embedding data": json_data}) 92 | 93 | @app.route("/api/getQAs/", methods=["GET"]) 94 | @token_required 95 | def get_all_questions(current_user): 96 | """Get all the embeddings for the users""" 97 | 98 | data = mongo.db.embeddings.find({"username": current_user['username']}) 99 | list_data = list(data) 100 | json_data = dumps(list_data) 101 | return jsonify({"Questions": json_data}) 102 | 103 | @app.route("/api/app/get_all/") 104 | def get_all(): 105 | data = mongo.db.embeddings.find() 106 | list_data = list(data) 107 | json_data = dumps(list_data) 108 | return jsonify(json_data) 109 | 110 | @app.route("/api/app//", methods=['POST']) 111 | def main(qa_name): 112 | """End2End infox application""" 113 | 114 | wav_file = request.files.get("audio_file") 115 | file_path = os.path.join(app.config['UPLOAD_FOLDER'], 'wav_file.wav') 116 | wav_file.save(file_path) 117 | convert(file_path) 118 | 119 | @after_this_request 120 | def remove_wavfile(response): 121 | os.remove(file_path) 122 | return response 123 | 124 | output_text = infox(file_path, qa_name) 125 | return {"output": output_text} 126 | 127 | 128 | @app.route("/healthz/") 129 | def health(): 130 | """Health check for the api""" 131 | 132 | return "INFOX-api is up and running" 133 | --------------------------------------------------------------------------------