├── nltkmodules.py ├── web ├── public │ ├── favicon.ico │ ├── logo192.png │ ├── logo512.png │ ├── robots.txt │ ├── manifest.json │ └── index.html ├── src │ ├── pages │ │ ├── NotFound.jsx │ │ ├── Home.jsx │ │ └── Record.jsx │ ├── setupTests.js │ ├── App.test.js │ ├── components │ │ ├── Navbar.jsx │ │ └── Message.jsx │ ├── index.js │ ├── App.css │ ├── App.jsx │ ├── index.css │ ├── logo.svg │ └── serviceWorker.js ├── .gitignore ├── package.json └── README.md ├── firestore └── config.py ├── main.py ├── LICENSE ├── requirements.txt ├── cogs ├── util.py └── record.py ├── summarisation ├── very_common_words.txt └── text_summarisation.py ├── .gitignore └── README.md /nltkmodules.py: -------------------------------------------------------------------------------- 1 | import nltk 2 | 3 | nltk.download("book") 4 | -------------------------------------------------------------------------------- /web/public/favicon.ico: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Groverkss/Synopsys/HEAD/web/public/favicon.ico -------------------------------------------------------------------------------- /web/public/logo192.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Groverkss/Synopsys/HEAD/web/public/logo192.png -------------------------------------------------------------------------------- /web/public/logo512.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Groverkss/Synopsys/HEAD/web/public/logo512.png -------------------------------------------------------------------------------- /web/public/robots.txt: -------------------------------------------------------------------------------- 1 | # https://www.robotstxt.org/robotstxt.html 2 | User-agent: * 3 | Disallow: 4 | -------------------------------------------------------------------------------- /web/src/pages/NotFound.jsx: -------------------------------------------------------------------------------- 1 | import React from "react"; 2 | 3 | export default () => { 4 | return

404

; 5 | }; 6 | -------------------------------------------------------------------------------- /web/src/setupTests.js: -------------------------------------------------------------------------------- 1 | // jest-dom adds custom jest matchers for asserting on DOM nodes. 2 | // allows you to do things like: 3 | // expect(element).toHaveTextContent(/react/i) 4 | // learn more: https://github.com/testing-library/jest-dom 5 | import '@testing-library/jest-dom/extend-expect'; 6 | -------------------------------------------------------------------------------- /web/src/App.test.js: -------------------------------------------------------------------------------- 1 | import React from 'react'; 2 | import { render } from '@testing-library/react'; 3 | import App from './App'; 4 | 5 | test('renders learn react link', () => { 6 | const { getByText } = render(); 7 | const linkElement = getByText(/learn react/i); 8 | expect(linkElement).toBeInTheDocument(); 9 | }); 10 | -------------------------------------------------------------------------------- /web/src/components/Navbar.jsx: -------------------------------------------------------------------------------- 1 | import React from "react"; 2 | import { Navbar, NavbarBrand } from "reactstrap"; 3 | 4 | export default () => { 5 | return ( 6 | 7 | 8 | Synopsys 9 | 10 | 11 | ); 12 | }; 13 | -------------------------------------------------------------------------------- /web/.gitignore: -------------------------------------------------------------------------------- 1 | # See https://help.github.com/articles/ignoring-files/ for more about ignoring files. 2 | 3 | # dependencies 4 | /node_modules 5 | /.pnp 6 | .pnp.js 7 | 8 | # testing 9 | /coverage 10 | 11 | # production 12 | /build 13 | 14 | # misc 15 | .DS_Store 16 | .env.local 17 | .env.development.local 18 | .env.test.local 19 | .env.production.local 20 | 21 | npm-debug.log* 22 | yarn-debug.log* 23 | yarn-error.log* 24 | 25 | src/firebase.js 26 | -------------------------------------------------------------------------------- /web/public/manifest.json: -------------------------------------------------------------------------------- 1 | { 2 | "short_name": "React App", 3 | "name": "Create React App Sample", 4 | "icons": [ 5 | { 6 | "src": "favicon.ico", 7 | "sizes": "64x64 32x32 24x24 16x16", 8 | "type": "image/x-icon" 9 | }, 10 | { 11 | "src": "logo192.png", 12 | "type": "image/png", 13 | "sizes": "192x192" 14 | }, 15 | { 16 | "src": "logo512.png", 17 | "type": "image/png", 18 | "sizes": "512x512" 19 | } 20 | ], 21 | "start_url": ".", 22 | "display": "standalone", 23 | "theme_color": "#000000", 24 | "background_color": "#ffffff" 25 | } 26 | -------------------------------------------------------------------------------- /web/src/index.js: -------------------------------------------------------------------------------- 1 | import React from "react"; 2 | import ReactDOM from "react-dom"; 3 | import "./index.css"; 4 | import App from "./App"; 5 | import * as serviceWorker from "./serviceWorker"; 6 | import "bootstrap/dist/css/bootstrap.min.css"; 7 | 8 | ReactDOM.render( 9 | 10 | 11 | , 12 | document.getElementById("root") 13 | ); 14 | 15 | // If you want your app to work offline and load faster, you can change 16 | // unregister() to register() below. Note this comes with some pitfalls. 17 | // Learn more about service workers: https://bit.ly/CRA-PWA 18 | serviceWorker.unregister(); 19 | -------------------------------------------------------------------------------- /web/src/App.css: -------------------------------------------------------------------------------- 1 | .App { 2 | text-align: center; 3 | } 4 | 5 | .App-logo { 6 | height: 40vmin; 7 | pointer-events: none; 8 | } 9 | 10 | @media (prefers-reduced-motion: no-preference) { 11 | .App-logo { 12 | animation: App-logo-spin infinite 20s linear; 13 | } 14 | } 15 | 16 | .App-header { 17 | background-color: #282c34; 18 | min-height: 100vh; 19 | display: flex; 20 | flex-direction: column; 21 | align-items: center; 22 | justify-content: center; 23 | font-size: calc(10px + 2vmin); 24 | color: white; 25 | } 26 | 27 | .App-link { 28 | color: #61dafb; 29 | } 30 | 31 | @keyframes App-logo-spin { 32 | from { 33 | transform: rotate(0deg); 34 | } 35 | to { 36 | transform: rotate(360deg); 37 | } 38 | } 39 | -------------------------------------------------------------------------------- /web/src/components/Message.jsx: -------------------------------------------------------------------------------- 1 | import React from "react"; 2 | import { Col, Card, CardBody } from "reactstrap"; 3 | 4 | export default (props) => { 5 | return ( 6 | 7 | 8 | 9 |
10 | {props.author} 11 | 12 | {props.datetime.toDate().toLocaleString()} 13 | 14 |
15 |
{props.content}
16 |
17 |
18 | 19 | ); 20 | }; 21 | -------------------------------------------------------------------------------- /web/src/App.jsx: -------------------------------------------------------------------------------- 1 | import React from "react"; 2 | import { BrowserRouter as Router, Switch, Route, Redirect } from "react-router-dom"; 3 | import "./App.css"; 4 | 5 | import Navbar from "./components/Navbar"; 6 | 7 | import Home from "./pages/Home"; 8 | import Record from "./pages/Record"; 9 | import NotFound from "./pages/NotFound"; 10 | 11 | const App = () => { 12 | return ( 13 | <> 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | ); 25 | }; 26 | 27 | export default App; 28 | -------------------------------------------------------------------------------- /web/src/index.css: -------------------------------------------------------------------------------- 1 | body { 2 | margin: 0; 3 | font-family: -apple-system, BlinkMacSystemFont, "Segoe UI", "Roboto", "Oxygen", "Ubuntu", 4 | "Cantarell", "Fira Sans", "Droid Sans", "Helvetica Neue", sans-serif; 5 | -webkit-font-smoothing: antialiased; 6 | -moz-osx-font-smoothing: grayscale; 7 | background-color: #2f3136 !important; 8 | } 9 | 10 | code { 11 | font-family: source-code-pro, Menlo, Monaco, Consolas, "Courier New", monospace; 12 | } 13 | 14 | .discord-bg-primary { 15 | background-color: #36393f !important; 16 | } 17 | 18 | .discord-bg-secondary { 19 | background-color: #2f3136 !important; 20 | } 21 | 22 | .discord-bg-tertiary { 23 | background-color: #202225 !important; 24 | } 25 | 26 | .discord-fg-primary { 27 | color: #f2fff9 !important; 28 | } 29 | 30 | .discord-fg-secondary { 31 | color: #72767d !important; 32 | } 33 | -------------------------------------------------------------------------------- /firestore/config.py: -------------------------------------------------------------------------------- 1 | import firebase_admin 2 | from firebase_admin import credentials 3 | from firebase_admin import firestore 4 | 5 | class Firestore: 6 | def __init__(self): 7 | cred = credentials.Certificate("firestore/secret.json") 8 | firebase_admin.initialize_app(cred) 9 | self.db = firestore.client() 10 | 11 | def record_conversation(self, summary, keywords, messagelist): 12 | record_ref = self.db.collection(u"records").document() 13 | 14 | record = { 15 | u"datetime": messagelist[0]["datetime"], 16 | u"keywords": keywords, 17 | u"summary": summary, 18 | } 19 | 20 | record_ref.set(record) 21 | 22 | for message in messagelist: 23 | message_ref = self.db.collection(u"messages").document() 24 | message[u"record"] = f'/records/{record_ref.id}' 25 | message_ref.set(message) 26 | -------------------------------------------------------------------------------- /main.py: -------------------------------------------------------------------------------- 1 | from discord.ext import commands 2 | from os import environ 3 | from pathlib import Path 4 | 5 | 6 | def main(): 7 | token = environ.get("BOT_TOKEN") 8 | if not token: 9 | print("NO TOKEN") 10 | return 11 | 12 | prefix = environ.get("BOT_PREFIX") 13 | if not prefix: 14 | print("NO PREFIX") 15 | return 16 | 17 | bot = commands.Bot(command_prefix=commands.when_mentioned_or(prefix)) 18 | 19 | def no_dm_check(ctx): 20 | if ctx.guild is None: 21 | raise commands.NoPrivateMEssage( 22 | "I refuse to take part in private conversations." 23 | ) 24 | 25 | return True 26 | 27 | # Add cogs 28 | cogs = [file.stem for file in Path("cogs").glob("*py")] 29 | for extension in cogs: 30 | bot.load_extension(f"cogs.{extension}") 31 | 32 | # Restrict bot usage to inside guild channels only. 33 | bot.add_check(no_dm_check) 34 | 35 | bot.run(token) 36 | 37 | 38 | if __name__ == "__main__": 39 | main() 40 | -------------------------------------------------------------------------------- /web/package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "web", 3 | "version": "0.1.0", 4 | "private": true, 5 | "dependencies": { 6 | "@testing-library/jest-dom": "^4.2.4", 7 | "@testing-library/react": "^9.3.2", 8 | "@testing-library/user-event": "^7.1.2", 9 | "bootstrap": "^4.5.2", 10 | "dotenv": "^8.2.0", 11 | "firebase": "^7.23.0", 12 | "react": "^16.13.1", 13 | "react-dom": "^16.13.1", 14 | "react-router-dom": "^5.2.0", 15 | "react-scripts": "3.4.3", 16 | "reactstrap": "^8.6.0" 17 | }, 18 | "scripts": { 19 | "start": "react-scripts start", 20 | "build": "react-scripts build", 21 | "test": "react-scripts test", 22 | "eject": "react-scripts eject" 23 | }, 24 | "eslintConfig": { 25 | "extends": "react-app" 26 | }, 27 | "browserslist": { 28 | "production": [ 29 | ">0.2%", 30 | "not dead", 31 | "not op_mini all" 32 | ], 33 | "development": [ 34 | "last 1 chrome version", 35 | "last 1 firefox version", 36 | "last 1 safari version" 37 | ] 38 | }, 39 | "devDependencies": {} 40 | } 41 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2020 Kunwar Shaanjeet Singh Grover 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | aiohttp==3.6.2 2 | async-timeout==3.0.1 3 | attrs==20.2.0 4 | CacheControl==0.12.6 5 | cachetools==4.1.1 6 | certifi==2020.6.20 7 | cffi==1.14.3 8 | chardet==3.0.4 9 | click==7.1.2 10 | crypto==1.4.1 11 | cryptography==3.1.1 12 | decorator==4.4.2 13 | discord==1.0.1 14 | discord.py==1.5.0 15 | firebase-admin==4.4.0 16 | gcloud==0.18.3 17 | google-api-core==1.22.4 18 | google-api-python-client==1.12.3 19 | google-auth==1.22.1 20 | google-auth-httplib2==0.0.4 21 | google-cloud-core==1.4.3 22 | google-cloud-firestore==1.9.0 23 | google-cloud-storage==1.31.2 24 | google-crc32c==1.0.0 25 | google-resumable-media==1.1.0 26 | googleapis-common-protos==1.52.0 27 | grpcio==1.32.0 28 | httplib2==0.18.1 29 | idna==2.10 30 | joblib==0.17.0 31 | jwcrypto==0.8 32 | msgpack==1.0.0 33 | multidict==4.7.6 34 | Naked==0.1.31 35 | networkx==2.5 36 | nltk==3.5 37 | numpy==1.19.2 38 | oauth2client==4.1.3 39 | protobuf==3.13.0 40 | pyasn1==0.4.8 41 | pyasn1-modules==0.2.8 42 | pycparser==2.20 43 | pycrypto==2.6.1 44 | python-jwt==3.3.0 45 | pytz==2020.1 46 | PyYAML==5.3.1 47 | regex==2020.9.27 48 | requests==2.24.0 49 | requests-toolbelt==0.9.1 50 | rsa==4.6 51 | shellescape==3.8.1 52 | six==1.15.0 53 | sseclient==0.0.27 54 | tqdm==4.50.2 55 | uritemplate==3.0.1 56 | urllib3==1.25.10 57 | yarl==1.6.0 58 | -------------------------------------------------------------------------------- /cogs/util.py: -------------------------------------------------------------------------------- 1 | from discord.ext import commands 2 | 3 | 4 | class Utilities(commands.Cog): 5 | def __init__(self, bot): 6 | self.bot = bot 7 | 8 | @commands.command(brief="Delete a conversation") 9 | async def delete(self, ctx, start=None, end=None): 10 | """Command to delete conversation""" 11 | if ctx.message.author.bot: 12 | return 13 | 14 | if not start or not end: 15 | await ctx.send( 16 | "Insufficient arguments!\n Arguements: " 17 | ) 18 | return 19 | 20 | channel = ctx.channel 21 | try: 22 | start_message = await channel.fetch_message(start) 23 | end_message = await channel.fetch_message(end) 24 | except: 25 | await ctx.send("Can not fetch message!") 26 | return 27 | 28 | try: 29 | raw_messages = await channel.history( 30 | before=end_message.created_at, 31 | after=start_message.created_at, 32 | oldest_first=True, 33 | ).flatten() 34 | except: 35 | await ctx.send("Can not get messages in that time range!") 36 | return 37 | 38 | raw_messages = [start_message, *raw_messages, end_message] 39 | 40 | for message in raw_messages: 41 | await message.delete() 42 | 43 | 44 | def setup(bot): 45 | bot.add_cog(Utilities(bot)) 46 | -------------------------------------------------------------------------------- /web/src/pages/Home.jsx: -------------------------------------------------------------------------------- 1 | import React, { useState, useEffect } from "react"; 2 | import { Container, Row, Col, Card, CardBody } from "reactstrap"; 3 | import { Link } from "react-router-dom"; 4 | import firebase from "../firebase"; 5 | 6 | export default () => { 7 | const [records, setRecords] = useState([]); 8 | 9 | useEffect(() => { 10 | firebase 11 | .firestore() 12 | .collection("records") 13 | .onSnapshot((snapshot) => { 14 | const newRecords = snapshot.docs.map((doc) => ({ id: doc.id, ...doc.data() })); 15 | setRecords(newRecords); 16 | }); 17 | }, []); 18 | 19 | if (records.length === 0) return null; 20 | return ( 21 | 22 | {console.log(records)} 23 | 24 | {records.map((record) => ( 25 | 26 | 31 | 32 |
Recording: {record.datetime.toDate().toLocaleString()}
33 |
34 | Keywords: {record.keywords.join(", ")} 35 |
36 |
37 |
38 | 39 | ))} 40 |
41 |
42 | ); 43 | }; 44 | -------------------------------------------------------------------------------- /web/public/index.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 12 | 13 | 17 | 18 | 27 | Synopsys 28 | 29 | 30 | 31 |
32 | 42 | 43 | 44 | -------------------------------------------------------------------------------- /summarisation/very_common_words.txt: -------------------------------------------------------------------------------- 1 | the 2 | lmao 3 | yeah 4 | lol 5 | tf 6 | ok 7 | of 8 | lol 9 | fuck 10 | bruh 11 | bro 12 | xd 13 | stuff 14 | haha 15 | hahaha 16 | https 17 | simp 18 | to 19 | and 20 | a 21 | in 22 | is 23 | it 24 | you 25 | that 26 | he 27 | was 28 | for 29 | on 30 | are 31 | with 32 | as 33 | I 34 | his 35 | they 36 | be 37 | at 38 | one 39 | have 40 | this 41 | from 42 | or 43 | had 44 | by 45 | not 46 | word 47 | but 48 | what 49 | some 50 | we 51 | can 52 | out 53 | other 54 | were 55 | all 56 | there 57 | when 58 | up 59 | use 60 | your 61 | how 62 | said 63 | an 64 | each 65 | she 66 | which 67 | do 68 | their 69 | time 70 | if 71 | will 72 | way 73 | about 74 | many 75 | then 76 | them 77 | write 78 | would 79 | like 80 | so 81 | these 82 | her 83 | long 84 | make 85 | thing 86 | see 87 | him 88 | two 89 | has 90 | look 91 | more 92 | day 93 | could 94 | go 95 | come 96 | did 97 | number 98 | sound 99 | no 100 | most 101 | people 102 | my 103 | over 104 | know 105 | water 106 | than 107 | call 108 | first 109 | who 110 | may 111 | down 112 | side 113 | been 114 | now 115 | find 116 | any 117 | new 118 | work 119 | part 120 | take 121 | get 122 | place 123 | made 124 | live 125 | where 126 | after 127 | back 128 | little 129 | only 130 | round 131 | man 132 | year 133 | came 134 | show 135 | every 136 | good 137 | me 138 | give 139 | our 140 | under 141 | name 142 | very 143 | through 144 | just 145 | form 146 | sentence 147 | great 148 | think 149 | say 150 | help 151 | low 152 | line 153 | differ 154 | turn 155 | cause 156 | much 157 | mean 158 | before 159 | move 160 | right 161 | boy 162 | old 163 | too 164 | same 165 | tell 166 | does 167 | set 168 | three 169 | want 170 | air 171 | well 172 | also 173 | play 174 | small 175 | end 176 | put 177 | home 178 | read 179 | hand 180 | port 181 | large 182 | spell 183 | add 184 | even 185 | land 186 | here 187 | must 188 | big 189 | high 190 | such 191 | follow 192 | act 193 | why 194 | ask 195 | men 196 | change 197 | went 198 | light 199 | kind 200 | off 201 | need 202 | house 203 | picture 204 | try 205 | us 206 | again 207 | animal 208 | point 209 | mother 210 | world 211 | near 212 | build 213 | self 214 | earth 215 | father -------------------------------------------------------------------------------- /web/src/logo.svg: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | pip-wheel-metadata/ 24 | share/python-wheels/ 25 | *.egg-info/ 26 | .installed.cfg 27 | *.egg 28 | MANIFEST 29 | 30 | # PyInstaller 31 | # Usually these files are written by a python script from a template 32 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 33 | *.manifest 34 | *.spec 35 | 36 | # Installer logs 37 | pip-log.txt 38 | pip-delete-this-directory.txt 39 | 40 | # Unit test / coverage reports 41 | htmlcov/ 42 | .tox/ 43 | .nox/ 44 | .coverage 45 | .coverage.* 46 | .cache 47 | nosetests.xml 48 | coverage.xml 49 | *.cover 50 | *.py,cover 51 | .hypothesis/ 52 | .pytest_cache/ 53 | 54 | # Translations 55 | *.mo 56 | *.pot 57 | 58 | # Django stuff: 59 | *.log 60 | local_settings.py 61 | db.sqlite3 62 | db.sqlite3-journal 63 | 64 | # Flask stuff: 65 | instance/ 66 | .webassets-cache 67 | 68 | # Scrapy stuff: 69 | .scrapy 70 | 71 | # Sphinx documentation 72 | docs/_build/ 73 | 74 | # PyBuilder 75 | target/ 76 | 77 | # Jupyter Notebook 78 | .ipynb_checkpoints 79 | 80 | # IPython 81 | profile_default/ 82 | ipython_config.py 83 | 84 | # pyenv 85 | .python-version 86 | 87 | # pipenv 88 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 89 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 90 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 91 | # install all needed dependencies. 92 | #Pipfile.lock 93 | 94 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow 95 | __pypackages__/ 96 | 97 | # Celery stuff 98 | celerybeat-schedule 99 | celerybeat.pid 100 | 101 | # SageMath parsed files 102 | *.sage.py 103 | 104 | # Environments 105 | .env 106 | .venv 107 | env/ 108 | venv/ 109 | ENV/ 110 | env.bak/ 111 | venv.bak/ 112 | 113 | # Spyder project settings 114 | .spyderproject 115 | .spyproject 116 | 117 | # Rope project settings 118 | .ropeproject 119 | 120 | # mkdocs documentation 121 | /site 122 | 123 | # mypy 124 | .mypy_cache/ 125 | .dmypy.json 126 | dmypy.json 127 | 128 | # Pyre type checker 129 | .pyre/ 130 | 131 | # Run Script 132 | run.sh 133 | 134 | # Vim configs 135 | .vim 136 | *.swp 137 | 138 | # Firestore credentials 139 | secret.json 140 | -------------------------------------------------------------------------------- /web/README.md: -------------------------------------------------------------------------------- 1 | This project was bootstrapped with [Create React App](https://github.com/facebook/create-react-app). 2 | 3 | ## Available Scripts 4 | 5 | In the project directory, you can run: 6 | 7 | ### `yarn start` 8 | 9 | Runs the app in the development mode.
10 | Open [http://localhost:3000](http://localhost:3000) to view it in the browser. 11 | 12 | The page will reload if you make edits.
13 | You will also see any lint errors in the console. 14 | 15 | ### `yarn test` 16 | 17 | Launches the test runner in the interactive watch mode.
18 | See the section about [running tests](https://facebook.github.io/create-react-app/docs/running-tests) for more information. 19 | 20 | ### `yarn build` 21 | 22 | Builds the app for production to the `build` folder.
23 | It correctly bundles React in production mode and optimizes the build for the best performance. 24 | 25 | The build is minified and the filenames include the hashes.
26 | Your app is ready to be deployed! 27 | 28 | See the section about [deployment](https://facebook.github.io/create-react-app/docs/deployment) for more information. 29 | 30 | ### `yarn eject` 31 | 32 | **Note: this is a one-way operation. Once you `eject`, you can’t go back!** 33 | 34 | If you aren’t satisfied with the build tool and configuration choices, you can `eject` at any time. This command will remove the single build dependency from your project. 35 | 36 | Instead, it will copy all the configuration files and the transitive dependencies (webpack, Babel, ESLint, etc) right into your project so you have full control over them. All of the commands except `eject` will still work, but they will point to the copied scripts so you can tweak them. At this point you’re on your own. 37 | 38 | You don’t have to ever use `eject`. The curated feature set is suitable for small and middle deployments, and you shouldn’t feel obligated to use this feature. However we understand that this tool wouldn’t be useful if you couldn’t customize it when you are ready for it. 39 | 40 | ## Learn More 41 | 42 | You can learn more in the [Create React App documentation](https://facebook.github.io/create-react-app/docs/getting-started). 43 | 44 | To learn React, check out the [React documentation](https://reactjs.org/). 45 | 46 | ### Code Splitting 47 | 48 | This section has moved here: https://facebook.github.io/create-react-app/docs/code-splitting 49 | 50 | ### Analyzing the Bundle Size 51 | 52 | This section has moved here: https://facebook.github.io/create-react-app/docs/analyzing-the-bundle-size 53 | 54 | ### Making a Progressive Web App 55 | 56 | This section has moved here: https://facebook.github.io/create-react-app/docs/making-a-progressive-web-app 57 | 58 | ### Advanced Configuration 59 | 60 | This section has moved here: https://facebook.github.io/create-react-app/docs/advanced-configuration 61 | 62 | ### Deployment 63 | 64 | This section has moved here: https://facebook.github.io/create-react-app/docs/deployment 65 | 66 | ### `yarn build` fails to minify 67 | 68 | This section has moved here: https://facebook.github.io/create-react-app/docs/troubleshooting#npm-run-build-fails-to-minify 69 | -------------------------------------------------------------------------------- /web/src/pages/Record.jsx: -------------------------------------------------------------------------------- 1 | import React, { useState, useEffect } from "react"; 2 | import { Container, Row, Col } from "reactstrap"; 3 | import firebase from "../firebase"; 4 | 5 | import Message from "../components/Message"; 6 | 7 | export default (props) => { 8 | const [messages, setMessages] = useState([]); 9 | const [keywords, setKeywords] = useState([]); 10 | const [summary, setSummary] = useState(""); 11 | 12 | useEffect(() => { 13 | const recordRef = firebase.firestore().collection("records").doc(props.match.params.id); 14 | async function getRecords() { 15 | const snapshot = await recordRef.get(); 16 | if (snapshot.empty) { 17 | console.log("Record not found!"); 18 | } else { 19 | setSummary(snapshot.data().summary); 20 | setKeywords(snapshot.data().keywords); 21 | } 22 | } 23 | 24 | const messagesRef = firebase.firestore().collection("messages"); 25 | async function getMessages() { 26 | const snapshot = await messagesRef 27 | .where("record", "==", `/records/${props.match.params.id}`) 28 | .get(); 29 | if (snapshot.empty) { 30 | console.log("No messages!"); 31 | } else { 32 | setMessages(snapshot.docs.map((doc) => ({ id: doc.id, ...doc.data() }))); 33 | } 34 | } 35 | 36 | getRecords(); 37 | getMessages(); 38 | }, [props.match.params.id]); 39 | 40 | if (messages.length === 0) return null; 41 | return ( 42 | 43 | 44 | 45 | 46 |

47 | # messages 48 |

49 | 50 | {messages.map((message) => ( 51 | 52 | ))} 53 | 54 |
55 | 56 | 57 | 58 |
59 |

60 | # summary 61 |

62 |
{summary}
63 |
64 |
65 |
66 | # keywords 67 |
68 |
{keywords.join(", ")}
69 |
70 |
71 | 72 |
73 |
74 | ); 75 | }; 76 | -------------------------------------------------------------------------------- /cogs/record.py: -------------------------------------------------------------------------------- 1 | from discord.ext import commands 2 | from json import dump, load 3 | from pprint import pprint 4 | 5 | from summarisation.text_summarisation import generate_summary, generate_keywords 6 | from firestore.config import Firestore 7 | 8 | 9 | async def convert_to_summary(ctx, start, end): 10 | """Returns (summary, keywords) from a start and end message""" 11 | channel = ctx.channel 12 | try: 13 | start_message = await channel.fetch_message(start) 14 | end_message = await channel.fetch_message(end) 15 | except: 16 | await ctx.send("Can not fetch message!") 17 | return 18 | 19 | try: 20 | raw_messages = await channel.history( 21 | before=end_message.created_at, 22 | after=start_message.created_at, 23 | oldest_first=True, 24 | ).flatten() 25 | except: 26 | await ctx.send("Can not get messages in that time range!") 27 | return 28 | 29 | raw_messages = [start_message, *raw_messages, end_message] 30 | clean_messages = [ 31 | { 32 | "content": message.clean_content, 33 | "datetime": message.created_at, 34 | "author": message.author.display_name, 35 | "reaction": sum(reaction.count for reaction in message.reactions), 36 | } 37 | for message in raw_messages 38 | if message.clean_content 39 | ] 40 | 41 | summary = generate_summary(clean_messages) 42 | keywords = generate_keywords(clean_messages) 43 | 44 | return summary, keywords, clean_messages 45 | 46 | 47 | class Record(commands.Cog): 48 | def __init__(self, bot): 49 | self.bot = bot 50 | self.firestore = Firestore() 51 | 52 | @commands.command(brief="Ping Pong") 53 | async def ping(self, ctx): 54 | """Command to test if bot is recieving requests""" 55 | await ctx.send("Pong") 56 | 57 | @commands.command(brief="Summarises a conversation between two timestamps") 58 | async def summarise(self, ctx, start=None, end=None): 59 | """Command to summarise conversation""" 60 | if ctx.message.author.bot: 61 | return 62 | 63 | if not start or not end: 64 | await ctx.send( 65 | "Insufficient arguments!\n Arguements: " 66 | ) 67 | return 68 | 69 | summary, keywords, clean_messages = await convert_to_summary( 70 | ctx, start, end 71 | ) 72 | 73 | if summary: 74 | summary = "```\n" + summary + "```" 75 | await ctx.send(summary) 76 | else: 77 | await ctx.send("```Not enough messages to generate summary```") 78 | 79 | if keywords: 80 | keyword_str = "Keywords: " 81 | for word in keywords: 82 | keyword_str += f"{word}, " 83 | 84 | keyword_str = "```\n" + keyword_str + "```" 85 | await ctx.send(keyword_str) 86 | else: 87 | await ctx.send("```Not enough messages to generate keywords```") 88 | 89 | @commands.command(brief="Records a conversation between two timestamps") 90 | async def record(self, ctx, start=None, end=None): 91 | """Command to summarise conversation""" 92 | if ctx.message.author.bot: 93 | return 94 | 95 | if not start or not end: 96 | await ctx.send( 97 | "Insufficient arguments!\n Arguements: " 98 | ) 99 | return 100 | 101 | summary, keywords, clean_messages = await convert_to_summary( 102 | ctx, start, end 103 | ) 104 | 105 | if not summary: 106 | summary = f'Not enough messages to generate summary' 107 | 108 | try: 109 | self.firestore.record_conversation( 110 | summary, keywords, clean_messages 111 | ) 112 | await ctx.send("```Conversation recorded successfully!```") 113 | except: 114 | await ctx.send("```Error while recording conversation```") 115 | 116 | 117 | def setup(bot): 118 | bot.add_cog(Record(bot)) 119 | -------------------------------------------------------------------------------- /summarisation/text_summarisation.py: -------------------------------------------------------------------------------- 1 | import nltk 2 | from nltk.corpus import stopwords 3 | from nltk.cluster.util import cosine_distance 4 | from nltk import sent_tokenize 5 | from nltk import word_tokenize 6 | from nltk.corpus import words 7 | import numpy as np 8 | import networkx as nx 9 | from collections import Counter 10 | import re 11 | 12 | def deEmojify(text): 13 | regrex_pattern = re.compile(pattern="[" 14 | u"\U0001F600-\U0001F64F" # emoticons 15 | u"\U0001F300-\U0001F5FF" # symbols & pictographs 16 | u"\U0001F680-\U0001F6FF" # transport & map symbols 17 | u"\U0001F1E0-\U0001F1FF" # flags (iOS) 18 | "]+", flags=re.UNICODE) 19 | return regrex_pattern.sub(r'', text) 20 | 21 | def create_common_word_list(list_of_sentences): 22 | file = open('summarisation/very_common_words.txt', 'r') 23 | file_string = file.read() 24 | common_word_list = file_string.split('\n') 25 | all_names = [sent['author'] for sent in list_of_sentences] 26 | all_names = list(set(all_names)) 27 | common_word_list.append(all_names) 28 | return common_word_list 29 | 30 | 31 | def read_messages(list_of_sentences): 32 | all_sents = [sent_tokenize(sent['content']) for sent in list_of_sentences] 33 | flat_list = [item for sublist in all_sents for item in sublist] 34 | pre_final_sentences = [] 35 | for sentence in flat_list: 36 | if len(sentence) > 5: 37 | pre_final_sentences.append(sentence) 38 | final_sentences = [] 39 | sentence_enders = ['.', '!', '?'] 40 | for sentence in pre_final_sentences: 41 | sentence = re.sub(r'@[a-zA-Z]*\s', '', sentence) 42 | sentence = re.sub('> ','',sentence) 43 | sentence = deEmojify(sentence) 44 | if sentence[-1] not in sentence_enders: 45 | sentence = sentence+'.' 46 | final_sentences.append(sentence) 47 | return final_sentences 48 | 49 | 50 | def sentence_similarity(sent1, sent2, common_word_list, stopwords=None): 51 | 52 | if stopwords is None: 53 | stopwords = [] 54 | sent1 = [w.lower() for w in sent1] 55 | sent2 = [w.lower() for w in sent2] 56 | 57 | all_words = list(set(sent1 + sent2)) 58 | 59 | vector1 = [0] * len(all_words) 60 | vector2 = [0] * len(all_words) 61 | 62 | # build the vector for the first sentence 63 | for w in sent1: 64 | if w not in stopwords and w not in common_word_list: 65 | vector1[all_words.index(w)] += 1 66 | 67 | # build the vector for the second sentence 68 | for w in sent2: 69 | if w not in stopwords and w not in common_word_list: 70 | vector2[all_words.index(w)] += 1 71 | 72 | return 1 - cosine_distance(vector1, vector2) 73 | 74 | 75 | def build_similarity_matrix(sentences, stop_words, common_word_list): 76 | # Create an empty similarity matrix 77 | similarity_matrix = np.zeros((len(sentences), len(sentences))) 78 | 79 | for idx1 in range(len(sentences)): 80 | for idx2 in range(len(sentences)): 81 | if idx1 == idx2: # ignore if both are same sentences 82 | continue 83 | similarity_matrix[idx1][idx2] = sentence_similarity( 84 | sentences[idx1], sentences[idx2], stop_words, common_word_list) 85 | 86 | return similarity_matrix 87 | 88 | def generate_summary(messages, percentage=8): 89 | stop_words = stopwords.words('english') 90 | summarize_text = [] 91 | common_word_list = create_common_word_list(messages) 92 | sentences = read_messages(messages) 93 | sentence_similarity_martix = build_similarity_matrix( 94 | sentences, stop_words, common_word_list) 95 | sentence_similarity_graph = nx.from_numpy_array(sentence_similarity_martix) 96 | scores = nx.pagerank(sentence_similarity_graph) 97 | ranked_sentence = sorted( 98 | ((scores[i], s) for i, s in enumerate(sentences)), reverse=True) 99 | top_n = len(ranked_sentence)//(100//percentage) 100 | for i in range(top_n): 101 | summarize_text.append("".join(ranked_sentence[i][1])) 102 | return " ".join(summarize_text) 103 | 104 | def generate_keywords(messages, n=8): 105 | sentences = read_messages(messages) 106 | stop_words = stopwords.words('english') 107 | words = [] 108 | common_word_list = create_common_word_list(messages) 109 | for sentence in sentences: 110 | sentence_words = word_tokenize(sentence) 111 | for i in range(len(sentence_words)): 112 | sentence_words[i] = sentence_words[i].lower() 113 | words.append(sentence_words) 114 | flat_words = [item for sublist in words for item in sublist] 115 | final_wordlist = [] 116 | for flat_word in flat_words: 117 | if flat_word not in stop_words and len(flat_word) > 3: 118 | final_wordlist.append(flat_word) 119 | freq = Counter(final_wordlist) 120 | common_keywords = (freq.most_common(n)) 121 | final_words = [] 122 | for c in common_keywords: 123 | if c[0] not in common_word_list: 124 | final_words.append(c[0]) 125 | return final_words 126 | -------------------------------------------------------------------------------- /web/src/serviceWorker.js: -------------------------------------------------------------------------------- 1 | // This optional code is used to register a service worker. 2 | // register() is not called by default. 3 | 4 | // This lets the app load faster on subsequent visits in production, and gives 5 | // it offline capabilities. However, it also means that developers (and users) 6 | // will only see deployed updates on subsequent visits to a page, after all the 7 | // existing tabs open on the page have been closed, since previously cached 8 | // resources are updated in the background. 9 | 10 | // To learn more about the benefits of this model and instructions on how to 11 | // opt-in, read https://bit.ly/CRA-PWA 12 | 13 | const isLocalhost = Boolean( 14 | window.location.hostname === 'localhost' || 15 | // [::1] is the IPv6 localhost address. 16 | window.location.hostname === '[::1]' || 17 | // 127.0.0.0/8 are considered localhost for IPv4. 18 | window.location.hostname.match( 19 | /^127(?:\.(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)){3}$/ 20 | ) 21 | ); 22 | 23 | export function register(config) { 24 | if (process.env.NODE_ENV === 'production' && 'serviceWorker' in navigator) { 25 | // The URL constructor is available in all browsers that support SW. 26 | const publicUrl = new URL(process.env.PUBLIC_URL, window.location.href); 27 | if (publicUrl.origin !== window.location.origin) { 28 | // Our service worker won't work if PUBLIC_URL is on a different origin 29 | // from what our page is served on. This might happen if a CDN is used to 30 | // serve assets; see https://github.com/facebook/create-react-app/issues/2374 31 | return; 32 | } 33 | 34 | window.addEventListener('load', () => { 35 | const swUrl = `${process.env.PUBLIC_URL}/service-worker.js`; 36 | 37 | if (isLocalhost) { 38 | // This is running on localhost. Let's check if a service worker still exists or not. 39 | checkValidServiceWorker(swUrl, config); 40 | 41 | // Add some additional logging to localhost, pointing developers to the 42 | // service worker/PWA documentation. 43 | navigator.serviceWorker.ready.then(() => { 44 | console.log( 45 | 'This web app is being served cache-first by a service ' + 46 | 'worker. To learn more, visit https://bit.ly/CRA-PWA' 47 | ); 48 | }); 49 | } else { 50 | // Is not localhost. Just register service worker 51 | registerValidSW(swUrl, config); 52 | } 53 | }); 54 | } 55 | } 56 | 57 | function registerValidSW(swUrl, config) { 58 | navigator.serviceWorker 59 | .register(swUrl) 60 | .then(registration => { 61 | registration.onupdatefound = () => { 62 | const installingWorker = registration.installing; 63 | if (installingWorker == null) { 64 | return; 65 | } 66 | installingWorker.onstatechange = () => { 67 | if (installingWorker.state === 'installed') { 68 | if (navigator.serviceWorker.controller) { 69 | // At this point, the updated precached content has been fetched, 70 | // but the previous service worker will still serve the older 71 | // content until all client tabs are closed. 72 | console.log( 73 | 'New content is available and will be used when all ' + 74 | 'tabs for this page are closed. See https://bit.ly/CRA-PWA.' 75 | ); 76 | 77 | // Execute callback 78 | if (config && config.onUpdate) { 79 | config.onUpdate(registration); 80 | } 81 | } else { 82 | // At this point, everything has been precached. 83 | // It's the perfect time to display a 84 | // "Content is cached for offline use." message. 85 | console.log('Content is cached for offline use.'); 86 | 87 | // Execute callback 88 | if (config && config.onSuccess) { 89 | config.onSuccess(registration); 90 | } 91 | } 92 | } 93 | }; 94 | }; 95 | }) 96 | .catch(error => { 97 | console.error('Error during service worker registration:', error); 98 | }); 99 | } 100 | 101 | function checkValidServiceWorker(swUrl, config) { 102 | // Check if the service worker can be found. If it can't reload the page. 103 | fetch(swUrl, { 104 | headers: { 'Service-Worker': 'script' }, 105 | }) 106 | .then(response => { 107 | // Ensure service worker exists, and that we really are getting a JS file. 108 | const contentType = response.headers.get('content-type'); 109 | if ( 110 | response.status === 404 || 111 | (contentType != null && contentType.indexOf('javascript') === -1) 112 | ) { 113 | // No service worker found. Probably a different app. Reload the page. 114 | navigator.serviceWorker.ready.then(registration => { 115 | registration.unregister().then(() => { 116 | window.location.reload(); 117 | }); 118 | }); 119 | } else { 120 | // Service worker found. Proceed as normal. 121 | registerValidSW(swUrl, config); 122 | } 123 | }) 124 | .catch(() => { 125 | console.log( 126 | 'No internet connection found. App is running in offline mode.' 127 | ); 128 | }); 129 | } 130 | 131 | export function unregister() { 132 | if ('serviceWorker' in navigator) { 133 | navigator.serviceWorker.ready 134 | .then(registration => { 135 | registration.unregister(); 136 | }) 137 | .catch(error => { 138 | console.error(error.message); 139 | }); 140 | } 141 | } 142 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Synopsys 2 | 3 | > "Perhaps the best test of a man's intelligence is his capacity for making a summary." - Lytton Strachey, English writer and critic. 4 | 5 | **Synopsys** is a discord-bot that summaries conversations and records them for future use. 6 | 7 | - Use [this link](https://summer-iser.web.app/) to checkout the demo webapp. 8 | - Checkout [this video](https://youtu.be/BpNmr4FwLCQ) for a working demo. 9 | 10 | So, go ahead! Use Synopsys to **find your TL;DRs**. 11 | 12 | ## Table of Contents 13 | 14 | - [About](#About) 15 | - [Our Idea](#Our-Idea) 16 | - [Technologies Involved](#Technologies-Involved) 17 | - [Automated Chat Summarization](#Automated-Chat-Summarization) 18 | - [Members](#Members) 19 | - [Basic Working Version](#Basic-Working-Version) 20 | - [Features](#Features) 21 | - [Installation](#Installation) 22 | - [Implementation](#Implementation) 23 | - [Further Ideas](#Further-Ideas) 24 | - [License](LICENSE) 25 | 26 | ## About 27 | 28 | ### Introduction 29 | 30 | Today, especially when the world lies in the grasps of the Corona-virus, a considerable chunk of information exchange happens in the form of **online chat conversations**. 31 | 32 | Such conversations involve a substantial amount of participants and a single conversation tends to span a wide range of topics interspersed with irrelevant segments. This results in the necessity of having proper **summarization**, so that people who were not present during a long (not necessarily coherent) conversation need not read through the big chain of chats to follow-up on it and can rather read a summary of the same, thus saving a lot of time. 33 | 34 | This calls for summarization algorithms that work on scraped data from chat services and yield a summary as required. 35 | 36 | ### About Discord 37 | 38 | Among the several instant messaging platforms available, **Discord** is one of the most popular ones. Because of its several innovative features like server-channel systems, awesome call quality, permission management and tools to integrate bots, Discord has become a major platform for people to collaborate, converse and share ideas. 39 | 40 | 41 | 42 | ### Our Idea 43 | 44 | In this project, we aim to make a **Discord bot** that effectively *summarizes* conversations and allows the user to keep a *record* of these summaries on a dedicated website. 45 | 46 | This not only allows the user to obtain a **automated-summary** of any given chain of chats, but also allows the user to have easy access to these stored summaries for future reference. 47 | 48 | ### Technologies Involved 49 | 50 | - Python 3 51 | - Firebase and Firestore 52 | - Google Cloud for hosting the bot on a virtual machine 53 | - `Discord.py` for functionality of discord bot 54 | - React for creating the front-end interface 55 | 56 | For a detailed description regarding the current implementation check [this](#Implementation). 57 | 58 | ### Automated Chat Summarization 59 | 60 | Automated-summarization of text has been applied to quite a lot of genres including varieties of articles, scientific papers and blogs. 61 | 62 | However, when compared to the above examples, very little work has been done in the field of chat summarization. This is because there are several problems associated with it due to fact that chats are inherently noisy, unstructured, informal and involves frequent shifts in topic. 63 | 64 | Our current working version uses a basic **cosine-similarity model** to generate a unique set of words (keywords) and thus use these keywords to evaluate the given data set of chats and return only *most unique sentences* as a part of the summary. This basic summarization is extractive by nature. For a detailed description on implementation of this model, look [here](#Implementation). 65 | 66 | In [Further Ideas](#Further-Ideas), we also attempt at constructing a better summarization algorithm, based on present research on the topic. 67 | 68 | ## Members 69 | 70 | The team involved in the project comprises of [Kunwar Shaanjeet Singh Grover](https://github.com/Groverkss), [Vishva Saravanan](https://github.com/v15hv4), [Mayank Goel](https://github.com/MayankGoel28) and [Alapan Chaudhuri](https://github.com/banrovegrie), respectively. 71 | 72 | # Basic Working Version 73 | 74 | ### Features 75 | 76 | - Easy to use conversation summarization based on discord messages 77 | - Sick of scrolling back thousands of messages to get an important conversation you had? Record the conversation and review it again anytime the web-interface which gives a summary of the conversation as well as the keywords. 78 | - Can be added to any required server 79 | 80 | ## Installation 81 | 82 | :warning: **This project uses Python 3**: Usage of Python 2 may have varying effect 83 | 84 | - Create a virtual environment and install dependencies: 85 | 86 | ```python 87 | $ python3 -m venv .env 88 | $ . .env/bin/activate 89 | $ pip3 install -r requirements.txt 90 | ``` 91 | 92 | - Install the nltk corpus required: 93 | 94 | ```python 95 | $ python3 nltkmodules.py 96 | ``` 97 | 98 | - Export the required environment variables: 99 | 100 | ```bash 101 | $ export BOT_TOKEN="TOKEN_FOR_DISCORD_BOT" 102 | $ export BOT_PREFIX="PREFIX_FOR_BOT" 103 | ``` 104 | 105 | - Run the bot: 106 | 107 | ```python 108 | python3 main.py 109 | ``` 110 | 111 | To add the record functionality, you need to connect the bot to a firestore database. Place the serviceAccount.json as firestore/secret.json. This allows the bot to use the record functionality to record database on the corresponding firestore database. 112 | 113 | 114 | ## Implementation 115 | 116 | ### Extracting Data 117 | 118 | The discord bot works by obtaining all the messages between the given starting message id and the ending message id. The bot then uses the text summarizer we built and obtains keywords and a short summary. 119 | 120 | ### Summarization 121 | 122 | The chat summarizer works on the mathematical principle of cosine similarity for non-zero vectors. 123 | 124 | For this, we have represented each line as a vector, of unique words, quantifying it on basis of how "important" or frequent it is, and this idea is done using a graph-based TextRank algorithm on the similarity matrix generated on the above vectors. 125 | 126 | Additional challenges were cleaning and parsing the data to include only relevant keywords, and this involved removal of stopwords and manual addition of common words. Additionally, discord usernames and other special characters like emojis were removed. 127 | 128 | The summarizer also outputs a list of keywords, on basis of frequency. This list is also cleaned for stopwords and other common words that do not convey the meaning of a sentence. 129 | 130 | ### Web 131 | 132 | The output (after text summarization) is then stored on a Firebase (Firestore) database, which is exposed by a ReactJS app. 133 | 134 | The webapp allows to view the recordings anytime with a summary and keywords. 135 | 136 | Link to the mentioned web-app: [Synopsis App](https://summer-iser.web.app/). 137 | 138 | ## Further Ideas 139 | 140 | Now that we have explained how our working version deals with summarization, we would like to elaborate upon how we plan to **better the summarization algorithm**. 141 | 142 | Given, a conversation as data-set in the form of a series of chats, we shall first remove noise in the form of spelling errors and use text segmentation to formalize the chats to some extent. 143 | 144 | Then, we differentiate chunks of conversation using topic modeling and then using similarity-index upon the few sets of topics to segregate the large chunk of chats. 145 | 146 | Once we have identified the primary topic (**tag**) of a certain series of chats, we build a semantic space of words. With the help of a co-occurrence **HAL** model, we use the given space we calculate cumulative scores of sentences. Using these scores, we include sentences and generate required summary. 147 | 148 | - **Text Segmentation:** We can use Longest Contiguous Messages (LCM) for this. 149 | - **Topic Modeling:** Latent Dirichlet Allocation can be used for this. 150 | - **Summary Generation:** HAL space can be used to create a probabilistic model to generate scores by normalizing the count of terms. 151 | --------------------------------------------------------------------------------