├── .gitattributes ├── .gitignore ├── LICENSE ├── README.md ├── app.py ├── requirements.txt ├── static └── css │ ├── btnout.css │ ├── styles.css │ └── title.css └── templates ├── input.html └── output.html /.gitattributes: -------------------------------------------------------------------------------- 1 | static/* linguist-vendored 2 | templates/* linguist-vendored 3 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | .idea 2 | .DS_Store 3 | 4 | *.cover 5 | .hypothesis/ 6 | .pytest_cache/ 7 | 8 | # Translations 9 | *.mo 10 | *.pot 11 | 12 | # Django stuff: 13 | *.log 14 | local_settings.py 15 | db.sqlite3 16 | 17 | # Flask stuff: 18 | instance/ 19 | .webassets-cache 20 | 21 | # Scrapy stuff: 22 | .scrapy 23 | 24 | # Sphinx documentation 25 | docs/_build/ 26 | 27 | # PyBuilder 28 | target/ 29 | 30 | # Jupyter Notebook 31 | .ipynb_checkpoints 32 | 33 | # pyenv 34 | .python-version 35 | 36 | # celery beat schedule file 37 | celerybeat-schedule 38 | 39 | # SageMath parsed files 40 | *.sage.py 41 | 42 | # Environments 43 | .env 44 | .venv 45 | env/ 46 | venv/ 47 | ENV/ 48 | env.bak/ 49 | venv.bak/ 50 | 51 | # Spyder project settings 52 | .spyderproject 53 | .spyproject 54 | 55 | # Rope project settings 56 | .ropeproject 57 | 58 | # mkdocs documentation 59 | /site 60 | 61 | # mypy 62 | .mypy_cache/ -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2019 Ian Ramzy 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | 
copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Article Summary Deep Learning 2 | [![Author](https://img.shields.io/badge/Author-ianramzy-brightgreen.svg)](https://ianramzy.com) 3 | ![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg) 4 | [![Donate](https://img.shields.io/badge/Donate-PayPal-brightgreen.svg)](https://paypal.me/ianramzy) 5 | ![GitHub repo size](https://img.shields.io/github/repo-size/ianramzy/article-summary-deep-learning.svg) 6 | [![Repo Link](https://img.shields.io/badge/Repo-Link-black.svg)](https://github.com/ianramzy/article-summary-deep-learning) 7 | 8 | 📖 Using deep learning and scraping to analyze/summarize articles! Just drop in any URL! 
"""Flask app that scrapes an article URL and summarises it with spaCy.

The input page posts a URL to ``/result``; the page text is downloaded,
run through the spaCy pipeline, and rendered as the most frequent named
entities, a list of extracted statements about the top entity, and a
displaCy entity visualisation.
"""
from collections import Counter
import re

from bs4 import BeautifulSoup
from flask import Flask, render_template, request, jsonify
import requests
import spacy
from spacy import displacy
import textacy.extract

# Entity labels considered interesting; shared by the ranking in findNER()
# and the highlighting in renderPicture() so the two cannot drift apart.
ENTITY_LABELS = ["ORG", "PERSON", "NORP", "FAC", "GPE", "LOC", "PRODUCT", "EVENT", "WORK_OF_ART"]

# output.html shows one headline entity plus three related terms.
TOP_ENTITY_COUNT = 4

# Upper bound (seconds) for fetching the remote article, so an unresponsive
# host cannot block a request handler indefinitely.
REQUEST_TIMEOUT_SECONDS = 10

nlp = spacy.load('en_core_web_sm')

app = Flask(__name__)


def url_to_string(url):
    """Download *url* and return its visible text as one string.

    A URL without an ``http``/``https``/``ftp`` scheme gets ``http://``
    prepended. ``<script>``, ``<style>`` and ``<aside>`` elements are removed
    before the text is extracted, and every run of newlines/tabs is replaced
    by a single space.

    Raises:
        requests.RequestException: if the page cannot be fetched (including
            when the timeout expires).
    """
    if not re.match('(?:http|ftp|https)://', url):
        url = 'http://{}'.format(url)
    # NOTE(security): the URL comes straight from the submitted form, so this
    # server will fetch whatever address a visitor supplies. Restrict the
    # allowed hosts if the app is ever exposed beyond local use.
    res = requests.get(url, timeout=REQUEST_TIMEOUT_SECONDS)
    soup = BeautifulSoup(res.text, 'lxml')
    for tag in soup(["script", "style", 'aside']):
        tag.extract()
    return " ".join(re.split(r'[\n\t]+', soup.get_text()))


def findNER(article):
    """Return the texts of the most frequently mentioned entities.

    Only entities whose label is in ``ENTITY_LABELS`` are counted. The result
    is ordered from most to least frequent and holds at most
    ``TOP_ENTITY_COUNT`` strings; it is empty when *article* has no
    qualifying entities.
    """
    mentions = Counter(
        ent.text for ent in article.ents if ent.label_ in ENTITY_LABELS
    )
    return [text for text, _count in mentions.most_common(TOP_ENTITY_COUNT)]


def printFacts(top_entities, article):
    """Extract and print statements about the first entity in *top_entities*.

    Each statement's fact is converted to ``str`` and cut at its first ``"."``.

    Returns:
        The list of fact strings, or an empty list when *top_entities* is
        empty (nothing to look up).
    """
    if not top_entities:
        # No entity was recognised on the page; indexing [0] would raise.
        return []

    subject = top_entities[0]
    statements = textacy.extract.semistructured_statements(article, subject)
    print("Here are the things I know about " + subject + ": ")
    facts = [
        str(fact).split(".")[0]  # truncate at .
        for _subject, _verb, fact in statements
    ]
    print(facts)
    return facts


def renderPicture(article):
    """Return displaCy's entity visualisation of *article* as an HTML page.

    Only the labels in ``ENTITY_LABELS`` are highlighted.
    """
    # NOTE(review): "#white" is not a valid CSS colour, and "bg"/"color"/
    # "font" look like dependency-renderer options -- confirm the entity
    # renderer actually honours them before relying on these values.
    options = {"ents": ENTITY_LABELS,
               "bg": "#white", "color": "black", "font": "Montserrat"}
    return displacy.render(article, style="ent", page=True, options=options)


@app.route('/')
def student():
    """Serve the URL input form."""
    return render_template("input.html")


@app.route('/result', methods=['POST', 'GET'])
def result():
    """Summarise the URL posted in the ``Name`` form field.

    A GET request (e.g. opening ``/result`` directly) has nothing to
    summarise and simply gets the input form. A POST without a URL gets the
    form back with status 400, and a URL that cannot be fetched gets the form
    back with status 502.
    """
    if request.method != 'POST':
        return render_template("input.html")

    value = (request.form.get('Name') or '').strip()
    if not value:
        return render_template("input.html"), 400

    print("Received: " + value)
    try:
        text = url_to_string(value)
    except requests.RequestException:
        return render_template("input.html"), 502

    article = nlp(text)
    top_entities = findNER(article)
    facts = printFacts(top_entities, article)
    # Rendering walks the whole document, so do it exactly once.
    visualization = renderPicture(article)

    return render_template("output.html", top_entities=top_entities, facts=facts, visualization=visualization)


if __name__ == '__main__':
    # NOTE(security): debug=True enables the interactive Werkzeug debugger,
    # which allows arbitrary code execution; use it for local development only.
    app.run(debug=True)
textacy==0.7.0 37 | thinc==7.0.4 38 | toolz==0.9.0 39 | tqdm==4.32.2 40 | urllib3==1.25.3 41 | wasabi==0.2.2 42 | Werkzeug==0.15.4 43 | -------------------------------------------------------------------------------- /static/css/btnout.css: -------------------------------------------------------------------------------- 1 | 2 | #entbtns{ 3 | width: 60%; 4 | margin-left: 20%; 5 | /*margin-left: 20%;*/ 6 | /*padding-right: 20%;*/ 7 | height: 100px; 8 | /*align-content: center;*/ 9 | /*display: inline-block;*/ 10 | /*text-align:center;*/ 11 | } 12 | .box{ 13 | /*margin-left: 10%;*/ 14 | /*padding-right: 10%;*/ 15 | /*padding: 0;*/ 16 | /*margin: 0 auto;*/ 17 | /*text-align:center;*/ 18 | /*box-sizing:border-box;*/ 19 | width: 33%; 20 | /*position:absolute;*/ 21 | /*top:50%;*/ 22 | /*left:50%;*/ 23 | /*transform : translate(-50% ,-50%);*/ 24 | float: left; 25 | padding : 0 auto; 26 | /*align-content: center;*/ 27 | /*display: inline-block;*/ 28 | text-align:center; 29 | 30 | } 31 | 32 | .btn:link, 33 | .btn:visited{ 34 | text-decoration: none; 35 | text-transform:uppercase; 36 | position:relative; 37 | top:0; 38 | left:0; 39 | padding:20px 40px; 40 | border-radius:100px; 41 | display:inline-block; 42 | transition: all .5s; 43 | } 44 | 45 | .btn-white{ 46 | background:#fff; 47 | color:#000; 48 | box-shadow:0px 10px 10px rgba(0,0,0,0.2); 49 | /*float: left;*/ 50 | /*margin: 0 auto;*/ 51 | } 52 | 53 | .btn:hover{ 54 | box-shadow:0px 10px 10px rgba(0,0,0,0.4); 55 | transform : translateY(-3px); 56 | } 57 | 58 | .btn:active{ 59 | box-shadow:0px 5px 10px rgba(0,0,0,0.2); 60 | transform:translateY(-1px); 61 | } 62 | 63 | .btn-bottom-animation-1{ 64 | animation:comeFromBottom 1s ease-out .8s; 65 | } 66 | 67 | .btn::after{ 68 | content:""; 69 | text-decoration: none; 70 | text-transform:uppercase; 71 | /*position:absolute;*/ 72 | width:100%; 73 | height:100%; 74 | /*top:0;*/ 75 | /*left:0;*/ 76 | border-radius:100px; 77 | /*display:inline-block;*/ 78 | /*z-index:-1;*/ 79 | 
transition: all .5s; 80 | } 81 | 82 | .btn-white::after { 83 | background: #fff; 84 | } 85 | 86 | .btn-animation-1:hover::after { 87 | transform: scaleX(1.4) scaleY(1.6); 88 | opacity: 0; 89 | } 90 | 91 | @keyframes comeFromBottom{ 92 | 0%{ 93 | opacity:0; 94 | transform:translateY(40px); 95 | } 96 | 100%{ 97 | opacity:1; 98 | transform:translateY(0); 99 | } 100 | } -------------------------------------------------------------------------------- /static/css/styles.css: -------------------------------------------------------------------------------- 1 | body { 2 | width: 100%; 3 | height: 100%; 4 | color: #fff; 5 | background: linear-gradient(-45deg, rgba(238, 119, 82, 0.41), rgba(231, 60, 126, 0.4), rgba(35, 166, 213, 0.4), rgba(35, 213, 171, 0.41)); 6 | /*background: linear-gradient(-45deg, rgba(238, 119, 82, 1), rgba(231, 60, 126, 1), rgba(35, 166, 213, 1), rgba(35, 213, 171, 1));*/ 7 | background-size: 400% 400%; 8 | -webkit-animation: Gradient 15s ease infinite; 9 | -moz-animation: Gradient 15s ease infinite; 10 | animation: Gradient 15s ease infinite; 11 | padding: 0; 12 | margin: 0; 13 | } 14 | 15 | #body { 16 | padding: 0; 17 | margin: 0; 18 | } 19 | 20 | body::-webkit-scrollbar { 21 | width: 0 !important 22 | } 23 | 24 | h1 { 25 | margin-top: 50px; 26 | text-align: center; 27 | font-size: 70px; 28 | } 29 | 30 | 31 | h2 { 32 | text-align: center; 33 | 34 | } 35 | 36 | #content { 37 | background-color: rgba(255, 255, 255, 1); 38 | /*background-color: rgba(255, 255, 255, 0.4);*/ 39 | color: black; 40 | padding: 10px 41 | 42 | } 43 | 44 | 45 | @-webkit-keyframes Gradient { 46 | 0% { 47 | background-position: 0% 50% 48 | } 49 | 50% { 50 | background-position: 100% 50% 51 | } 52 | 100% { 53 | background-position: 0% 50% 54 | } 55 | } 56 | 57 | @-moz-keyframes Gradient { 58 | 0% { 59 | background-position: 0% 50% 60 | } 61 | 50% { 62 | background-position: 100% 50% 63 | } 64 | 100% { 65 | background-position: 0% 50% 66 | } 67 | } 68 | 69 | @keyframes Gradient { 70 
| 0% { 71 | background-position: 0% 50% 72 | } 73 | 50% { 74 | background-position: 100% 50% 75 | } 76 | 100% { 77 | background-position: 0% 50% 78 | } 79 | } 80 | 81 | 82 | /*facts*/ 83 | #facts { 84 | margin-top: 50px; 85 | align-items: center; 86 | } 87 | 88 | .fact { 89 | width: 60%; 90 | height: 50px; 91 | background-color: white; 92 | /*margin: 1px;*/ 93 | margin-left: 20%; 94 | color: #000; 95 | -webkit-animation: shadow-drop-2-center 1.2s cubic-bezier(0.250, 0.460, 0.450, 0.640) forwards; 96 | animation: shadow-drop-2-center 1.2s cubic-bezier(0.250, 0.460, 0.450, 0.640) forwards; 97 | } 98 | 99 | #facts ul, li { 100 | list-style: none; 101 | padding: 0; 102 | } 103 | 104 | 105 | li:empty { 106 | display: none; 107 | } 108 | 109 | ul:empty { 110 | display: none; 111 | } 112 | 113 | div:empty { 114 | display: none; 115 | } 116 | 117 | @-webkit-keyframes shadow-drop-2-center { 118 | 0% { 119 | -webkit-transform: translateZ(0); 120 | transform: translateZ(0); 121 | box-shadow: 0 0 0 0 rgba(0, 0, 0, 0); 122 | } 123 | 100% { 124 | -webkit-transform: translateZ(50px); 125 | transform: translateZ(50px); 126 | box-shadow: 0 0 20px 0px rgba(0, 0, 0, 0.35); 127 | } 128 | } 129 | 130 | @keyframes shadow-drop-2-center { 131 | 0% { 132 | -webkit-transform: translateZ(0); 133 | transform: translateZ(0); 134 | box-shadow: 0 0 0 0 rgba(0, 0, 0, 0); 135 | } 136 | 100% { 137 | -webkit-transform: translateZ(50px); 138 | transform: translateZ(50px); 139 | box-shadow: 0 0 20px 0px rgba(0, 0, 0, 0.35); 140 | } 141 | } 142 | 143 | .fact p { 144 | text-align: center; 145 | padding-top: 13px; 146 | } 147 | 148 | /*facts*/ 149 | 150 | 151 | /*text input*/ 152 | :root { 153 | background: #f5f6fa; 154 | color: #9c9c9c; 155 | font: 1rem "PT Sans", sans-serif; 156 | } 157 | 158 | html, 159 | body, 160 | .container { 161 | /*height: 50%;*/ 162 | margin: 0; 163 | padding: 0; 164 | } 165 | 166 | body, html { 167 | overflow-x: hidden; 168 | overflow-y: auto; 169 | position: fixed; 170 | } 
171 | 172 | a { 173 | color: inherit; 174 | } 175 | 176 | a:hover { 177 | color: #7f8ff4; 178 | } 179 | 180 | .container { 181 | display: flex; 182 | flex-direction: column; 183 | align-items: center; 184 | justify-content: center; 185 | height: 50%; 186 | } 187 | 188 | .uppercase { 189 | text-transform: uppercase; 190 | } 191 | 192 | .btn { 193 | display: inline-block; 194 | background: transparent; 195 | color: inherit; 196 | font: inherit; 197 | border: 0; 198 | outline: 0; 199 | padding: 0; 200 | transition: all 200ms ease-in; 201 | cursor: pointer; 202 | /*position:absolute;*/ 203 | } 204 | 205 | .btn--primary { 206 | background: #7f8ff4; 207 | color: #fff; 208 | box-shadow: 0 0 10px 2px rgba(0, 0, 0, 0.1); 209 | border-radius: 2px; 210 | padding: 12px 36px; 211 | } 212 | 213 | .btn--primary:hover { 214 | background: #6c7ff2; 215 | } 216 | 217 | .btn--primary:active { 218 | background: #7f8ff4; 219 | box-shadow: inset 0 0 10px 2px rgba(0, 0, 0, 0.2); 220 | } 221 | 222 | .btn--inside { 223 | margin-left: -96px; 224 | } 225 | 226 | .form__field { 227 | width: 360px; 228 | background: #fff; 229 | color: #a3a3a3; 230 | font: inherit; 231 | box-shadow: 0 6px 10px 0 rgba(0, 0, 0, 0.1); 232 | border: 0; 233 | outline: 0; 234 | padding: 22px 18px; 235 | } 236 | 237 | /*text input*/ 238 | -------------------------------------------------------------------------------- /static/css/title.css: -------------------------------------------------------------------------------- 1 | .hover, 2 | .word, 3 | h1 { 4 | cursor: pointer; 5 | } 6 | 7 | h1 { 8 | position: relative; 9 | color: #fff; 10 | font: 900 60px Montserrat; 11 | text-shadow: 0 10px 25px rgba(0, 0, 0, 0.3); 12 | } 13 | 14 | .concept-eight { 15 | display: flex; 16 | align-items: center; 17 | justify-content: center; 18 | } 19 | .concept-eight .word { 20 | display: flex; 21 | align-items: center; 22 | justify-items: center; 23 | width: 450px; 24 | height: 80%; 25 | } 26 | .concept-eight .word:hover .char:before { 27 
| transition: 0.6s ease; 28 | color: rgba(255, 255, 255, 0.8); 29 | animation: falls 1s ease forwards; 30 | } 31 | .concept-eight .word:hover .char:nth-child(1):before { 32 | animation: falls 0.9s ease 0.2s forwards; 33 | } 34 | .concept-eight .word:hover .char:nth-child(2n + 3):before { 35 | animation: falls 1s ease 0.3s forwards; 36 | } 37 | .concept-eight .char { 38 | flex: 1; 39 | position: relative; 40 | } 41 | .concept-eight .char:before { 42 | content: attr(data-content); 43 | position: absolute; 44 | top: 0; 45 | color: transparent; 46 | text-shadow: none; 47 | } 48 | 49 | @keyframes falls { 50 | 0% { 51 | opacity: 0; 52 | top: 0; 53 | } 54 | 20% { 55 | opacity: 1; 56 | } 57 | 100% { 58 | opacity: 0; 59 | top: 100px; 60 | } 61 | } 62 | -------------------------------------------------------------------------------- /templates/input.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | Input 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 |

Summarize the web easily with deep learning

25 |

Enter a URL below to get started

26 | 27 |
28 |
29 | 30 |
31 | 34 | 35 |
36 |
37 |
38 | 39 | 40 | 41 | 42 | 43 | 44 | 45 | -------------------------------------------------------------------------------- /templates/output.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | Summary {{ top_entities[0] }} 5 | 6 | 7 | 8 | 10 | 11 | 12 |

Summary for {{ top_entities[0] }}

13 | 14 |
15 | 65 |
66 | 67 |

Related Terms

68 | 69 |
70 |
71 | {{ top_entities[1] }} 73 |
74 | 75 |
76 | {{ top_entities[2] }} 78 |
79 | 80 |
81 | {{ top_entities[3] }} 83 |
84 |
85 | 86 |

Visualization

87 |
{{ visualization | safe }}
88 | 89 | 90 | --------------------------------------------------------------------------------