├── Document_Redaction_&_Sanitization_Web_App_With_Spacy ├── app.py ├── static │ ├── downloadfiles │ │ ├── yourdocument20190220-234204.txt │ │ ├── yourdocument20190220-235210.txt │ │ ├── yourdocument20190223-133140.txt │ │ └── yourdocument20190223-133930.txt │ ├── js │ │ └── main.js │ └── uploadedfiles │ │ ├── sample.txt │ │ └── sampledoc.txt └── templates │ ├── downloadsdirectory.html │ ├── index.html │ └── result.html ├── README.md ├── Summaryzer_GUI ├── README.md ├── __pycache__ │ ├── nltk_summarization.cpython-36.pyc │ └── spacy_summarization.cpython-36.pyc ├── images │ ├── image_main.png │ ├── image_main2.png │ ├── image_main3.png │ ├── image_main4.png │ └── image_main5.png ├── myfilesummary.txt ├── nltk_summarization.py ├── spacy_summarization.py └── summaryzer_gui.py └── Summaryzer_Text_Summarization_App ├── __pycache__ └── spacy_summarization.cpython-36.pyc ├── app.py ├── imagesforapp ├── Screenshot from 2019-01-02 16-46-54.png ├── summaryzer_compare.png ├── summaryzer_compare2.png ├── summaryzer_home.png ├── summaryzer_home2.png ├── summaryzer_home3.png └── summaryzer_pics.png ├── nltk_summarization.py ├── nltk_summarization.pyc ├── requirements.txt ├── spacy_summarization.py ├── spacy_summarization.pyc ├── spacy_summarizer.py ├── static ├── css │ ├── custom.css │ ├── materialize.css │ └── materialize.min.css └── js │ ├── init.js │ ├── materialize.js │ └── materialize.min.js └── templates ├── compare_summary.html └── index.html /Document_Redaction_&_Sanitization_Web_App_With_Spacy/app.py: -------------------------------------------------------------------------------- 1 | from flask import Flask,url_for,render_template,request,send_file,redirect 2 | from flask_uploads import UploadSet,configure_uploads,ALL,DATA 3 | from werkzeug import secure_filename 4 | 5 | # Other Packages 6 | import os 7 | import spacy 8 | nlp = spacy.load('en') 9 | 10 | import time 11 | timestr = time.strftime("%Y%m%d-%H%M%S") 12 | 13 | # Initialize App 14 | app = Flask(__name__) 15 | 
# Configuration For Uploads
UPLOAD_DIR = 'static/uploadedfiles'
DOWNLOAD_DIR = 'static/downloadfiles'

files = UploadSet('files', ALL)
app.config['UPLOADED_FILES_DEST'] = UPLOAD_DIR
configure_uploads(app, files)


# Functions to Sanitize and Redact
def _redact_entities(text, label, placeholder):
    """Return *text* with every entity labelled *label* replaced by *placeholder*.

    The text is run through the module-level ``nlp`` pipeline and each
    matching entity span is cut out by character offsets.  Everything outside
    the matched spans, including the whitespace that follows an entity, is
    copied through unchanged, so the output reads ``[REDACTED NAME] was``
    rather than ``[REDACTED NAME]was``.
    """
    doc = nlp(text)
    source = doc.text
    pieces = []
    cursor = 0  # index in `source` of the first character not yet emitted
    for ent in doc.ents:
        if ent.label_ != label:
            continue
        pieces.append(source[cursor:ent.start_char])
        pieces.append(placeholder)
        cursor = ent.end_char
    pieces.append(source[cursor:])
    return "".join(pieces)


def sanitize_names(text):
    """Replace entities labelled ``PERSON`` with ``[REDACTED NAME]``."""
    return _redact_entities(text, 'PERSON', "[REDACTED NAME]")


def sanitize_places(text):
    """Replace entities labelled ``GPE`` with ``[REDACTED PLACE]``."""
    return _redact_entities(text, 'GPE', "[REDACTED PLACE]")


def sanitize_date(text):
    """Replace entities labelled ``DATE`` with ``[REDACTED DATE]``."""
    return _redact_entities(text, 'DATE', "[REDACTED DATE]")


def sanitize_org(text):
    """Replace entities labelled ``ORG`` with ``[REDACTED]``."""
    return _redact_entities(text, 'ORG', "[REDACTED]")


def writetofile(text):
    """Write *text* to a new timestamped file in ``DOWNLOAD_DIR``.

    Returns the bare file name (``yourdocument<timestamp>.txt``) that was
    written.
    """
    # Stamp the name at call time.  The module-level `timestr` is computed
    # once at import, so using it made every save in one server process
    # overwrite the same file.
    file_name = 'yourdocument' + time.strftime("%Y%m%d-%H%M%S") + '.txt'
    os.makedirs(DOWNLOAD_DIR, exist_ok=True)
    with open(os.path.join(DOWNLOAD_DIR, file_name), 'w', encoding='utf-8') as f:
        f.write(text)
    return file_name


# Maps the `taskoption` form value to the redaction function it selects.
_SANITIZERS = {
    'redact_names': sanitize_names,
    'places': sanitize_places,
    'date': sanitize_date,
    'org': sanitize_org,
}


@app.route('/')
def index():
    """Render the landing page."""
    return render_template('index.html')


@app.route('/sanitize', methods=['GET', 'POST'])
def sanitize():
    """Redact the posted ``rawtext`` according to ``taskoption``.

    Unknown or missing options fall back to name redaction.  A plain GET has
    nothing to redact and is sent back to the landing page; previously it
    failed because ``rawtext`` and ``result`` were never assigned.
    """
    if request.method != 'POST':
        return redirect(url_for('index'))

    choice = request.form.get('taskoption')
    rawtext = request.form.get('rawtext', '')
    sanitizer = _SANITIZERS.get(choice, sanitize_names)
    result = sanitizer(rawtext)
    return render_template('index.html', rawtext=rawtext, result=result)


@app.route('/uploads', methods=['GET', 'POST'])
def uploads():
    """Store an uploaded text file, redact names in it and show or save the result.

    With ``saveoption == 'savetotxt'`` the redacted text is written to the
    downloads directory and the client is redirected to the downloads page.
    Any other value renders ``result.html`` with the original and redacted
    text.  Requests without an uploaded ``txt_data`` file go back to the
    landing page; previously they failed on unassigned locals.
    """
    if request.method != 'POST' or 'txt_data' not in request.files:
        return redirect(url_for('index'))

    upload = request.files['txt_data']
    choice = request.form.get('saveoption')
    filename = secure_filename(upload.filename or '')
    if not filename:
        # No file selected, or the name sanitised to nothing: without this
        # guard the save target would be the upload directory itself.
        return redirect(url_for('index'))

    os.makedirs(UPLOAD_DIR, exist_ok=True)
    saved_path = os.path.join(UPLOAD_DIR, filename)
    upload.save(saved_path)

    # Document Redaction Here
    with open(saved_path, 'r', encoding='utf-8', errors='replace') as f:
        myfile = f.read()
    result = sanitize_names(myfile)

    if choice == 'savetotxt':
        writetofile(result)
        return redirect(url_for('downloads'))

    return render_template('result.html', filename=filename, result=result, myfile=myfile)


@app.route('/downloads')
def downloads():
    """List the files currently present in the downloads directory."""
    try:
        saved = sorted(os.listdir(DOWNLOAD_DIR))
    except FileNotFoundError:
        # Nothing has been saved yet.
        saved = []
    return render_template('downloadsdirectory.html', files=saved)


if __name__ == '__main__':
    # NOTE(review): debug=True enables the interactive Werkzeug debugger;
    # keep this entry point for local development only.
    app.run(debug=True)

# NOTE: the remainder is text from other files of the repository dump that
# shared a physical line with the code above; it is kept verbatim.
# --------------------------------------------------------------------------------
# /Document_Redaction_&_Sanitization_Web_App_With_Spacy/static/downloadfiles/yourdocument20190220-234204.txt:
# --------------------------------------------------------------------------------
# 1 | [REDACTED]was the president of USA last year
# 2 | [REDACTED]is standing in London
# --------------------------------------------------------------------------------
# /Document_Redaction_&_Sanitization_Web_App_With_Spacy/static/downloadfiles/yourdocument20190220-235210.txt:
-------------------------------------------------------------------------------- 1 | [REDACTED]was the president of USA last year 2 | [REDACTED]is standing in London -------------------------------------------------------------------------------- /Document_Redaction_&_Sanitization_Web_App_With_Spacy/static/downloadfiles/yourdocument20190223-133140.txt: -------------------------------------------------------------------------------- 1 | [REDACTED NAME][REDACTED NAME]( born June 28, 1971) is a technology entrepreneur and engineer.He holds South African, Canadian, and U.S. citizenship and is the founder, CEO, and lead designer of SpaceX; co-founder, CEO, and product architect of Tesla, Inc.; co-founder and CEO of Neuralink; founder of The Boring Company; and co-founder of PayPal. In December 2016, [REDACTED NAME]was ranked 21st on the Forbes list of The World's Most Powerful People. As of October 2018, he has a net worth of $22.8 billion and is listed by Forbes as the 54th-richest person in the world. 2 | 3 | Born and raised in Pretoria, South Africa, Musk moved to Canada when he was 17 to attend Queen's University. He transferred to the University of Pennsylvania two years later, where he received an economics degree from the Wharton School and a degree in physics from the College of Arts and Sciences. He began a Ph.D. in applied physics and material sciences at Stanford University in 1995 but dropped out after two days to pursue an entrepreneurial career. He subsequently co-founded Zip2, a web software company, which was acquired by Compaq for $340 million in 1999. Musk then founded X.com, an online bank. It merged with Confinity in 2000 and later that year became PayPal, which was bought by eBay for $1.5 billion in October 2002. 4 | 5 | In May 2002, Musk founded SpaceX, an aerospace manufacturer and space transport services company, of which he is CEO and lead designer. 
He helped fund Tesla, Inc., an electric vehicle and solar panel manufacturer, in 2003, and became its CEO and product architect. In 2006, he inspired the creation of SolarCity, a solar energy services company that is now a subsidiary of Tesla, and operates as its chairman. In 2015, Musk co-founded [REDACTED NAME], a nonprofit research company that aims to promote friendly artificial intelligence. In July 2016, he co-founded Neuralink, a neurotechnology company focused on developing [REDACTED NAME]interfaces, and is its CEO. In December 2016, Musk founded The Boring Company, an infrastructure and tunnel-construction company. 6 | 7 | -------------------------------------------------------------------------------- /Document_Redaction_&_Sanitization_Web_App_With_Spacy/static/downloadfiles/yourdocument20190223-133930.txt: -------------------------------------------------------------------------------- 1 | [REDACTED NAME][REDACTED NAME]( born June 28, 1971) is a technology entrepreneur and engineer.He holds South African, Canadian, and U.S. citizenship and is the founder, CEO, and lead designer of SpaceX; co-founder, CEO, and product architect of Tesla, Inc.; co-founder and CEO of Neuralink; founder of The Boring Company; and co-founder of PayPal. In December 2016, [REDACTED NAME]was ranked 21st on the Forbes list of The World's Most Powerful People. As of October 2018, he has a net worth of $22.8 billion and is listed by Forbes as the 54th-richest person in the world. 2 | 3 | Born and raised in Pretoria, South Africa, Musk moved to Canada when he was 17 to attend Queen's University. He transferred to the University of Pennsylvania two years later, where he received an economics degree from the Wharton School and a degree in physics from the College of Arts and Sciences. He began a Ph.D. in applied physics and material sciences at Stanford University in 1995 but dropped out after two days to pursue an entrepreneurial career. 
He subsequently co-founded Zip2, a web software company, which was acquired by Compaq for $340 million in 1999. Musk then founded X.com, an online bank. It merged with Confinity in 2000 and later that year became PayPal, which was bought by eBay for $1.5 billion in October 2002. 4 | 5 | In May 2002, Musk founded SpaceX, an aerospace manufacturer and space transport services company, of which he is CEO and lead designer. He helped fund Tesla, Inc., an electric vehicle and solar panel manufacturer, in 2003, and became its CEO and product architect. In 2006, he inspired the creation of SolarCity, a solar energy services company that is now a subsidiary of Tesla, and operates as its chairman. In 2015, Musk co-founded [REDACTED NAME], a nonprofit research company that aims to promote friendly artificial intelligence. In July 2016, he co-founded Neuralink, a neurotechnology company focused on developing [REDACTED NAME]interfaces, and is its CEO. In December 2016, Musk founded The Boring Company, an infrastructure and tunnel-construction company. 
// NOTE: comment lines in the "N | text" / path / dashes form are text from
// other files of the repository dump that shared a physical line with this
// script; they are kept verbatim.
// 6 |
// 7 |
// --------------------------------------------------------------------------------
// /Document_Redaction_&_Sanitization_Web_App_With_Spacy/static/js/main.js:
// --------------------------------------------------------------------------------

/**
 * Copy the text content of `element` to the clipboard.
 *
 * The text is placed in a temporary, detached-then-appended <textarea>,
 * selected, and copied with document.execCommand("copy"). The temporary node
 * is always removed again, and the alert reports whether the copy succeeded.
 *
 * @param {string|Element|jQuery} element  Anything accepted by `$()` whose
 *     text should be copied.
 */
function copyToClipboard(element) {
    // `$("")` yields an empty jQuery set, so nothing was appended or selected
    // and nothing could be copied. Create a real field instead; a <textarea>
    // keeps the line breaks of multi-line results.
    var $temp = $("<textarea>");
    var copied = false;

    $("body").append($temp);
    $temp.val($(element).text()).select();
    try {
        // execCommand returns false when the command is unsupported or disabled.
        copied = document.execCommand("copy");
    } catch (err) {
        copied = false;
    } finally {
        $temp.remove();
    }
    alert(copied ? "Result Copied" : "Copy failed");
}

// function copyToClipboard() {
//   var copyText = document.getElementById("p1");
//   copyText.select();
//   document.execCommand("copy");
//   alert("Copied the result ");
//   console.log("Copied");
// }

// --------------------------------------------------------------------------------
// /Document_Redaction_&_Sanitization_Web_App_With_Spacy/static/uploadedfiles/sample.txt:
// --------------------------------------------------------------------------------
// 1 | Barack Obama was the president of USA last four years ago
// 2 | John is standing in London
// 3 | The most influential person in history is Jesus Christ.
// 4 | Google revolutionarized the internet via information search.
// 5 |
// 6 |
// --------------------------------------------------------------------------------
// /Document_Redaction_&_Sanitization_Web_App_With_Spacy/static/uploadedfiles/sampledoc.txt:
// --------------------------------------------------------------------------------
// 1 | Elon Reeve Musk FRS ( born June 28, 1971) is a technology entrepreneur and engineer.He holds South African, Canadian, and U.S. citizenship and is the founder, CEO, and lead designer of SpaceX; co-founder, CEO, and product architect of Tesla, Inc.; co-founder and CEO of Neuralink; founder of The Boring Company; and co-founder of PayPal. In December 2016, Elon Musk was ranked 21st on the Forbes list of The World's Most Powerful People. As of October 2018, he has a net worth of $22.8 billion and is listed by Forbes as the 54th-richest person in the world.
2 | 3 | Born and raised in Pretoria, South Africa, Musk moved to Canada when he was 17 to attend Queen's University. He transferred to the University of Pennsylvania two years later, where he received an economics degree from the Wharton School and a degree in physics from the College of Arts and Sciences. He began a Ph.D. in applied physics and material sciences at Stanford University in 1995 but dropped out after two days to pursue an entrepreneurial career. He subsequently co-founded Zip2, a web software company, which was acquired by Compaq for $340 million in 1999. Musk then founded X.com, an online bank. It merged with Confinity in 2000 and later that year became PayPal, which was bought by eBay for $1.5 billion in October 2002. 4 | 5 | In May 2002, Musk founded SpaceX, an aerospace manufacturer and space transport services company, of which he is CEO and lead designer. He helped fund Tesla, Inc., an electric vehicle and solar panel manufacturer, in 2003, and became its CEO and product architect. In 2006, he inspired the creation of SolarCity, a solar energy services company that is now a subsidiary of Tesla, and operates as its chairman. In 2015, Musk co-founded OpenAI, a nonprofit research company that aims to promote friendly artificial intelligence. In July 2016, he co-founded Neuralink, a neurotechnology company focused on developing brain–computer interfaces, and is its CEO. In December 2016, Musk founded The Boring Company, an infrastructure and tunnel-construction company. 6 | 7 | -------------------------------------------------------------------------------- /Document_Redaction_&_Sanitization_Web_App_With_Spacy/templates/downloadsdirectory.html: -------------------------------------------------------------------------------- 1 | 2 | 3 |
4 |{{ rawtext }}
119 | 120 |{{ result }}
126 | 127 |