├── .env.example ├── .gitignore ├── Dockerfile ├── README.md ├── api.py ├── doc.py ├── nltk.txt ├── requirements.txt ├── summary.py ├── templates └── index.html └── web.py /.env.example: -------------------------------------------------------------------------------- 1 | OPENAI_API_KEY=... -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | .env 2 | Procfile 3 | AWS* 4 | __pycache__/ 5 | *.docx -------------------------------------------------------------------------------- /Dockerfile: -------------------------------------------------------------------------------- 1 | # Use the official Python base image 2 | FROM python:3.9-slim 3 | 4 | # copy the requirements file into the image 5 | COPY ./requirements.txt /app/requirements.txt 6 | 7 | # switch working directory 8 | WORKDIR /app 9 | 10 | # install the dependencies and packages in the requirements file 11 | RUN pip install -r requirements.txt 12 | 13 | # copy every content from the local file to the image 14 | COPY . 
/app 15 | 16 | # configure the container to run in an executed manner 17 | ENTRYPOINT [ "python" ] 18 | 19 | # Start the application 20 | CMD ["api.py"] 21 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Install 2 | 3 | pip install -r requirements.txt 4 | 5 | # Configure 6 | 7 | Copy and paste .env.example to .env 8 | input your openai api key 9 | 10 | # Run 11 | 12 | python api.py 13 | -------------------------------------------------------------------------------- /api.py: -------------------------------------------------------------------------------- 1 | from flask import Flask, render_template, jsonify, request 2 | from urllib.parse import unquote 3 | 4 | from decouple import config 5 | 6 | from web import decode_website 7 | # from doc import decoded_doc 8 | from summary import summarize_webpage 9 | 10 | import os 11 | import nltk 12 | nltk.data.path.append('nltk_data') 13 | 14 | os.environ["OPENAI_API_KEY"] = config('OPENAI_API_KEY') 15 | app = Flask(__name__) 16 | 17 | @app.route('/') 18 | def home(): 19 | return render_template('index.html') 20 | 21 | @app.route('/api/data', methods=['GET','POST']) 22 | def get_data(): 23 | if request.method == 'GET': 24 | sample_data = { 25 | 'message': 'Hello, Flask API!', 26 | 'data': [1, 2, 3, 4, 5] 27 | } 28 | print ("DEBUG",sample_data) 29 | return jsonify(sample_data) 30 | elif request.method == 'POST': 31 | print ("DEBUG request",request) 32 | encode_url = unquote(unquote(request.args.get('url'))) 33 | print ("DEBUG encode_url",encode_url) 34 | if not encode_url: 35 | return jsonify({'error': 'URL is required'}), 400 36 | 37 | decoded_text = decode_website(encode_url) 38 | 39 | print ("DEBUG decoded_text",decoded_text) 40 | 41 | summary = summarize_webpage(decoded_text) 42 | 43 | response = { 44 | 'submitted_url': encode_url, 45 | 'summary': summary, 46 | } 47 | 48 | return 
jsonify(response) 49 | 50 | 51 | if __name__ == "__main__": 52 | port = int(os.environ.get('PORT', 5001)) 53 | app.run(debug=True, host='0.0.0.0', port=port)  # NOTE(review): debug=True enables the Werkzeug debugger; disable before deploying 54 | 55 | 56 | -------------------------------------------------------------------------------- /doc.py: -------------------------------------------------------------------------------- 1 | from langchain.document_loaders import UnstructuredWordDocumentLoader 2 | 3 | def decoded_doc(url) :  # load a Word document at `url` and return its pages concatenated into one space-joined string 4 | loader = UnstructuredWordDocumentLoader(url) 5 | data = loader.load() 6 | web_text = "" 7 | 8 | for page in data: 9 | web_text += page.page_content + " " 10 | print(web_text)  # NOTE(review): debug print left in — dumps the full document text to stdout 11 | return web_text 12 | 13 | -------------------------------------------------------------------------------- /nltk.txt: -------------------------------------------------------------------------------- 1 | wordnet 2 | pros_cons 3 | reuters 4 | maxent_treebank_pos_tagger 5 | universal_tagset 6 | punkt 7 | averaged_perceptron_tagger_ru 8 | averaged_perceptron_tagger 9 | snowball_data 10 | rslp 11 | porter_test 12 | vader_lexicon 13 | treebank 14 | dependency_treebank -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | Flask 2 | gunicorn 3 | langchain 4 | openai==0.27.0 5 | tiktoken 6 | urllib3 7 | bs4 8 | selenium 9 | unstructured 10 | python-decouple 11 | nltk 12 | python-magic 13 | python-dotenv -------------------------------------------------------------------------------- /summary.py: -------------------------------------------------------------------------------- 1 | from langchain import OpenAI 2 | from langchain.text_splitter import CharacterTextSplitter 3 | from langchain.docstore.document import Document 4 | from langchain.chains.summarize import load_summarize_chain 5 | import textwrap 6 | 7 | def summarize_webpage(text):  # summarize raw page text with an LLM map-reduce chain; returns the summary wrapped to 100 columns 8 | llm = OpenAI(temperature=0)  # temperature=0 for maximally deterministic summaries 9 | text_splitter = CharacterTextSplitter() 10 | texts 
= text_splitter.split_text(text) 11 | print(len(texts))  # NOTE(review): debug print of chunk count left in 12 | docs = [Document(page_content=t) for t in texts[:4]]  # NOTE(review): only the first 4 chunks are summarized — presumably to cap token cost; confirm this truncation is intended 13 | chain = load_summarize_chain(llm, 14 | chain_type="map_reduce")  # map_reduce: summarize each chunk separately, then combine the partial summaries 15 | 16 | 17 | output_summary = chain.run(docs) 18 | wrapped_text = textwrap.fill(output_summary, width=100)  # wrap to 100 columns for readable console output 19 | print(wrapped_text) 20 | 21 | return wrapped_text -------------------------------------------------------------------------------- /templates/index.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 |
5 | 6 |