├── README.md ├── app.py ├── config.py ├── static └── style.css ├── summarization ├── __init__.py ├── summarizer.py ├── sumy_summarizer.py ├── test_summarizer.py └── utils.py └── templates ├── error.html └── index.html /README.md: -------------------------------------------------------------------------------- 1 | # PySummary 2 | 3 | PySummary is a Python Flask API that provides a convenient endpoint for generating text summaries using advanced NLP processing based on stochastic and hybrid techniques. This API allows developers to integrate text summarization functionality into their applications effortlessly. 4 | 5 | ## Key Features 6 | 7 | - RESTful API for Easy Integration: PySummary offers a RESTful API for seamless integration into various applications. 8 | 9 | - Adjustable Summary Length and Focus: Developers can customize the length and focus of the generated summaries to meet their specific requirements. 10 | 11 | - Supports Different Text Formats: PySummary supports various text formats such as plain text (txt), HTML, and more. 12 | 13 | - Lightweight and Easy to Deploy: The Flask framework makes PySummary lightweight and easy to deploy, ensuring quick and hassle-free setup. 14 | 15 | - Documentation and Usage Examples for Developers: Comprehensive documentation and usage examples are provided to assist developers in utilizing the API effectively. 16 | 17 | 18 | ## Configuration 19 | - The summarization method (Sumy or TextBlob) can be configured in the config.py file. 20 | - Fine-tune other configurations, such as the Flask app's host and port. 
# --------------------------------------------------------------------------------
# /app.py:
# --------------------------------------------------------------------------------
"""Flask entry point for the PySummary API.

Exposes ``/`` (HTML usage page) and ``/summarize`` (JSON summarization
endpoint).
"""

from flask import Flask, request, jsonify, render_template

from config import Config
from summarization.summarizer import Summarizer

app = Flask(__name__)

# Values of 'focus' the summarizer backends accept; checked here so that a bad
# value is reported as a client error (400) rather than a server error (500).
VALID_FOCUS = ('sentences', 'words')

# Load summarizer based on the configured method
summarizer = Summarizer(Config.get_summarization_method())


@app.route('/')
def index():
    """Render the landing page that documents the API."""
    return render_template('index.html')


@app.route('/summarize', methods=['POST'])
def summarize():
    """Summarize the text supplied in a JSON request body.

    Expected JSON object:
        text (str, required): content to summarize.
        length (int, optional, default 5): number of sentences or words.
        focus (str, optional, default 'sentences'): 'sentences' or 'words'.

    Returns:
        ``{"summary": ...}`` with status 200 on success,
        ``{"error": ...}`` with status 400 for an invalid request, or
        ``{"error": ...}`` with status 500 if summarization itself fails.
    """
    # silent=True makes a malformed body or wrong content type yield None
    # instead of raising, so it can be reported as a 400 below rather than
    # being swallowed by a blanket handler and turned into a 500.
    data = request.get_json(silent=True)
    if not isinstance(data, dict):
        return jsonify({'error': 'Request body must be a JSON object'}), 400

    if 'text' not in data:
        return jsonify({'error': 'Text content is missing in the request'}), 400

    text = data['text']
    if not isinstance(text, str):
        return jsonify({'error': "'text' must be a string"}), 400

    try:
        length = int(data.get('length', 5))
    except (TypeError, ValueError):
        return jsonify({'error': "'length' must be an integer"}), 400
    if length < 1:
        return jsonify({'error': "'length' must be a positive integer"}), 400

    focus = data.get('focus', 'sentences')
    if focus not in VALID_FOCUS:
        return jsonify(
            {'error': f"Invalid focus option: {focus}. Choose 'sentences' or 'words'."}
        ), 400

    # Only the summarization call itself is treated as a server-side failure.
    try:
        summary = summarizer.generate_summary(text, length, focus)
    except Exception as e:
        app.logger.exception('Summarization failed')
        return jsonify({'error': str(e)}), 500

    return jsonify({'summary': summary})


if __name__ == '__main__':
    # NOTE(review): debug=True enables the interactive Werkzeug debugger and
    # must not be used for a publicly reachable deployment.
    app.run(host=Config.get_flask_host(), port=Config.get_flask_port(), debug=True)

# --------------------------------------------------------------------------------
# /config.py:
# --------------------------------------------------------------------------------
# config.py
# Configuration settings for the Flask app and summarization method
# Remember to keep this file private


class Config:
    """Settings for the Flask server and the summarization backend.

    Values are plain class attributes; the ``get_*`` accessors are kept for
    existing callers. They are classmethods so that a subclass overriding an
    attribute is honoured when the accessor is called on that subclass.
    """

    # Flask app configuration
    FLASK_HOST = '127.0.0.1'
    FLASK_PORT = 5000

    # Summarization method configuration
    # Options: 'sumy', 'textblob'
    SUMMARIZATION_METHOD = 'sumy'

    @classmethod
    def get_flask_host(cls):
        """Return the host interface the Flask server binds to."""
        return cls.FLASK_HOST

    @classmethod
    def get_flask_port(cls):
        """Return the TCP port the Flask server listens on."""
        return cls.FLASK_PORT

    @classmethod
    def get_summarization_method(cls):
        """Return the name of the summarization backend to use."""
        return cls.SUMMARIZATION_METHOD

# --------------------------------------------------------------------------------
# /static/style.css:
# --------------------------------------------------------------------------------
# (contents of static/style.css, unchanged; shown as comments because the
# stylesheet is not Python)
#
# /* static/style.css */
# body {
#     font-family: 'Arial', sans-serif;
#     background-color: #f8f8f8;
#     color: #333;
#     margin: 0;
#     padding: 0;
# }
#
# .container {
#     max-width: 800px;
#     margin: 50px auto;
#     background-color: #fff;
#     padding: 20px;
#     border-radius: 8px;
#     box-shadow: 0 0 10px rgba(0, 0, 0, 0.1);
# }
#
# code {
#     display: block;
#     padding: 10px;
#     background-color: #f4f4f4;
#     border: 1px solid #ddd;
#     margin-bottom: 10px;
# }
#
# pre {
#     background-color: #f4f4f4;
#     padding: 10px;
#     border: 1px solid #ddd;
#     border-radius: 4px;
# }
#
# .error-message {
#     color: #d9534f;
#     font-weight: bold;
# }
#
# --------------------------------------------------------------------------------
# /summarization/__init__.py:
# --------------------------------------------------------------------------------
# summarization/__init__.py
# Initialization file for the summarization package

from .summarizer import Summarizer

# --------------------------------------------------------------------------------
# /summarization/summarizer.py:
# --------------------------------------------------------------------------------
# summarization/summarizer.py
# Interface for the summarization process

from .sumy_summarizer import SumySummarizer  # Import the SumySummarizer class
from .utils import clean_text


class Summarizer:
    """Facade that selects a summarization backend by name.

    Args:
        method: Backend name, case-insensitive: 'sumy' or 'textblob'.

    Raises:
        ValueError: If ``method`` is not a known backend name.
        ImportError: If 'textblob' is requested but the
            ``textblob_summarizer`` module is not available.
    """

    def __init__(self, method='sumy'):
        # Initialize the selected summarization method
        self.method = method.lower()
        if self.method == 'sumy':
            self.summarizer = SumySummarizer()
        elif self.method == 'textblob':
            # Imported only when requested: a module-level import would make
            # the whole package unimportable (even for the default 'sumy'
            # backend) whenever textblob_summarizer is absent.
            from .textblob_summarizer import TextBlobSummarizer
            self.summarizer = TextBlobSummarizer()
        else:
            raise ValueError(f"Invalid summarization method: {method}. Choose 'sumy' or 'textblob'.")

    def generate_summary(self, text, length=5, focus='sentences'):
        """Clean ``text`` and summarize it with the selected backend.

        Args:
            text: Raw input text.
            length: Number of sentences or words wanted in the summary.
            focus: 'sentences' or 'words'.

        Returns:
            The summary produced by the backend's ``generate_summary``.
        """
        # Clean the input text. SumySummarizer cleans again on its side;
        # clean_text is idempotent, so the second pass is a no-op.
        cleaned_text = clean_text(text)

        # Generate summary using the selected method
        return self.summarizer.generate_summary(cleaned_text, length, focus)

# --------------------------------------------------------------------------------
# /summarization/sumy_summarizer.py:
# --------------------------------------------------------------------------------
# summarization/sumy_summarizer.py
# Implementation of the summarization process using the Sumy library

from sumy.parsers.plaintext import PlaintextParser
from sumy.nlp.tokenizers import Tokenizer
from sumy.summarizers.lsa import LsaSummarizer
from .utils import clean_text


class SumySummarizer:
    """Summarization backend built on Sumy's ``LsaSummarizer``."""

    def __init__(self):
        # Initialize SumySummarizer with necessary components
        self.tokenizer = Tokenizer('english')
        self.summarizer = LsaSummarizer()

    def generate_summary(self, text, length=5, focus='sentences'):
        """Summarize ``text``.

        Args:
            text: Input text.
            length: Number of sentences (focus='sentences') or maximum number
                of words (focus='words') in the summary.
            focus: 'sentences' or 'words'.

        Returns:
            The summary as a single space-joined string; empty if the cleaned
            text is empty.

        Raises:
            ValueError: If ``focus`` is neither 'sentences' nor 'words'.
        """
        # Reject a bad option before doing any parsing work.
        if focus not in ('sentences', 'words'):
            raise ValueError(f"Invalid focus option: {focus}. Choose 'sentences' or 'words'.")

        # Clean the input text
        cleaned_text = clean_text(text)
        if not cleaned_text:
            return ''

        # Parse the cleaned text
        parser = PlaintextParser.from_string(cleaned_text, self.tokenizer)

        # Select up to `length` sentences. Every sentence holds at least one
        # word, so this is also enough material for a `length`-word summary.
        sentences = self.summarizer(parser.document, length)
        summary = ' '.join(str(sentence) for sentence in sentences)

        if focus == 'words':
            # Previously this branch returned whole sentences, exactly like
            # 'sentences'; cap the output at `length` words instead.
            summary = ' '.join(summary.split()[:length])

        return summary

# --------------------------------------------------------------------------------
# /summarization/test_summarizer.py:
# --------------------------------------------------------------------------------
# summarization/test_summarizer.py
# Unit tests for the Summarizer class and the text utilities

import unittest
from summarization.summarizer import Summarizer
from summarization.utils import clean_text


class TestSummarizer(unittest.TestCase):
    def test_sumy_summarization(self):
        summarizer = Summarizer(method='sumy')
        summary = summarizer.generate_summary("Test summarization with Sumy.", length=2, focus='sentences')
        self.assertTrue(len(summary) > 0)

    def test_words_focus_limits_word_count(self):
        summarizer = Summarizer(method='sumy')
        text = "The cat sat on the mat. The dog barked at the cat. Birds sing in the morning."
        summary = summarizer.generate_summary(text, length=4, focus='words')
        self.assertTrue(summary)
        self.assertLessEqual(len(summary.split()), 4)

    def test_textblob_summarization(self):
        # The textblob backend is optional; skip rather than error if missing.
        try:
            summarizer = Summarizer(method='textblob')
        except ImportError:
            self.skipTest('textblob summarizer backend is not available')
        summary = summarizer.generate_summary("Test summarization with TextBlob.", length=3, focus='words')
        self.assertTrue(len(summary) > 0)

    def test_invalid_method_raises(self):
        with self.assertRaises(ValueError):
            Summarizer(method='unknown')

    def test_invalid_focus_raises(self):
        summarizer = Summarizer(method='sumy')
        with self.assertRaises(ValueError):
            summarizer.generate_summary("Some text.", length=1, focus='paragraphs')


class TestCleanText(unittest.TestCase):
    def test_keeps_sentence_punctuation(self):
        self.assertEqual(clean_text("Hello,\n  world!  Bye."), "Hello, world! Bye.")

    def test_removed_characters_leave_single_spaces(self):
        self.assertEqual(clean_text("a @ b"), "a b")

    def test_is_idempotent(self):
        once = clean_text("  One #tag...  two\t\tthree?  ")
        self.assertEqual(clean_text(once), once)


if __name__ == '__main__':
    unittest.main()

# --------------------------------------------------------------------------------
# /summarization/utils.py:
# --------------------------------------------------------------------------------
# summarization/utils.py
# Utility functions for text processing

import re

# Anything that is not a letter/digit, whitespace, or basic punctuation.
# Sentence punctuation must survive cleaning: the sentence tokenizer used
# downstream needs it to find sentence boundaries. `\w` also matches '_',
# which is removed explicitly.
_UNWANTED_CHARS = re.compile(r"[^\w\s.,!?;:'\"-]|_")
_WHITESPACE_RUN = re.compile(r'\s+')


def clean_text(text):
    """Normalize ``text`` for summarization.

    Removes special characters (keeping letters, digits and the punctuation
    ``. , ! ? ; : ' " -``), collapses every run of whitespace, including
    newlines, to a single space, and strips both ends. Applying the function
    to its own output returns it unchanged.

    Args:
        text: The string to clean.

    Returns:
        The cleaned string.

    Raises:
        TypeError: If ``text`` is not a string (raised by ``re``).
    """
    # Strip characters first, then collapse whitespace: the reverse order
    # leaves a double space wherever a removed character sat between spaces.
    without_specials = _UNWANTED_CHARS.sub('', text)
    return _WHITESPACE_RUN.sub(' ', without_specials).strip()

# --------------------------------------------------------------------------------
# /templates/error.html:
# --------------------------------------------------------------------------------
5 | 6 | 7 |An error occurred during the API request:
14 | 15 |Welcome to the PySummary API! Use the /summarize endpoint to generate text summaries.
14 |Submit a POST request with JSON payload containing the text content to be summarized.
16 |Example using cURL:
17 |curl -X POST -H "Content-Type: application/json" -d '{"text": "Your text content goes here."}' http://127.0.0.1:5000/summarize
18 | Optional parameters:
19 |length
(default: 5): Number of sentences or words in the generated summaryfocus
(default: 'sentences'): Specify 'sentences' or 'words' for summary focusThe API responds with a JSON object containing the generated summary:
25 |{"summary": "Generated summary of the text content."}26 |