├── pyviralcontent.egg-info
    ├── dependency_links.txt
    ├── top_level.txt
    ├── requires.txt
    ├── SOURCES.txt
    └── PKG-INFO
├── UML.png
├── Sample.JPG
├── dist
    ├── pyviralcontent-0.1.4.tar.gz
    └── pyviralcontent-0.1.4-py3-none-any.whl
├── pyviralcontent
    ├── __init__.py
    ├── visualizer.py
    ├── syllable_counter.py
    ├── content_analyzer.py
    ├── text_analyzer.py
    ├── likert_scale.py
    └── readability_calculator.py
├── setup.py
├── .github
    └── workflows
    │   └── python-publish.yml
├── main.py
└── README.md


/pyviralcontent.egg-info/dependency_links.txt:
--------------------------------------------------------------------------------
1 | 
2 | 


--------------------------------------------------------------------------------
/pyviralcontent.egg-info/top_level.txt:
--------------------------------------------------------------------------------
1 | pyviralcontent
2 | 


--------------------------------------------------------------------------------
/UML.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/bhaskatripathi/pyviralcontent/HEAD/UML.png


--------------------------------------------------------------------------------
/Sample.JPG:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/bhaskatripathi/pyviralcontent/HEAD/Sample.JPG


--------------------------------------------------------------------------------
/pyviralcontent.egg-info/requires.txt:
--------------------------------------------------------------------------------
1 | pandas
2 | numpy
3 | matplotlib
4 | seaborn
5 | 


--------------------------------------------------------------------------------
/dist/pyviralcontent-0.1.4.tar.gz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/bhaskatripathi/pyviralcontent/HEAD/dist/pyviralcontent-0.1.4.tar.gz


--------------------------------------------------------------------------------
/dist/pyviralcontent-0.1.4-py3-none-any.whl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/bhaskatripathi/pyviralcontent/HEAD/dist/pyviralcontent-0.1.4-py3-none-any.whl


--------------------------------------------------------------------------------
/pyviralcontent/__init__.py:
--------------------------------------------------------------------------------
 1 | from .syllable_counter import SyllableCounter
 2 | from .text_analyzer import TextAnalyzer
 3 | from .likert_scale import LikertScale
 4 | from .readability_calculator import ReadabilityCalculator
 5 | from .visualizer import Visualizer
 6 | from .content_analyzer import ContentAnalyzer
 7 | 
 8 | __all__ = [
 9 |     'SyllableCounter', 'TextAnalyzer', 'LikertScale',
10 |     'ReadabilityCalculator', 'Visualizer', 'ContentAnalyzer'
11 | ]
12 | 


--------------------------------------------------------------------------------
/pyviralcontent.egg-info/SOURCES.txt:
--------------------------------------------------------------------------------
 1 | README.md
 2 | setup.py
 3 | pyviralcontent/__init__.py
 4 | pyviralcontent/content_analyzer.py
 5 | pyviralcontent/likert_scale.py
 6 | pyviralcontent/readability_calculator.py
 7 | pyviralcontent/syllable_counter.py
 8 | pyviralcontent/text_analyzer.py
 9 | pyviralcontent/visualizer.py
10 | pyviralcontent.egg-info/PKG-INFO
11 | pyviralcontent.egg-info/SOURCES.txt
12 | pyviralcontent.egg-info/dependency_links.txt
13 | pyviralcontent.egg-info/requires.txt
14 | pyviralcontent.egg-info/top_level.txt


--------------------------------------------------------------------------------
/pyviralcontent/visualizer.py:
--------------------------------------------------------------------------------
 1 | import matplotlib.pyplot as plt
 2 | import seaborn as sns
 3 | import pandas as pd
 4 | import numpy as np
 5 | 
 6 | # This class provides visualization functionality. It can plot a heatmap of readability scores for each test, providing a visual representation of the analysis results.
 7 | class Visualizer:
 8 |     @staticmethod
 9 |     def plot_scores_heatmap(df, content_type):
10 |         fig, ax = plt.subplots(figsize=(8, 1), dpi=300)
11 |         heatmap_data = pd.pivot_table(data=df, values='Score', index=['Test'], aggfunc=np.mean)
12 |         sns.heatmap(heatmap_data, annot=True, fmt=".1f", linewidths=.5, ax=ax)
13 |         plt.title(f'Readability Scores for {content_type.capitalize()} Content')
14 |         plt.show()


--------------------------------------------------------------------------------
/pyviralcontent/syllable_counter.py:
--------------------------------------------------------------------------------
 1 | 
 2 | # This class is responsible for counting the syllables in a word. It identifies vowels in a word and applies rules to count syllables,
 3 | # which is essential for readability analysis.
 4 | class SyllableCounter:
 5 |     def __init__(self):
 6 |         self.vowels = "aeiouy"
 7 | 
 8 |     def count(self, word):
 9 |         word = word.lower()
10 |         syllable_count = 0
11 |         if word[0] in self.vowels:
12 |             syllable_count += 1
13 |         for index in range(1, len(word)):
14 |             if word[index] in self.vowels and word[index - 1] not in self.vowels:
15 |                 syllable_count += 1
16 |         if word.endswith("e"):
17 |             syllable_count -= 1
18 |         if syllable_count == 0:
19 |             syllable_count += 1
20 |         return syllable_count


--------------------------------------------------------------------------------
/setup.py:
--------------------------------------------------------------------------------
 1 | from setuptools import setup, find_packages
 2 | import codecs
 3 | import os
 4 | 
 5 | # read the contents of your README file
 6 | this_directory = os.path.abspath(os.path.dirname(__file__))
 7 | with codecs.open(os.path.join(this_directory, 'README.md'), encoding='utf-8') as f:
 8 |     long_description = f.read()
 9 | 
10 | setup(
11 |     name='pyviralcontent',
12 |     version='0.1.4',
13 |     packages=find_packages(),
14 |     install_requires=[
15 |         'pandas', 
16 |         'numpy', 
17 |         'matplotlib', 
18 |         'seaborn'
19 |     ],
20 |     author='Bhaskar Tripathi',
21 |     author_email='bhaskar.tripathi@gmail.com',
22 |     description='A package for analyzing content readability and virality potential.',
23 |     long_description=long_description,
24 |     long_description_content_type='text/markdown',
25 |     keywords='readability virality content-analysis',
26 |     url='https://github.com/bhaskatripathi/pyviralcontent',
27 | )
28 | 


--------------------------------------------------------------------------------
/.github/workflows/python-publish.yml:
--------------------------------------------------------------------------------
 1 | # This workflow will upload a Python Package using Twine when a release is created
 2 | # For more information see: https://docs.github.com/en/actions/automating-builds-and-tests/building-and-testing-python#publishing-to-package-registries
 3 | 
 4 | # This workflow uses actions that are not certified by GitHub.
 5 | # They are provided by a third-party and are governed by
 6 | # separate terms of service, privacy policy, and support
 7 | # documentation.
 8 | 
 9 | name: Upload Python Package
10 | 
11 | on:
12 |   release:
13 |     types: [published]
14 | 
15 | permissions:
16 |   contents: read
17 | 
18 | jobs:
19 |   deploy:
20 | 
21 |     runs-on: ubuntu-latest
22 | 
23 |     steps:
24 |     - uses: actions/checkout@v3
25 |     - name: Set up Python
26 |       uses: actions/setup-python@v3
27 |       with:
28 |         python-version: '3.x'
29 |     - name: Install dependencies
30 |       run: |
31 |         python -m pip install --upgrade pip
32 |         pip install build
33 |     - name: Build package
34 |       run: python -m build
35 |     - name: Publish package
36 |       uses: pypa/gh-action-pypi-publish@27b31702a0e7fc50959f5ad993c78deac1bdfc29
37 |       with:
38 |         user: __token__
39 |         password: ${{ secrets.PYPI_API_TOKEN_VIRAL_CONTENT }}
40 | 


--------------------------------------------------------------------------------
/main.py:
--------------------------------------------------------------------------------
 1 | #from pyviralcontent import ContentAnalyzer
 2 | from pyviralcontent.content_analyzer import ContentAnalyzer
 3 | 
 4 | def main():
 5 |     # Mapping of content type numbers to content type names
 6 |     content_type_map = {
 7 |         0: 'scientific',
 8 |         1: 'blog',
 9 |         2: 'video',
10 |         3: 'technical',
11 |         4: 'fictional',
12 |         5: 'legal',
13 |         6: 'educational',
14 |         7: 'news',
15 |         8: 'advertising',
16 |         9: 'social_media'
17 |     }
18 | 
19 |     # Prompt user for a content type number
20 |     content_type_number = int(input("Enter a number for the content type (0 for scientific, 1 for blog, ..., 9 for social_media): "))
21 |     
22 |     # Get the content type name from the content_type_map
23 |     content_type_name = content_type_map.get(content_type_number)
24 |     
25 |     if content_type_name is None:
26 |         print("Invalid content type number.")
27 |     else:
28 |         # User to input the text content
29 |         text_content = input("Enter the text content for analysis:\n")
30 |         
31 |         # Create instance of ContentAnalyzer with the chosen content type
32 |         analyzer = ContentAnalyzer(text_content, content_type_name)
33 | 
34 |         # Perform the analysis
35 |         df, viral_probability = analyzer.analyze()
36 | 
37 |         # Print the results
38 |         print(f"Readability Scores Summary for {content_type_name.capitalize()} Content:")
39 |         print(df)
40 |         print(f"The probability of the content going viral is: {viral_probability * 100:.2f}%")
41 | 
42 | if __name__ == "__main__":
43 |     main()
44 | 


--------------------------------------------------------------------------------
/pyviralcontent/content_analyzer.py:
--------------------------------------------------------------------------------
 1 | from .readability_calculator import ReadabilityCalculator
 2 | from .likert_scale import LikertScale
 3 | from .visualizer import Visualizer
 4 | 
 5 | # This class acts as a high-level interface for content analysis. 
 6 | # It utilizes ReadabilityCalculator, LikertScale, and Visualizer to perform a comprehensive readability analysis, including calculating scores,
 7 | # determining average scores, calculating virality probability, and visualizing results.
 8 | class ContentAnalyzer:
 9 |     def __init__(self, text_content, content_type):
10 |         self.text_content = text_content
11 |         self.content_type = content_type
12 |         self.readability_calculator = ReadabilityCalculator()
13 |         self.likert_scale = LikertScale()
14 |         self.visualizer = Visualizer()
15 | 
16 |     def analyze(self):
17 |         # Calculate readability scores
18 |         df = self.readability_calculator.calculate_scores_by_content_type(self.text_content, self.content_type)
19 |         # Calculate the average score and determine the overall Likert scale and quality
20 |         average_score, overall_likert, overall_quality = self.likert_scale.calculate_average_score(df)
21 |         df.loc[df.shape[0]] = ['OVERALL SCORE', average_score, overall_likert, overall_quality]
22 |         # Calculate the probability of the content going viral
23 |         #viral_probability = self.readability_calculator.calculate_virality_probability(df)
24 |         viral_probability = self.readability_calculator.calculate_virality_probability(df,self.content_type)
25 |         # Visualize the readability scores heatmap
26 |         self.visualizer.plot_scores_heatmap(df, self.content_type)
27 |         return df, viral_probability


--------------------------------------------------------------------------------
/pyviralcontent/text_analyzer.py:
--------------------------------------------------------------------------------
 1 | from .syllable_counter import SyllableCounter
 2 | 
 3 | # This class performs various text analysis operations. It utilizes the SyllableCounter to perform syllable-related computations
 4 | # and calculates different readability scores such as Flesch Reading Ease, Gunning Fog Index, etc.
 5 | class TextAnalyzer:
 6 |     def __init__(self):
 7 |         self.syllable_counter = SyllableCounter()
 8 | 
 9 |     def complex_word_count(self, text):
10 |         words = text.split()
11 |         complex_words = [word for word in words if self.syllable_counter.count(word) >= 3]
12 |         return len(complex_words)
13 | 
14 |     def flesch_reading_ease(self, total_sentences, total_words, total_syllables):
15 |         return 206.835 - 1.015 * (total_words / total_sentences) - 84.6 * (total_syllables / total_words)
16 | 
17 |     def flesch_kincaid(self, total_sentences, total_words, total_syllables):
18 |         return 0.39 * (total_words / total_sentences) + 11.8 * (total_syllables / total_words) - 15.59
19 | 
20 |     def gunning_fog(self, total_sentences, total_words, total_complex_words):
21 |         return 0.4 * ((total_words / total_sentences) + 100 * (total_complex_words / total_words))
22 | 
23 |     def smog(self, total_sentences, total_complex_words):
24 |         return 1.0430 * (30 * (total_complex_words / total_sentences)) ** 0.5 + 3.1291
25 | 
26 |     def linsear_write(self, text, total_sentences):
27 |         sample = text.split()[:100]
28 |         easy_word = len([word for word in sample if self.syllable_counter.count(word) < 3])
29 |         hard_word = len(sample) - easy_word
30 |         return (easy_word + (hard_word * 3)) / total_sentences
31 | 
32 |     def coleman_liau(self, total_sentences, total_words, total_characters):
33 |         return 0.0588 * (total_characters / total_words * 100) - 0.296 * (total_sentences / total_words * 100) - 15.8
34 | 
35 |     def ari(self, total_sentences, total_words, total_characters):
36 |         return 4.71 * (total_characters / total_words) + 0.5 * (total_words / total_sentences) - 21.43
37 | 
38 |     def calculate_readability_scores(self, text):
39 |         sentences = text.split('.')
40 |         words = text.split()
41 |         characters = ''.join(words)
42 |         syllables = sum(self.syllable_counter.count(word) for word in words)
43 |         complex_words = self.complex_word_count(text)
44 | 
45 |         # Total counts
46 |         total_sentences = len(sentences)
47 |         total_words = len(words)
48 |         total_characters = len(characters)
49 |         total_syllables = syllables
50 |         total_complex_words = complex_words
51 | 
52 |         return {
53 |             'flesch_reading_ease': self.flesch_reading_ease(total_sentences, total_words, total_syllables),
54 |             'flesch_kincaid': self.flesch_kincaid(total_sentences, total_words, total_syllables),
55 |             'gunning_fog': self.gunning_fog(total_sentences, total_words, total_complex_words),
56 |             'smog': self.smog(total_sentences, total_complex_words),
57 |             'linsear_write': self.linsear_write(text, total_sentences),
58 |             'coleman_liau': self.coleman_liau(total_sentences, total_words, total_characters),
59 |             'ari': self.ari(total_sentences, total_words, total_characters)
60 |         }
61 | 


--------------------------------------------------------------------------------
/pyviralcontent/likert_scale.py:
--------------------------------------------------------------------------------
  1 | 
  2 | # This class manages the Likert scale interpretation and qualitative descriptors for readability scores. 
  3 | # It provides methods to determine the Likert scale based on scores and to get qualitative descriptors for Likert scores.
  4 | class LikertScale:
  5 |     def __init__(self):
  6 |         self.likert_scale_interpretation = {
  7 |             'flesch_reading_ease': [
  8 |                 (lambda score: self.in_range(score, 90, float('inf')), 5),  # Very Easy
  9 |                 (lambda score: self.in_range(score, 70, 90), 4),            # Easy
 10 |                 (lambda score: self.in_range(score, 50, 70), 3),            # Fairly Easy
 11 |                 (lambda score: self.in_range(score, 30, 50), 2),            # Difficult
 12 |                 (lambda score: self.in_range(score, 0, 30), 1),             # Very Confusing
 13 |             ],
 14 |             'flesch_kincaid': [
 15 |                 (lambda score: score <= 5, 5),     # Very Easy
 16 |                 (lambda score: self.in_range(score, 5, 6), 4),   # Easy
 17 |                 (lambda score: self.in_range(score, 6, 7), 3),   # Fairly Easy
 18 |                 (lambda score: self.in_range(score, 7, 9), 2),   # Difficult
 19 |                 (lambda score: score >= 9, 1),     # Very Confusing
 20 |             ],
 21 |             'gunning_fog': [
 22 |                 (lambda score: score <= 6, 5),     # Very Easy
 23 |                 (lambda score: self.in_range(score, 6, 8), 4),   # Easy
 24 |                 (lambda score: self.in_range(score, 8, 12), 3),  # Fairly Easy
 25 |                 (lambda score: self.in_range(score, 12, 17), 2), # Difficult
 26 |                 (lambda score: score >= 17, 1),    # Very Confusing
 27 |             ],
 28 |             'smog': [
 29 |                 (lambda score: score <= 6, 5),     # Very Easy
 30 |                 (lambda score: self.in_range(score, 6, 8), 4),   # Easy
 31 |                 (lambda score: self.in_range(score, 8, 12), 3),  # Fairly Easy
 32 |                 (lambda score: self.in_range(score, 12, 14), 2), # Difficult
 33 |                 (lambda score: score >= 14, 1),    # Very Confusing
 34 |             ],
 35 |             'linsear_write': [
 36 |                 (lambda score: score <= 5, 5),    # Very Easy
 37 |                 (lambda score: self.in_range(score, 5, 8), 4),  # Easy
 38 |                 (lambda score: self.in_range(score, 8, 12), 3), # Fairly Easy
 39 |                 (lambda score: self.in_range(score, 12, 15), 2),# Difficult
 40 |                 (lambda score: score >= 15, 1),   # Very Confusing
 41 |             ],
 42 |             'coleman_liau': [
 43 |                 (lambda score: score <= 5, 5),    # Very Easy
 44 |                 (lambda score: self.in_range(score, 5, 8), 4),  # Easy
 45 |                 (lambda score: self.in_range(score, 8, 12), 3), # Fairly Easy
 46 |                 (lambda score: self.in_range(score, 12, 15), 2),# Difficult
 47 |                 (lambda score: score >= 15, 1),   # Very Confusing
 48 |             ],
 49 |             'ari': [
 50 |                 (lambda score: score <= 2, 5),    # Very Easy
 51 |                 (lambda score: self.in_range(score, 2, 4), 4),  # Easy
 52 |                 (lambda score: self.in_range(score, 4, 7), 3),  # Fairly Easy
 53 |                 (lambda score: self.in_range(score, 7, 10), 2), # Difficult
 54 |                 (lambda score: score >= 10, 1),   # Very Confusing
 55 |             ],
 56 |             'default': [
 57 |                 (lambda score: score <= 2, 5),    # Very Easy
 58 |                 (lambda score: self.in_range(score, 2, 4), 4),  # Easy
 59 |                 (lambda score: self.in_range(score, 4, 6), 3),  # Fairly Easy
 60 |                 (lambda score: self.in_range(score, 6, 8), 2),  # Difficult
 61 |                 (lambda score: score >= 8, 1),    # Very Confusing
 62 |             ]
 63 |         }
 64 |         
 65 |         self.qualitative_descriptors = {
 66 |             5: 'Excellent/Very Clear',
 67 |             4: 'Good/Clear',
 68 |             3: 'Average/Somewhat Clear',
 69 |             2: 'Below Average/Confusing',
 70 |             1: 'Poor/Unclear',
 71 |             0: 'Very Poor/Very Unclear'
 72 |         }
 73 | 
 74 |     @staticmethod
 75 |     def in_range(score, start, end):
 76 |         return start <= score < end
 77 | 
 78 |     def determine_likert_scale(self, score, test_name):
 79 |         scale_ranges = self.likert_scale_interpretation.get(test_name, self.likert_scale_interpretation['default'])
 80 |         for check_func, likert_value in scale_ranges:
 81 |             if check_func(score):
 82 |                 return likert_value
 83 |         return 0
 84 |     
 85 |     def max_scale(self, test_name):
 86 |         if test_name in self.likert_scale_interpretation: 
 87 |             # Find the max Likert scale value for the test
 88 |             return max(value for _, value in self.likert_scale_interpretation[test_name])
 89 |         else:
 90 |             # Default max Likert scale value
 91 |             return max(value for _, value in self.likert_scale_interpretation['default'])
 92 | 
 93 |     def get_qualitative_descriptor(self, likert_score):
 94 |         return self.qualitative_descriptors.get(likert_score, 'Undefined')
 95 |     
 96 |     def calculate_average_score(self, df):
 97 |       average_score = df['Score'].mean()
 98 |       overall_likert = self.determine_likert_scale(average_score, 'default')
 99 |       overall_quality = self.get_qualitative_descriptor(overall_likert)
100 |       return average_score, overall_likert, overall_quality


--------------------------------------------------------------------------------
/pyviralcontent/readability_calculator.py:
--------------------------------------------------------------------------------
  1 | import numpy as np
  2 | import pandas as pd
  3 | from .text_analyzer import TextAnalyzer
  4 | from .likert_scale import LikertScale
  5 | 
  6 | # This class is responsible for calculating readability scores for a given text based on content type. 
  7 | # It uses TextAnalyzer to compute scores and LikertScale for interpreting scores. 
  8 | # It also provides functionality to calculate the probability of content going viral.
  9 | class ReadabilityCalculator:
 10 |     def __init__(self):
 11 |         self.content_type_formulas = {
 12 |             'scientific': ['gunning_fog', 'coleman_liau', 'ari'],
 13 |             'blog': ['flesch_reading_ease', 'flesch_kincaid'],
 14 |             'video': ['smog', 'flesch_reading_ease'],
 15 |             'technical': ['linsear_write', 'ari'],
 16 |             'fictional': ['flesch_kincaid', 'coleman_liau'],
 17 |             'legal': ['gunning_fog', 'smog'],
 18 |             'educational': ['flesch_kincaid', 'linsear_write'],
 19 |             'news': ['flesch_reading_ease', 'gunning_fog'],
 20 |             'advertising': ['flesch_reading_ease', 'coleman_liau'],
 21 |             'social_media': ['flesch_reading_ease', 'linsear_write']
 22 |         }
 23 |         self.text_analyzer = TextAnalyzer()
 24 |         self.likert_scale = LikertScale()
 25 | 
 26 |     def calculate_scores_by_content_type(self, text, content_type):
 27 |         scores = self.text_analyzer.calculate_readability_scores(text)
 28 |         selected_scores = {test: score for test, score in scores.items() if test in self.content_type_formulas[content_type]}
 29 |         
 30 |         df = pd.DataFrame(list(selected_scores.items()), columns=['Test', 'Score'])
 31 |         df['Likert_Scale'] = df.apply(lambda row: self.likert_scale.determine_likert_scale(row['Score'], row['Test']), axis=1)
 32 |         df['Qualitative_Descriptors'] = df['Likert_Scale'].apply(lambda x: self.likert_scale.get_qualitative_descriptor(x))
 33 |         return df
 34 | 
 35 |     def calculate_virality_probability(self, df):
 36 |       virality_weights = {'flesch_reading_ease': 0.15,'flesch_kincaid': 0.15,'gunning_fog': 0.1,'smog': 0.1,'linsear_write': 0.1,'coleman_liau': 0.2,'ari': 0.2}
 37 |       df['Weighted_Likert'] = df['Test'].map(virality_weights).fillna(0) * df['Likert_Scale']
 38 |       virality_score = df['Weighted_Likert'].sum() / df['Test'].map(virality_weights).fillna(0).sum()
 39 |       probability_of_going_viral = virality_score / 5  # Assuming 5 is the max Likert scale value
 40 |       return probability_of_going_viral
 41 | 
 42 |     def calculate_scores_by_content_type_keener(self, text, content_type):
 43 |         scores_dict = self.text_analyzer.calculate_readability_scores(text)
 44 |         selected_scores_dict = {test: score for test, score in scores_dict.items() if test in self.content_type_formulas[content_type]}
 45 |         
 46 |         scores = list(selected_scores_dict.values())
 47 |         keener_scores = self.keener_method(scores)
 48 |         
 49 |         keener_df = pd.DataFrame(list(selected_scores_dict.keys()), columns=['Test'])
 50 |         keener_df['Score'] = keener_scores
 51 |         keener_df['Likert_Scale'] = keener_df['Score'].apply(lambda x: self.likert_scale.determine_likert_scale(x, 'default'))
 52 |         keener_df['Qualitative_Descriptors'] = keener_df['Likert_Scale'].apply(lambda x: self.likert_scale.get_qualitative_descriptor(x))
 53 |         
 54 |         overall_score = np.dot(keener_scores, scores)
 55 |         overall_likert = self.likert_scale.determine_likert_scale(overall_score, 'default')
 56 |         overall_quality = self.likert_scale.get_qualitative_descriptor(overall_likert)
 57 |         
 58 |         overall_df = pd.DataFrame([['OVERALL SCORE', overall_score, overall_likert, overall_quality]], columns=['Test', 'Score', 'Likert_Scale', 'Qualitative_Descriptors'])
 59 |         
 60 |         return pd.concat([keener_df, overall_df], ignore_index=True)
 61 |     
 62 |     # def calculate_virality_probability(self, df):
 63 |     #   virality_weights = {
 64 |     #       'flesch_reading_ease': 0.15,
 65 |     #       'flesch_kincaid': 0.15,
 66 |     #       'gunning_fog': 0.1,
 67 |     #       'smog': 0.1,
 68 |     #       'linsear_write': 0.1,
 69 |     #       'coleman_liau': 0.2,
 70 |     #       'ari': 0.2
 71 |     #       }
 72 |     #   df['Weighted_Likert'] = df['Test'].map(virality_weights).fillna(0) * df['Likert_Scale']
 73 |     #   virality_score = df['Weighted_Likert'].sum() / df['Test'].map(virality_weights).fillna(0).sum()
 74 |     #   probability_of_going_viral = virality_score / 5  # Assuming 5 is the max Likert scale value
 75 |     #   return probability_of_going_viral
 76 |   
 77 |     def calculate_virality_probability(self, df, content_type):
 78 |         virality_weights = {'flesch_reading_ease': 0.15,'flesch_kincaid': 0.15,'gunning_fog': 0.1,
 79 |                           'smog': 0.1,'linsear_write': 0.1,'coleman_liau': 0.2,'ari': 0.2}
 80 |         df['Weighted_Likert'] = df['Test'].map(virality_weights).fillna(0) * df['Likert_Scale']
 81 |         virality_score = df['Weighted_Likert'].sum()
 82 |         # Get the tests relevant for the content type
 83 |         relevant_tests = self.content_type_formulas[content_type]
 84 |         # Calculate the sum of the maximum possible weighted scores for the relevant tests
 85 |         max_weighted_score = sum(virality_weights[test] * self.likert_scale.max_scale(test) for test in relevant_tests if test in virality_weights)
 86 |         # Normalize the virality score by the maximum possible weighted score
 87 |         probability_of_going_viral = virality_score / max_weighted_score if max_weighted_score else 0
 88 |         return probability_of_going_viral
 89 | 
 90 |     @staticmethod
 91 |     def kappa(x):
 92 |         return 0.5 + 0.5 * np.sign(x - 0.5) * np.sqrt(abs(2 * x - 1))
 93 | 
 94 |     def keener_method(self, scores):
 95 |         N = len(scores)
 96 |         S = np.zeros((N, N))
 97 |         for i in range(N):
 98 |             for j in range(N):
 99 |                 if i != j:
100 |                     S[i, j] = 1 / scores[i] if scores[i] > 0 else 0
101 | 
102 |         K = np.zeros((N, N))
103 |         for i in range(N):
104 |             for j in range(N):
105 |                 if i != j:
106 |                     K[i, j] = self.kappa((1 + S[i, j]) / (2 + S[i, j] + S[j, i]))
107 |                 else:
108 |                     K[i, j] = 0
109 | 
110 |         eigenvalues, eigenvectors = np.linalg.eig(K)
111 |         max_eigenvalue_index = np.argmax(eigenvalues.real)
112 |         ratings = eigenvectors[:, max_eigenvalue_index].real
113 |         normalized_ratings = ratings / np.sum(ratings)
114 |         
115 |         return normalized_ratings


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
  1 | # PyViralContent
  2 | 
  3 | `pyviralcontent` is a Python package designed to assess the readability of various types of content and predict the and the probability of the content going viral. It employs multiple readability tests and translates numerical scores into qualitative descriptors based on the Likert scale. The package supports various types of content, allowing for a tailored analysis based on the specific nature of the content.
  4 | 
  5 | ## Supported Content Types
  6 | 
  7 | The package currently supports the following content types:
  8 | 
  9 | - `scientific`
 10 | - `blog`
 11 | - `video`
 12 | - `technical`
 13 | - `fictional`
 14 | - `legal`
 15 | - `educational`
 16 | - `news`
 17 | - `advertising`
 18 | - `social_media`
 19 | 
 20 | ## Installation
 21 | 
 22 | ```
 23 | pip install pyviralcontent
 24 | ```
 25 | 
 26 | ## Usage
 27 | To analyze your content, you can use the `ContentAnalyzer` class from the `pyviralcontent` package. Here's a simple example of how to use the `ContentAnalyzer` to analyze different types of content:
 28 | 
 29 | 
 30 | ```python
 31 | from pyviralcontent import ContentAnalyzer
 32 | 
 33 | def test_content_analysis(content_type, text_content):
 34 |     """
 35 |     Test the content analysis for a given type of content and text content.
 36 | 
 37 |     :param content_type: The type of the content (e.g., 'scientific', 'blog', etc.).
 38 |     :param text_content: The actual content to analyze.
 39 |     """
 40 |     # Create an instance of ContentAnalyzer
 41 |     analyzer = ContentAnalyzer(text_content, content_type)
 42 |     # Perform the analysis
 43 |     df, viral_probability = analyzer.analyze()
 44 |     # Print the results
 45 |     print(f"\nReadability Scores Summary for {content_type.capitalize()} Content:")
 46 |     display(df)
 47 |     print(f"The probability of the content going viral is: {viral_probability * 100:.2f}%")
 48 | 
 49 | # Example 1: Scientific content
 50 | test_content_analysis(
 51 |     'scientific',
 52 |     "Implement an accessibility-first approach in the design of the website. This includes: • High-contrast visuals for low-vision users. • Text-to-speech functionality for all text, including product descriptions and checkout processes.• Easy keyboard navigation for those unable to use a mouse."
 53 | )
 54 | 
 55 | # Example 2: Blog content
 56 | test_content_analysis(
 57 |     'blog',
 58 |     "Today's blog post discusses the importance of user experience design. A good design ensures that users find joy and satisfaction in the interaction with the product, making it an essential aspect of product development."
 59 | )
 60 | 
 61 | # Example 3: Technical content
 62 | test_content_analysis(
 63 |     'technical',
 64 |     "The module utilizes an advanced algorithm for data processing, ensuring high performance and reliability. It's optimized for multi-threaded environments, offering significant improvements in processing speed and efficiency."
 65 | )
 66 | 
 67 | # Example 4: Fictional content
 68 | test_content_analysis(
 69 |     'fictional',
 70 |     "In the distant future, humanity has reached the stars. Each galaxy is a new frontier, and every planet a new adventure. Join our heroes as they navigate through cosmic dangers and discover the mysteries of the universe."
 71 | )
 72 | 
 73 | # Example 5: Legal content
 74 | test_content_analysis(
 75 |     'legal',
 76 |     "The contract stipulates the terms and conditions of the agreement and is legally binding to both parties involved. It outlines the responsibilities, duties, and liabilities in clear, unambiguous language to prevent any misunderstandings."
 77 | )
 78 | 
 79 | # Example 6: Educational content
 80 | test_content_analysis(
 81 |     'educational',
 82 |     "Today's lesson covers the fundamental principles of physics. We'll explore Newton's laws of motion, the concept of gravity, and the principles of energy and momentum. Each concept will be demonstrated with real-life examples and interactive experiments."
 83 | )
 84 | 
 85 | # Example 7: News content
 86 | test_content_analysis(
 87 |     'news',
 88 |     "In today's news, the local community is coming together to support the annual food drive. Last year's drive helped over a thousand families, and this year the organizers hope to double that number with the help of generous donations and volunteer work."
 89 | )
 90 | 
 91 | # Example 8: Advertising content
 92 | test_content_analysis(
 93 |     'advertising',
 94 |     "Introducing the latest innovation in home cleaning! Our new vacuum cleaner is equipped with advanced technology to clean your home efficiently and effortlessly. Say goodbye to dust and hello to spotless floors!"
 95 | )
 96 | 
 97 | # Example 9: Social Media content
 98 | test_content_analysis(
 99 |     'social_media',
100 |     "Just finished an amazing workout at the gym! 💪 Feeling energized and ready to take on the day. Remember, a healthy lifestyle is not just a goal, it's a way of living. #FitnessGoals #HealthyLiving"
101 | )
102 | 
103 | # Example 10: Video content
104 | test_content_analysis(
105 |     'video',
106 |     "In this video, we'll take a closer look at the intricate ecosystem of the Amazon rainforest. Discover the diverse species that call it home, and learn about the critical role it plays in our planet's climate system."
107 | )
108 | ```
109 | ![Sample Image](https://github.com/bhaskatripathi/pyviralcontent/blob/main/Sample.JPG?raw=true)
110 | 
111 | 
112 | ## Features
113 | 
114 | - Multiple readability tests for different content types.
115 | - Qualitative descriptors based on the Likert scale.
116 | - Estimation of content's virality potential.
117 | - Supported content types include: scientific, blog, video, technical, fictional, legal, educational, news, advertising, social_media.
118 | 
119 | ## How it Works?
120 | 
121 | The `PyViralContent` package offers a sophisticated approach to analyzing textual content by recognizing that no single readability metric is representative fits all content types. This is essential a Multi Criteria Decision Analysis Problem which is solved using Keener's method. Different types of content have unique stylistic and structural characteristics, and the package addresses this by associating specific readability formulas with each content type. This method ensures a nuanced analysis and provides a more accurate reflection of the content's readability and potential virality.
122 | 
123 | ### Content Type Formulas
124 | 
125 | The package defines `content_type_formulas`, a mapping of content types to the sets of readability formulas that are best suited for those types. Here's the association between content types and their corresponding readability formulas. These formulae have been integrated using Keener's MCDA method. Keener's method computes the eigenvector corresponding to the largest eigenvalue of a certain matrix derived from the pairwise comparisons. This eigenvector provides the weights or ratings of the items being compared, reflecting their relative importance or dominance in the context of the comparison.
126 | ![UML](https://github.com/bhaskatripathi/pyviralcontent/blob/main/UML.png)
127 | For a detailed explanation of Keener's method and its applications, please refer to the following resource:
128 | [Understanding Keener's Method (PDF)](https://www.dcs.bbk.ac.uk/~ale/dsta+dsat/dsta+dsat-3/lm-ch3-keener.pdf)
129 | 
130 | The `PyViralContent` package integrates Keener's method in its analytical engine to enhance the robustness and depth of the content analysis, offering users a sophisticated tool for assessing the potential impact and reach of their content.
131 | 
132 | | Content Type  | Readability Formulas Used               |
133 | |---------------|-----------------------------------------|
134 | | Scientific    | Gunning Fog, Coleman Liau, ARI          |
135 | | Blog          | Flesch Reading Ease, Flesch Kincaid     |
136 | | Video         | SMOG, Flesch Reading Ease               |
137 | | Technical     | Linsear Write, ARI                      |
138 | | Fictional     | Flesch Kincaid, Coleman Liau            |
139 | | Legal         | Gunning Fog, SMOG                       |
140 | | Educational   | Flesch Kincaid, Linsear Write           |
141 | | News          | Flesch Reading Ease, Gunning Fog        |
142 | | Advertising   | Flesch Reading Ease, Coleman Liau       |
143 | | Social Media  | Flesch Reading Ease, Linsear Write      |
144 | 
145 | ### Interpretation with Likert Scale
146 | 
147 | The results from the readability formulas are interpreted using a Likert scale, which provides a qualitative measure of the content's readability. This scale is not one-size-fits-all; it is tailored to each readability formula to accurately reflect the nuances of each metric. Here's how the Likert scale is applied for each readability formula:
148 | 
149 | | Readability Formula     | Likert Scale Interpretation (Score Range)  | Qualitative Descriptor     |
150 | |-------------------------|--------------------------------------------|----------------------------|
151 | | Flesch Reading Ease     | 90-inf: 5, 70-90: 4, 50-70: 3, 30-50: 2, 0-30: 1 | Very Easy to Very Confusing |
152 | | Flesch Kincaid          | <=5: 5, 5-6: 4, 6-7: 3, 7-9: 2, >=9: 1     | Very Easy to Very Confusing |
153 | | Gunning Fog             | <=6: 5, 6-8: 4, 8-12: 3, 12-17: 2, >=17: 1 | Very Easy to Very Confusing |
154 | | SMOG                    | <=6: 5, 6-8: 4, 8-12: 3, 12-14: 2, >=14: 1 | Very Easy to Very Confusing |
155 | | Linsear Write           | <=5: 5, 5-8: 4, 8-12: 3, 12-15: 2, >=15: 1 | Very Easy to Very Confusing |
156 | | Coleman Liau            | <=5: 5, 5-8: 4, 8-12: 3, 12-15: 2, >=15: 1 | Very Easy to Very Confusing |
157 | | ARI                     | <=2: 5, 2-4: 4, 4-7: 3, 7-10: 2, >=10: 1   | Very Easy to Very Confusing |
158 | 
159 | These ranges and descriptors ensure that the readability score is not just a number, but a meaningful indicator of how the content will likely be received by the intended audience. The `PyViralContent` package provides a detailed output, including both the readability scores from each formula used and the overall virality probability, offering valuable insights into the potential reach and impact of the content analyzed.
160 | 
161 | 
162 | ## Contributing
163 | 
164 | Contributions to `pyviralcontent` are welcome! Please feel free to submit issues, fork the repository, and create pull requests.
165 | 
166 | ## License
167 | 
168 | This project is licensed under the MIT License - see the LICENSE file for details.
169 | 
170 | ## Contact
171 | 
172 | Bhaskar Tripathi - bhaskar.tripathi@gmail.com
173 | GitHub: https://github.com/bhaskatripathi/pyviralcontent
174 | 


--------------------------------------------------------------------------------
/pyviralcontent.egg-info/PKG-INFO:
--------------------------------------------------------------------------------
  1 | Metadata-Version: 2.1
  2 | Name: pyviralcontent
  3 | Version: 0.1.4
  4 | Summary: A package for analyzing content readability and virality potential.
  5 | Home-page: https://github.com/bhaskatripathi/pyviralcontent
  6 | Author: Bhaskar Tripathi
  7 | Author-email: bhaskar.tripathi@gmail.com
  8 | Keywords: readability virality content-analysis
  9 | Description-Content-Type: text/markdown
 10 | 
 11 | # PyViralContent
 12 | 
 13 | `pyviralcontent` is a Python package designed to assess the readability of various types of content and predict the and the probability of the content going viral. It employs multiple readability tests and translates numerical scores into qualitative descriptors based on the Likert scale. The package supports various types of content, allowing for a tailored analysis based on the specific nature of the content.
 14 | 
 15 | ## Supported Content Types
 16 | 
 17 | The package currently supports the following content types:
 18 | 
 19 | - `scientific`
 20 | - `blog`
 21 | - `video`
 22 | - `technical`
 23 | - `fictional`
 24 | - `legal`
 25 | - `educational`
 26 | - `news`
 27 | - `advertising`
 28 | - `social_media`
 29 | 
 30 | ## Installation
 31 | 
 32 | ```
 33 | pip install pyviralcontent
 34 | ```
 35 | 
 36 | ## Usage
 37 | To analyze your content, you can use the `ContentAnalyzer` class from the `pyviralcontent` package. Here's a simple example of how to use the `ContentAnalyzer` to analyze different types of content:
 38 | 
 39 | 
 40 | ```python
 41 | from pyviralcontent import ContentAnalyzer
 42 | 
 43 | def test_content_analysis(content_type, text_content):
 44 |     """
 45 |     Test the content analysis for a given type of content and text content.
 46 | 
 47 |     :param content_type: The type of the content (e.g., 'scientific', 'blog', etc.).
 48 |     :param text_content: The actual content to analyze.
 49 |     """
 50 |     # Create an instance of ContentAnalyzer
 51 |     analyzer = ContentAnalyzer(text_content, content_type)
 52 |     # Perform the analysis
 53 |     df, viral_probability = analyzer.analyze()
 54 |     # Print the results
 55 |     print(f"\nReadability Scores Summary for {content_type.capitalize()} Content:")
 56 |     display(df)
 57 |     print(f"The probability of the content going viral is: {viral_probability * 100:.2f}%")
 58 | 
 59 | # Example 1: Scientific content
 60 | test_content_analysis(
 61 |     'scientific',
 62 |     "Implement an accessibility-first approach in the design of the website. This includes: • High-contrast visuals for low-vision users. • Text-to-speech functionality for all text, including product descriptions and checkout processes.• Easy keyboard navigation for those unable to use a mouse."
 63 | )
 64 | 
 65 | # Example 2: Blog content
 66 | test_content_analysis(
 67 |     'blog',
 68 |     "Today's blog post discusses the importance of user experience design. A good design ensures that users find joy and satisfaction in the interaction with the product, making it an essential aspect of product development."
 69 | )
 70 | 
 71 | # Example 3: Technical content
 72 | test_content_analysis(
 73 |     'technical',
 74 |     "The module utilizes an advanced algorithm for data processing, ensuring high performance and reliability. It's optimized for multi-threaded environments, offering significant improvements in processing speed and efficiency."
 75 | )
 76 | 
 77 | # Example 4: Fictional content
 78 | test_content_analysis(
 79 |     'fictional',
 80 |     "In the distant future, humanity has reached the stars. Each galaxy is a new frontier, and every planet a new adventure. Join our heroes as they navigate through cosmic dangers and discover the mysteries of the universe."
 81 | )
 82 | 
 83 | # Example 5: Legal content
 84 | test_content_analysis(
 85 |     'legal',
 86 |     "The contract stipulates the terms and conditions of the agreement and is legally binding to both parties involved. It outlines the responsibilities, duties, and liabilities in clear, unambiguous language to prevent any misunderstandings."
 87 | )
 88 | 
 89 | # Example 6: Educational content
 90 | test_content_analysis(
 91 |     'educational',
 92 |     "Today's lesson covers the fundamental principles of physics. We'll explore Newton's laws of motion, the concept of gravity, and the principles of energy and momentum. Each concept will be demonstrated with real-life examples and interactive experiments."
 93 | )
 94 | 
 95 | # Example 7: News content
 96 | test_content_analysis(
 97 |     'news',
 98 |     "In today's news, the local community is coming together to support the annual food drive. Last year's drive helped over a thousand families, and this year the organizers hope to double that number with the help of generous donations and volunteer work."
 99 | )
100 | 
101 | # Example 8: Advertising content
102 | test_content_analysis(
103 |     'advertising',
104 |     "Introducing the latest innovation in home cleaning! Our new vacuum cleaner is equipped with advanced technology to clean your home efficiently and effortlessly. Say goodbye to dust and hello to spotless floors!"
105 | )
106 | 
107 | # Example 9: Social Media content
108 | test_content_analysis(
109 |     'social_media',
110 |     "Just finished an amazing workout at the gym! 💪 Feeling energized and ready to take on the day. Remember, a healthy lifestyle is not just a goal, it's a way of living. #FitnessGoals #HealthyLiving"
111 | )
112 | 
113 | # Example 10: Video content
114 | test_content_analysis(
115 |     'video',
116 |     "In this video, we'll take a closer look at the intricate ecosystem of the Amazon rainforest. Discover the diverse species that call it home, and learn about the critical role it plays in our planet's climate system."
117 | )
118 | ```
119 | ![Sample Image](https://github.com/bhaskatripathi/pyviralcontent/blob/main/Sample.JPG?raw=true)
120 | 
121 | 
122 | ## Features
123 | 
124 | - Multiple readability tests for different content types.
125 | - Qualitative descriptors based on the Likert scale.
126 | - Estimation of content's virality potential.
127 | - Supported content types include: scientific, blog, video, technical, fictional, legal, educational, news, advertising, social_media.
128 | 
129 | ## How it Works?
130 | 
131 | The `PyViralContent` package offers a sophisticated approach to analyzing textual content by recognizing that no single readability metric is representative fits all content types. This is essential a Multi Criteria Decision Analysis Problem which is solved using Keener's method. Different types of content have unique stylistic and structural characteristics, and the package addresses this by associating specific readability formulas with each content type. This method ensures a nuanced analysis and provides a more accurate reflection of the content's readability and potential virality.
132 | 
133 | ### Content Type Formulas
134 | 
135 | The package defines `content_type_formulas`, a mapping of content types to the sets of readability formulas that are best suited for those types. Here's the association between content types and their corresponding readability formulas. These formulae have been integrated using Keener's MCDA method. Keener's method computes the eigenvector corresponding to the largest eigenvalue of a certain matrix derived from the pairwise comparisons. This eigenvector provides the weights or ratings of the items being compared, reflecting their relative importance or dominance in the context of the comparison.
136 | 
137 | For a detailed explanation of Keener's method and its applications, please refer to the following resource:
138 | [Understanding Keener's Method (PDF)](https://www.dcs.bbk.ac.uk/~ale/dsta+dsat/dsta+dsat-3/lm-ch3-keener.pdf)
139 | 
140 | The `PyViralContent` package integrates Keener's method in its analytical engine to enhance the robustness and depth of the content analysis, offering users a sophisticated tool for assessing the potential impact and reach of their content.
141 | 
142 | | Content Type  | Readability Formulas Used               |
143 | |---------------|-----------------------------------------|
144 | | Scientific    | Gunning Fog, Coleman Liau, ARI          |
145 | | Blog          | Flesch Reading Ease, Flesch Kincaid     |
146 | | Video         | SMOG, Flesch Reading Ease               |
147 | | Technical     | Linsear Write, ARI                      |
148 | | Fictional     | Flesch Kincaid, Coleman Liau            |
149 | | Legal         | Gunning Fog, SMOG                       |
150 | | Educational   | Flesch Kincaid, Linsear Write           |
151 | | News          | Flesch Reading Ease, Gunning Fog        |
152 | | Advertising   | Flesch Reading Ease, Coleman Liau       |
153 | | Social Media  | Flesch Reading Ease, Linsear Write      |
154 | 
155 | ### Interpretation with Likert Scale
156 | 
157 | The results from the readability formulas are interpreted using a Likert scale, which provides a qualitative measure of the content's readability. This scale is not one-size-fits-all; it is tailored to each readability formula to accurately reflect the nuances of each metric. Here's how the Likert scale is applied for each readability formula:
158 | 
159 | | Readability Formula     | Likert Scale Interpretation (Score Range)  | Qualitative Descriptor     |
160 | |-------------------------|--------------------------------------------|----------------------------|
161 | | Flesch Reading Ease     | 90-inf: 5, 70-90: 4, 50-70: 3, 30-50: 2, 0-30: 1 | Very Easy to Very Confusing |
162 | | Flesch Kincaid          | <=5: 5, 5-6: 4, 6-7: 3, 7-9: 2, >=9: 1     | Very Easy to Very Confusing |
163 | | Gunning Fog             | <=6: 5, 6-8: 4, 8-12: 3, 12-17: 2, >=17: 1 | Very Easy to Very Confusing |
164 | | SMOG                    | <=6: 5, 6-8: 4, 8-12: 3, 12-14: 2, >=14: 1 | Very Easy to Very Confusing |
165 | | Linsear Write           | <=5: 5, 5-8: 4, 8-12: 3, 12-15: 2, >=15: 1 | Very Easy to Very Confusing |
166 | | Coleman Liau            | <=5: 5, 5-8: 4, 8-12: 3, 12-15: 2, >=15: 1 | Very Easy to Very Confusing |
167 | | ARI                     | <=2: 5, 2-4: 4, 4-7: 3, 7-10: 2, >=10: 1   | Very Easy to Very Confusing |
168 | 
169 | These ranges and descriptors ensure that the readability score is not just a number, but a meaningful indicator of how the content will likely be received by the intended audience. The `PyViralContent` package provides a detailed output, including both the readability scores from each formula used and the overall virality probability, offering valuable insights into the potential reach and impact of the content analyzed.
170 | 
171 | 
172 | ## Contributing
173 | 
174 | Contributions to `pyviralcontent` are welcome! Please feel free to submit issues, fork the repository, and create pull requests.
175 | 
176 | ## License
177 | 
178 | This project is licensed under the MIT License - see the LICENSE file for details.
179 | 
180 | ## Contact
181 | 
182 | Bhaskar Tripathi - bhaskar.tripathi@gmail.com
183 | GitHub: https://github.com/bhaskatripathi/pyviralcontent
184 | 


--------------------------------------------------------------------------------