├── tests ├── __init__.py ├── test_inspirobot.py ├── test_pexels.py ├── test_pixabay.py ├── test_shitpostbot.py ├── test_unsplash.py ├── test_wikihow.py ├── test_phrasefinder.py ├── test_goodreads.py ├── test_reddit.py ├── test_conceptnet.py ├── time_test.py ├── test_talkgenerator_multipletopics.py ├── test_random_util.py ├── test_slide_topic_generators.py ├── test_specific_text_generators.py ├── test_talkgenerator.py ├── test_language_util.py └── test_text_generator.py ├── talkgenerator ├── schema │ ├── __init__.py │ ├── presentation_schema_types.py │ ├── content_generator_structures.py │ ├── slide_topic_generators.py │ └── content_generators.py ├── slide │ ├── __init__.py │ ├── slide_deck.py │ ├── slides.py │ └── powerpoint_slide_creator.py ├── sources │ ├── __init__.py │ ├── inspirobot.py │ ├── goodreads.py │ ├── reddit.py │ ├── pexels.py │ ├── pixabay.py │ ├── phrasefinder.py │ ├── unsplash.py │ ├── shitpostbot.py │ ├── conceptnet.py │ ├── wikihow.py │ ├── text_generator.py │ └── chart.py ├── util │ ├── __init__.py │ ├── scraper_util.py │ ├── cache_util.py │ ├── random_util.py │ ├── os_util.py │ ├── language_util.py │ └── generator_util.py ├── datastructures │ ├── __init__.py │ ├── image_data.py │ └── slide_generator_data.py ├── __init__.py ├── data │ ├── powerpoint │ │ └── template.pptx │ ├── text-templates │ │ ├── triple_captions.txt │ │ ├── about_me_title.txt │ │ ├── history.txt │ │ ├── conclusion_title.txt │ │ ├── anecdote_title.txt │ │ ├── deep_abstract.txt │ │ ├── history_person.txt │ │ ├── historic_double_captions.txt │ │ ├── anticipation_title.txt │ │ ├── inspiration.txt │ │ ├── default_slide_title.txt │ │ ├── bold_statements.txt │ │ ├── anecdote_prompt.txt │ │ ├── double_captions.txt │ │ ├── conclusion_tuple.json │ │ ├── chart_texts.json │ │ ├── talk_subtitle.json │ │ └── talk_title.json │ ├── images │ │ ├── black-transparent.png │ │ ├── black-transparent.psd │ │ └── error_placeholder.png │ ├── prohibited_images │ │ ├── denied.jpg │ │ ├── imgur_removed.jpg │ │ ├── huge_domains_ad.gif │ │ ├── imgur_removed_2.jpg │ │ ├── tinypic_removed.png │ │ └── tinypic_removed2.jpg │ └── eval │ │ └── common_words.txt ├── run.py ├── runtime_checker.py ├── settings.py └── generator.py ├── .pre-commit-config.yaml ├── SECURITY.md ├── pytest.ini ├── run_nltk_download.py ├── requirements.txt ├── LICENSE ├── .circleci └── config.yml ├── setup.py ├── .gitignore └── README.md /tests/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /talkgenerator/schema/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /talkgenerator/slide/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /talkgenerator/sources/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /talkgenerator/util/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /talkgenerator/datastructures/__init__.py: -------------------------------------------------------------------------------- 1 | 
-------------------------------------------------------------------------------- /talkgenerator/__init__.py: -------------------------------------------------------------------------------- 1 | name = "talkgenerator" 2 | -------------------------------------------------------------------------------- /talkgenerator/data/powerpoint/template.pptx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/korymath/talk-generator/HEAD/talkgenerator/data/powerpoint/template.pptx -------------------------------------------------------------------------------- /talkgenerator/data/text-templates/triple_captions.txt: -------------------------------------------------------------------------------- 1 | Past|Currently|Future 2 | Danger 1|Danger 2|Danger 3 3 | Good|Better|Best 4 | Bad|Worse|Worst -------------------------------------------------------------------------------- /talkgenerator/data/images/black-transparent.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/korymath/talk-generator/HEAD/talkgenerator/data/images/black-transparent.png -------------------------------------------------------------------------------- /talkgenerator/data/images/black-transparent.psd: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/korymath/talk-generator/HEAD/talkgenerator/data/images/black-transparent.psd -------------------------------------------------------------------------------- /talkgenerator/data/images/error_placeholder.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/korymath/talk-generator/HEAD/talkgenerator/data/images/error_placeholder.png -------------------------------------------------------------------------------- /talkgenerator/data/prohibited_images/denied.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/korymath/talk-generator/HEAD/talkgenerator/data/prohibited_images/denied.jpg -------------------------------------------------------------------------------- /.pre-commit-config.yaml: -------------------------------------------------------------------------------- 1 | repos: 2 | - repo: https://github.com/ambv/black 3 | rev: stable 4 | hooks: 5 | - id: black 6 | language_version: python3.7 -------------------------------------------------------------------------------- /talkgenerator/data/prohibited_images/imgur_removed.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/korymath/talk-generator/HEAD/talkgenerator/data/prohibited_images/imgur_removed.jpg -------------------------------------------------------------------------------- /talkgenerator/data/prohibited_images/huge_domains_ad.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/korymath/talk-generator/HEAD/talkgenerator/data/prohibited_images/huge_domains_ad.gif -------------------------------------------------------------------------------- /talkgenerator/data/prohibited_images/imgur_removed_2.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/korymath/talk-generator/HEAD/talkgenerator/data/prohibited_images/imgur_removed_2.jpg 
-------------------------------------------------------------------------------- /talkgenerator/data/prohibited_images/tinypic_removed.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/korymath/talk-generator/HEAD/talkgenerator/data/prohibited_images/tinypic_removed.png -------------------------------------------------------------------------------- /talkgenerator/data/prohibited_images/tinypic_removed2.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/korymath/talk-generator/HEAD/talkgenerator/data/prohibited_images/tinypic_removed2.jpg -------------------------------------------------------------------------------- /SECURITY.md: -------------------------------------------------------------------------------- 1 | # Security Policy 2 | 3 | ## Supported Versions 4 | 5 | | Version | Supported | 6 | | ------- | ------------------ | 7 | | 3.0 | :white_check_mark: | 8 | -------------------------------------------------------------------------------- /pytest.ini: -------------------------------------------------------------------------------- 1 | [pytest] 2 | log_cli=true 3 | log_cli_level=DEBUG 4 | log_date_format=%Y-%m-%d %H:%M:%S 5 | log_format=[%(levelname)s %(asctime)s %(filename)s:%(lineno)s] %(message)s 6 | -------------------------------------------------------------------------------- /talkgenerator/data/text-templates/about_me_title.txt: -------------------------------------------------------------------------------- 1 | About me 2 | A little bit about me 3 | Who am I? 4 | Some things about me 5 | My background 6 | Personal background 7 | {presenter} 8 | {presenter}'s bio 9 | Short bio -------------------------------------------------------------------------------- /talkgenerator/data/text-templates/history.txt: -------------------------------------------------------------------------------- 1 | History 2 | Some {seed.singular.title} History 3 | Historic Background 4 | Important Figures in {seed.title} History 5 | Quick Historical Note 6 | History of {seed.plural.title} -------------------------------------------------------------------------------- /talkgenerator/data/text-templates/conclusion_title.txt: -------------------------------------------------------------------------------- 1 | Conclusions 2 | Conclusions 3 | Conclusion 4 | Key Points to End 5 | Ending Points 6 | To Wrap Up: 7 | Remember: 8 | Final points 9 | {title}? 10 | "{title}" 11 | To end: "{title}" -------------------------------------------------------------------------------- /talkgenerator/data/text-templates/anecdote_title.txt: -------------------------------------------------------------------------------- 1 | My story about {seed.title} 2 | Funny thing that once happened with {seed.a} 3 | My {seed.title} Experience 4 | How I got into {topic.title}? 5 | Anecdote Time! 6 | Little Anecdote 7 | Anecdote 8 | -------------------------------------------------------------------------------- /talkgenerator/data/text-templates/deep_abstract.txt: -------------------------------------------------------------------------------- 1 | Why? 2 | But... Why? 3 | What if ... ? 4 | And? 5 | Where to look? 6 | Let's go! 7 | What? 8 | How? 9 | Interaction 10 | Now what? 11 | What should we do about this? 12 | What can YOU do about this? 13 | How can YOU help? 
-------------------------------------------------------------------------------- /talkgenerator/data/text-templates/history_person.txt: -------------------------------------------------------------------------------- 1 | Historical {seed.title} People 2 | The First Person to {seed.wikihow_action} 3 | The Inventor of {seed.wikihow_action.ing} 4 | First Olympic "{seed.wikihow_action.ing.dashes}" Champion 5 | First Winner of the "{seed.wikihow_action.ing.dashes}" Championships -------------------------------------------------------------------------------- /talkgenerator/data/text-templates/historic_double_captions.txt: -------------------------------------------------------------------------------- 1 | Before|Afterwards 2 | First|Then 3 | Initially|Afterwards 4 | Early 1990's|Couple of years later 5 | Before the invention of {seed.plural.lower}|After 6 | Before the invention of {seed.wikihow_action.ing.lower}|After 7 | What people looked like before they knew how to {seed.wikihow_action.lower}|After they realised the importance of {seed.plural} -------------------------------------------------------------------------------- /tests/test_inspirobot.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | 3 | from talkgenerator.sources.inspirobot import get_random_inspirobot_image 4 | 5 | 6 | class InspirobotTest(unittest.TestCase): 7 | def test_something(self): 8 | image = get_random_inspirobot_image() 9 | self.assertIsNotNone(image) 10 | print(image) 11 | 12 | 13 | if __name__ == '__main__': 14 | unittest.main() 15 | -------------------------------------------------------------------------------- /talkgenerator/data/text-templates/anticipation_title.txt: -------------------------------------------------------------------------------- 1 | Let's look at this 2 | Look at this! 3 | Now this: 4 | This amazed me earlier 5 | Can you see it? 6 | How does {seed.singular} help? 7 | Getting {seed.plural.title} Involved 8 | How to {seed.wikihow_action} 9 | "But what about {seed.plural}?" 10 | "But what do you do with {seed.plural}?", well... 11 | What I say to people complaining about {seed.plural} -------------------------------------------------------------------------------- /talkgenerator/run.py: -------------------------------------------------------------------------------- 1 | from talkgenerator import generator 2 | 3 | 4 | def main(args): 5 | """Main run method for command line talk generation.""" 6 | presentations, slide_deck, output_file = generator.generate_presentation_using_cli_arguments( 7 | args 8 | ) 9 | 10 | 11 | def main_cli(): 12 | args = generator.get_argument_parser().parse_args() 13 | main(args) 14 | 15 | 16 | if __name__ == "__main__": 17 | main_cli() 18 | -------------------------------------------------------------------------------- /talkgenerator/data/text-templates/inspiration.txt: -------------------------------------------------------------------------------- 1 | Something to think about 2 | Some Inspiration 3 | Something my father always told me 4 | My mother always told me 5 | My grandma always said: 6 | Think about it... 7 | Consequence: 8 | Thus... 
9 | Logical Implication 10 | Logical Implication for {seed.plural.title} 11 | Implication 12 | Something that motivated me 13 | Food for Thought 14 | My Life Motto 15 | Best Thing About {seed.plural.title} 16 | What {seed.plural.title} Really Think: -------------------------------------------------------------------------------- /talkgenerator/util/scraper_util.py: -------------------------------------------------------------------------------- 1 | def create_page_scraper(scraping_function): 2 | def scrape_pages(search_term, amount): 3 | results = [] 4 | page = 1 5 | while len(results) < amount: 6 | new_quotes = scraping_function(search_term, page) 7 | if not new_quotes: 8 | break 9 | results.extend(new_quotes) 10 | page += 1 11 | 12 | return results[0:amount] 13 | 14 | return scrape_pages 15 | -------------------------------------------------------------------------------- /tests/test_pexels.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | 3 | from talkgenerator.sources import pexels 4 | 5 | 6 | class PexelsTest(unittest.TestCase): 7 | def test_pexels_access(self): 8 | images = pexels.search_photos("office") 9 | self.assertTrue(len(images) > 0) 10 | sources = [ 11 | image.get_source() for image in images if image.get_source() is not None 12 | ] 13 | self.assertTrue(len(sources) > 0) 14 | 15 | 16 | if __name__ == "__main__": 17 | unittest.main() 18 | -------------------------------------------------------------------------------- /talkgenerator/util/cache_util.py: -------------------------------------------------------------------------------- 1 | # from https://stackoverflow.com/questions/1151658/python-hashable-dicts 2 | class HashableDict(dict): 3 | """ A hashable version of a dictionary, useful for when a function needs to be cached but uses a dict as an 4 | argument """ 5 | 6 | def __key(self): 7 | return tuple((k, self[k]) for k in sorted(self)) 8 | 9 | def __hash__(self): 10 | return hash(self.__key()) 11 | 12 | def __eq__(self, other): 13 | return self.__key() == other.__key() 14 | -------------------------------------------------------------------------------- /tests/test_pixabay.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | from talkgenerator.sources import unsplash, pixabay 3 | 4 | 5 | class PixabayTest(unittest.TestCase): 6 | def test_pixabay_access(self): 7 | images = pixabay.search_photos("office chair") 8 | self.assertTrue(len(images) > 0) 9 | sources = [ 10 | image.get_source() for image in images if image.get_source() is not None 11 | ] 12 | self.assertTrue(len(sources) > 0) 13 | 14 | 15 | if __name__ == "__main__": 16 | unittest.main() 17 | -------------------------------------------------------------------------------- /talkgenerator/data/text-templates/default_slide_title.txt: -------------------------------------------------------------------------------- 1 | {seed.plural.title} 2 | {seed.plural.title} 3 | About {seed.is_noun.plural.title} 4 | About {seed.is_noun.plural.title} 5 | About {seed.is_noun.plural.title} 6 | What about {seed.is_noun.plural.title}? 7 | What about {seed.is_verb.ing.title}? 
8 | Let's look at {seed.is_noun.plural.title} 9 | Let's Discuss {seed.is_noun.plural.title} 10 | Let's look at {seed.is_verb.ing.title} 11 | Let's Discuss {seed.is_verb.ing.title} 12 | Getting {seed.is_noun.plural.title} involved 13 | Getting {seed.is_verb.ing.title} involved 14 | Discussion -------------------------------------------------------------------------------- /talkgenerator/runtime_checker.py: -------------------------------------------------------------------------------- 1 | import logging 2 | 3 | import talkgenerator.settings 4 | import talkgenerator.util.language_util 5 | 6 | logger = logging.getLogger("talkgenerator") 7 | 8 | 9 | def check_runtime_environment(): 10 | check_env = talkgenerator.settings.check_environment_variables() 11 | if check_env: 12 | logger.info("Successful check: Environment variables") 13 | 14 | check_ntlk = talkgenerator.util.language_util.check_and_download() 15 | if check_ntlk: 16 | logger.info("Successful check: NLTK Dictionaries available") 17 | 18 | return check_ntlk 19 | -------------------------------------------------------------------------------- /talkgenerator/util/random_util.py: -------------------------------------------------------------------------------- 1 | import random 2 | 3 | 4 | # From https://stackoverflow.com/questions/14992521/python-weighted-random 5 | def weighted_random(pairs): 6 | if len(pairs) == 0: 7 | return None 8 | total = sum(pair[0] for pair in pairs) 9 | r = random.uniform(0, total) 10 | for (weight, value) in pairs: 11 | r -= weight 12 | if r <= 0: 13 | return value 14 | 15 | 16 | def choice_optional(lst): 17 | """" Returns random.choice if there are elements, None otherwise """ 18 | if len(lst) > 0: 19 | return random.choice(lst) 20 | return None 21 | -------------------------------------------------------------------------------- /tests/test_shitpostbot.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | from talkgenerator.sources import shitpostbot 3 | 4 | 5 | class ShitPostBot(unittest.TestCase): 6 | def test_shitpostbot_search(self): 7 | image_urls = shitpostbot.search_images("cat") 8 | self.assertTrue(len(image_urls) > 0) 9 | 10 | def test_shitpostbot_search_rated(self): 11 | image_urls = shitpostbot.search_images_rated("cat") 12 | self.assertTrue(len(image_urls) > 0) 13 | # Check if the rating of the first one is large 14 | self.assertTrue(int(image_urls[0][0]) > 20) 15 | 16 | 17 | if __name__ == "__main__": 18 | unittest.main() 19 | -------------------------------------------------------------------------------- /talkgenerator/datastructures/image_data.py: -------------------------------------------------------------------------------- 1 | class ImageData: 2 | def __init__(self, image_url: str, source: str = None): 3 | self._image_url = image_url 4 | self._source = source 5 | 6 | def get_image_url(self) -> str: 7 | return self._image_url 8 | 9 | def get_source(self) -> str: 10 | return self._source 11 | 12 | def __str__(self): 13 | return ( 14 | "ImageData(" 15 | + self._image_url 16 | + ((", " + self._source) if self._source is not None else "") 17 | + ")" 18 | ) 19 | 20 | def __repr__(self): 21 | return str(self) 22 | -------------------------------------------------------------------------------- /run_nltk_download.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """NLTK downloading with SSL handling 3 | """ 4 | 5 | import ssl 6 | import nltk 7 | 8 | 9 | try: 10 | 
_create_unverified_https_context = ssl._create_unverified_context # pylint: disable=protected-access 11 | except AttributeError: 12 | pass 13 | else: 14 | ssl._create_default_https_context = _create_unverified_https_context # pylint: disable=protected-access 15 | 16 | 17 | if __name__ == "__main__": 18 | nltk.download('punkt') 19 | nltk.download('averaged_perceptron_tagger') 20 | nltk.download('wordnet') 21 | nltk.download('pros_cons') 22 | nltk.download('reuters') 23 | -------------------------------------------------------------------------------- /tests/test_unsplash.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | from talkgenerator.sources import unsplash 3 | 4 | 5 | class UnsplashTest(unittest.TestCase): 6 | def test_unsplash_access(self): 7 | images = unsplash.search_photos("office") 8 | self.assertTrue(len(images) > 0) 9 | sources = [ 10 | image.get_source() for image in images if image.get_source() is not None 11 | ] 12 | self.assertTrue(len(sources) > 0) 13 | 14 | def test_unsplash_random(self): 15 | image = unsplash.random() 16 | print(image) 17 | self.assertTrue(image) 18 | 19 | 20 | if __name__ == "__main__": 21 | unittest.main() 22 | -------------------------------------------------------------------------------- /talkgenerator/sources/inspirobot.py: -------------------------------------------------------------------------------- 1 | import random 2 | 3 | from talkgenerator.util import os_util 4 | from talkgenerator.datastructures.image_data import ImageData 5 | 6 | 7 | def get_random_inspirobot_image(_=None): 8 | # Generate a random url to access inspirobot 9 | dd = str(random.randint(1, 73)).zfill(2) 10 | nnnn = random.randint(0, 9998) 11 | inspirobot_url = ("http://generated.inspirobot.me/" "0{}/aXm{}xjU.jpg").format( 12 | dd, nnnn 13 | ) 14 | 15 | # Download the image 16 | # image_url = os_util.to_actual_file( 17 | # "downloads/inspirobot/{}-{}.jpg".format(dd, nnnn) 18 | # ) 19 | # os_util.download_image(inspirobot_url, image_url) 20 | 21 | return ImageData(image_url=inspirobot_url, source="Inspirobot") 22 | -------------------------------------------------------------------------------- /talkgenerator/data/text-templates/bold_statements.txt: -------------------------------------------------------------------------------- 1 | Don't We All {action}? 2 | What's the Best Way to {action}? 3 | I hate {action.ing.title} 4 | Is There a Way to Avoid Having to {action}? 5 | The Hidden Link Between {topic.plural.title} and {action.ing.title} 6 | But What's the Effect of {action.ing} on {topic.plural.title}? 7 | Don't {action}. Just {step}! 8 | Don't {action}. Just {topic.wikihow_action}! 9 | It's Hard to {action} in {location} 10 | Everything You Know About {action.ing} is Wrong! 11 | Life hack: Always {action}! 12 | I Will Teach You How To {action}! 13 | Life Advice: {action}! 14 | Life Advice: Never {action}! 15 | WARNING: Never {action}! 16 | Friendly Reminder to {action} 17 | When in Doubt: {action} 18 | {seed.conceptnet_location} is the best place to do {action}! 
-------------------------------------------------------------------------------- /tests/test_wikihow.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | from talkgenerator.sources import wikihow 3 | 4 | 5 | class WikiHowTest(unittest.TestCase): 6 | def test_wrong_wikihow_links_regression_test(self): 7 | actions = wikihow.get_related_wikihow_actions_basic_search("cat") 8 | print(actions) 9 | self.assertFalse("articles from wikiHow" in actions) 10 | 11 | def test_no_views_in_wikihow_action(self): 12 | actions = wikihow.get_related_wikihow_actions("grass") 13 | for action in actions: 14 | # No line breaks allowed 15 | self.assertFalse("\n" in action) 16 | # No number of views 17 | self.assertFalse(" views" in action and "Updated" in action) 18 | 19 | 20 | if __name__ == "__main__": 21 | unittest.main() 22 | -------------------------------------------------------------------------------- /tests/test_phrasefinder.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | 3 | from talkgenerator.sources import phrasefinder 4 | 5 | 6 | class PhraseFinderTest(unittest.TestCase): 7 | def test_phrasefinder_absolute_frequency(self): 8 | self.assertEqual(7506109, phrasefinder.get_absolute_frequency("cat")) 9 | 10 | def test_phrasefinder_absolute_frequency_any_casing(self): 11 | self.assertEqual( 12 | 10307263, phrasefinder.get_absolute_frequency_any_casing("cat") 13 | ) 14 | 15 | def test_phrasefinder_rarest_word(self): 16 | self.assertEqual("cat", phrasefinder.get_rarest_word("Why I love my cat")) 17 | self.assertEqual( 18 | "Peace", phrasefinder.get_rarest_word("Peace is what I want most") 19 | ) 20 | 21 | 22 | if __name__ == "__main__": 23 | unittest.main() 24 | -------------------------------------------------------------------------------- /tests/test_goodreads.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | 3 | from talkgenerator.sources import goodreads 4 | 5 | 6 | class GoodReadsTest(unittest.TestCase): 7 | def test_cat_search(self): 8 | cat_quotes = goodreads.search_quotes("cat", 5) 9 | # Check if starts with quote marks 10 | self.assertEqual('"', cat_quotes[0][0]) 11 | 12 | def test_too_many_quotes_amount(self): 13 | quotes = goodreads.search_quotes("cat nine tails", 25) 14 | # Check if starts with quote marks 15 | self.assertTrue(bool(quotes) and len(quotes) > 0) 16 | 17 | def test_no_quotes(self): 18 | quotes = goodreads.search_quotes("qsdfqsdfq", 100) 19 | # Check if starts with quote marks 20 | self.assertTrue(len(quotes) == 0) 21 | 22 | 23 | if __name__ == "__main__": 24 | unittest.main() 25 | -------------------------------------------------------------------------------- /talkgenerator/data/text-templates/anecdote_prompt.txt: -------------------------------------------------------------------------------- 1 | The time I got to {seed.wikihow_action.2_to_1_pronouns.lower} 2 | My first time {seed.wikihow_action.ing.2_to_1_pronouns.lower} 3 | What happened during my first time {seed.wikihow_action.ing.2_to_1_pronouns.lower} 4 | When did I learn how to {seed.wikihow_action.2_to_1_pronouns.lower}? 5 | My {seed.title.dashes} Accident 6 | My story about {seed.plural} 7 | Funny thing that once happened with my {seed} 8 | My {seed.plural.title} Experience: how I learned to not {seed.wikihow_action.lower}... 9 | My experience with {seed.plural} 10 | How did I get into {topic.plural}? Because of {seed.plural}! 
11 | Funny story about why I first learned how to {seed.wikihow_action.2_to_1_pronouns.lower} 12 | What I always say to people who {seed.wikihow_action} 13 | "But what about {seed.wikihow_action.ing.lower}?" Well... 14 | Why would people like me ever {seed.wikihow_action.2_to_1_pronouns}? -------------------------------------------------------------------------------- /tests/test_reddit.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | 3 | from talkgenerator.sources import reddit 4 | from talkgenerator.schema.content_generator_structures import RedditImageSearcher 5 | 6 | 7 | class RedditTest(unittest.TestCase): 8 | def test_reddit_search_image(self): 9 | result = reddit.search_subreddit( 10 | "memes", str("cat") + " nsfw:no (url:.jpg OR url:.png OR url:.gif)" 11 | ) 12 | print("Result from reddit is", result) 13 | self.assertIsNotNone(result) 14 | self.assertTrue(len(result) > 0) 15 | 16 | def test_reddit_simple(self): 17 | images = RedditImageSearcher("memes")("cat") 18 | self.assertTrue(len(images) > 0) 19 | sources = [ 20 | image.get_source() for image in images if image.get_source() is not None 21 | ] 22 | self.assertTrue(len(sources) > 0) 23 | 24 | 25 | if __name__ == "__main__": 26 | unittest.main() 27 | -------------------------------------------------------------------------------- /tests/test_conceptnet.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | import unittest 3 | 4 | from talkgenerator.sources import conceptnet 5 | 6 | 7 | class ConceptNetTest(unittest.TestCase): 8 | def test_conceptnet_standard(self): 9 | related_words = conceptnet.get_weighted_related_words("cat", 10) 10 | self.assertTrue(len(related_words) < 10) 11 | self.assertTrue(0 < len(related_words)) 12 | 13 | def test_conceptnet_only_english(self): 14 | related_words = conceptnet.get_weighted_related_words("crane", 50) 15 | print(related_words) 16 | self.assertFalse("" in related_words) 17 | self.assertFalse("erav" in related_words) 18 | self.assertFalse("ždral" in related_words) 19 | self.assertFalse("dral" in related_words) 20 | 21 | def test_conceptnet_multiword(self): 22 | related_words = conceptnet.get_weighted_related_words("my lap", 10) 23 | self.assertTrue(len(related_words) > 0) 24 | 25 | 26 | if __name__ == "__main__": 27 | unittest.main() 28 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | appdirs==1.4.4 2 | attrs==20.1.0 3 | beautifulsoup4==4.9.1 4 | black==21.7b0 5 | certifi==2023.7.22 6 | chardet==3.0.4 7 | charset-normalizer==2.0.4 8 | click==7.1.2 9 | codecov==2.1.9 10 | coverage==5.2.1 11 | environs==8.0.0 12 | idna==2.10 13 | importlib-metadata==1.7.0 14 | inflect==4.1.0 15 | iniconfig==1.0.1 16 | joblib==1.2.0 17 | lxml==4.9.1 18 | marshmallow==3.7.1 19 | more-itertools==8.5.0 20 | mypy-extensions==0.4.3 21 | nltk>=3.6.4 22 | packaging==20.4 23 | pathspec==0.9.0 24 | pathtools==0.1.2 25 | pexels-api==1.0.1 26 | Pillow==9.3.0 27 | pluggy==0.13.1 28 | portalocker==2.0.0 29 | praw==7.5.0 30 | py==1.10.0 31 | pyparsing==2.4.7 32 | pytest==6.0.1 33 | pytest-cov==2.10.1 34 | python-dotenv==0.14.0 35 | python-pptx==0.6.18 36 | pyunsplash==1.0.0b9 37 | regex 38 | requests==2.31.0 39 | six==1.15.0 40 | soupsieve==2.0.1 41 | toml==0.10.1 42 | tomli==1.2.1 43 | tqdm==4.48.2 44 | tracery==0.1.1 45 | update-checker==0.18.0 46 | urllib3==1.25.10 47 | 
watchdog==0.10.3 48 | websocket-client==0.57.0 49 | XlsxWriter==1.3.3 50 | zipp==3.1.0 51 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2018-2019 Kory Mathewson and Thomas Winters and Shaun Farrugia 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /talkgenerator/sources/goodreads.py: -------------------------------------------------------------------------------- 1 | from functools import lru_cache 2 | from pathlib import Path 3 | 4 | import requests 5 | from bs4 import BeautifulSoup 6 | # from cachier import cachier 7 | 8 | from talkgenerator.util import scraper_util 9 | 10 | quote_search_url = ( 11 | "https://www.goodreads.com/search?page={}&q={" 12 | "}&search%5Bsource%5D=goodreads&search_type=quotes&tab=quotes " 13 | ) 14 | 15 | 16 | @lru_cache(maxsize=20) 17 | # @cachier(cache_dir=Path("..", "tmp").absolute()) 18 | def _search_quotes_page(search_term, page): 19 | url = quote_search_url.format(page, search_term.replace(" ", "+")) 20 | try: 21 | page = requests.get(url, timeout=5) 22 | except (requests.exceptions.ConnectionError, requests.exceptions.ReadTimeout) as e: 23 | return None 24 | if page: 25 | soup = BeautifulSoup(page.content, "html.parser") 26 | # Replace breaks with new lines 27 | for br in soup.find_all("br"): 28 | br.replace_with("\n") 29 | 30 | # Extract the right text parts 31 | quote_elements = soup.find_all("div", class_="quoteText") 32 | quotes = [ 33 | " ".join([part.strip() for part in quote.get_text().split("—")][0:-1]) 34 | for quote in quote_elements 35 | ] 36 | 37 | return quotes 38 | 39 | 40 | search_quotes = scraper_util.create_page_scraper(_search_quotes_page) 41 | -------------------------------------------------------------------------------- /tests/time_test.py: -------------------------------------------------------------------------------- 1 | import time 2 | 3 | from talkgenerator import generator 4 | from talkgenerator.util import os_util 5 | 6 | 7 | def run_time_test(start_idx, end_idx): 8 | words = os_util.read_lines("data/eval/common_words.txt")[start_idx:end_idx] 9 | result_file = open("data/eval/timings.txt", "a+") 10 | 11 | for topic in words: 12 | args = generator.get_argument_parser().parse_args( 13 | [ 14 | "--topic", 15 | topic, 16 | "--num_slides", 17 | "7", 
18 |                 "--save_ppt", 19 |                 "True", 20 |                 "--open_ppt", 21 |                 "False", 22 |                 "--parallel", 23 |                 "True", 24 |             ] 25 |         ) 26 | 27 |         start = time.process_time() 28 |         clock_start = time.perf_counter() 29 | 30 |         generator.generate_presentation_using_cli_arguments(args) 31 | 32 |         end = time.process_time() 33 |         clock_end = time.perf_counter() 34 |         timing = end - start 35 |         clock_timing = clock_end - clock_start 36 |         print( 37 |             "It took {} seconds to generate the presentation," 38 |             " and {} seconds system-wide".format(str(timing), str(clock_timing)) 39 |         ) 40 |         result_file.write(topic + ", " + str(timing) + ", " + str(clock_timing) + "\n") 41 |         result_file.flush() 42 | 43 |     result_file.close() 44 | 45 | 46 | # run_time_test(0, 200) 47 | -------------------------------------------------------------------------------- /tests/test_talkgenerator_multipletopics.py: -------------------------------------------------------------------------------- 1 | import random 2 | import unittest 3 | from unittest import mock 4 | 5 | from talkgenerator.schema import slide_schemas 6 | from talkgenerator import generator 7 | from talkgenerator.slide import powerpoint_slide_creator 8 | from talkgenerator.util import os_util 9 | 10 | 11 | class TestTalkGenerator(unittest.TestCase): 12 |     def setUp(self): 13 |         random.seed(1) 14 |         self.default_args = mock.Mock() 15 |         self.default_args.configure_mock(topic="cat") 16 |         self.default_args.configure_mock(num_slides=3) 17 |         self.default_args.configure_mock(schema="default") 18 |         self.default_args.configure_mock(title=None) 19 |         self.default_args.configure_mock(parallel=True) 20 |         self.default_args.configure_mock( 21 |             output_folder=os_util.to_actual_file("../output/test/") 22 |         ) 23 |         self.default_args.configure_mock(open_ppt=False) 24 |         self.default_args.configure_mock(save_ppt=True) 25 |         self.default_args.configure_mock(int_seed=123) 26 | 27 |     def test_multiple_topics(self): 28 |         self.default_args.configure_mock(topic="cat, dog, bread, house") 29 |         self.default_args.configure_mock(num_slides=6) 30 |         ppt, _, _ = generator.generate_presentation_using_cli_arguments( 31 |             self.default_args 32 |         ) 33 |         self.assertEqual(6, len(ppt.slides)) 34 | 35 | 36 | if __name__ == "__main__": 37 |     unittest.main() 38 | 39 | 40 | -------------------------------------------------------------------------------- /talkgenerator/data/text-templates/double_captions.txt: -------------------------------------------------------------------------------- 1 | Bad {seed.plural.title}|Good {seed.plural.title} 2 | Bad {seed.plural.title}|Worse {seed.plural.title} 3 | Bad|Good 4 | Bad|Worse 5 | Before|After 6 | Do {seed.unique.wikihow_action.lower}|Don't {seed.wikihow_action.lower} 7 | Don't {seed.unique.wikihow_action.lower}|Do {seed.wikihow_action.lower} 8 | Don't|Do 9 | Do|Don't 10 | Expectation|Reality 11 | Good|Bad 12 | Good|Better 13 | How I initially approached {seed.wikihow_action.ing.lower}|What I should have done 14 | How I see it|What it actually is 15 | How I see {seed.plural}:|What {seed.plural} actually are: 16 | How I used to {seed.unique.wikihow_action.lower}|After I learned how to {seed.wikihow_action.lower} 17 | How people used to {seed.wikihow_action.lower}|My proposal 18 | How people used to {seed.wikihow_action.lower}|Now 19 | How they see it|What it actually is 20 | How they see {seed.wikihow_action.ing.lower}|What it actually is 21 | How they see {seed.wikihow_action.ing.lower}|What {seed.singular} actually means 22 | Let's turn this:|Into this: 23 | Me|Them 24 | My {seed.singular.title}
Plan|Their {seed.singular.title} Plan 25 | Past|Future 26 | Past|Present 27 | Plan|Execution 28 | Present|Future 29 | Previously|Currently 30 | Stereotype|Reality 31 | The Plan|How it worked out 32 | The {seed.title} Plan|How {seed.singular.lower} worked out 33 | What I initially did|The actual solution 34 | Why {seed.plural} are great|Why {seed.plural} are awful 35 | {seed.title}'s Stereotype|{seed.title}'s Reality 36 | {seed.unique.wikihow_action.title}|{seed.wikihow_action.title} -------------------------------------------------------------------------------- /.circleci/config.yml: -------------------------------------------------------------------------------- 1 | # Python CircleCI 2.0 configuration file 2 | # 3 | # Check https://circleci.com/docs/2.0/language-python/ for more details 4 | # 5 | version: 2 6 | jobs: 7 | build: 8 | docker: 9 | - image: circleci/python:latest 10 | 11 | working_directory: ~/talkgenerator 12 | 13 | steps: 14 | - checkout 15 | 16 | # Download and cache dependencies 17 | - restore_cache: 18 | keys: 19 | - v1-dependencies-{{ checksum "requirements.txt" }} 20 | # fallback to using the latest cache if no exact match is found 21 | - v1-dependencies- 22 | 23 | - run: 24 | name: install dependencies 25 | command: | 26 | python3 -m venv venv 27 | . venv/bin/activate 28 | pip install --upgrade pip setuptools 29 | python3 -m pip install -r requirements.txt --use-feature=2020-resolver 30 | python run_nltk_download.py 31 | 32 | - save_cache: 33 | paths: 34 | - ./venv 35 | key: v1-dependencies-{{ checksum "requirements.txt" }} 36 | 37 | # run tests! 38 | - run: 39 | name: run tests 40 | command: | 41 | python3 -m venv venv 42 | . venv/bin/activate 43 | pip install --upgrade pip setuptools 44 | pip install pytest 45 | pip install pytest-cov 46 | pip install codecov 47 | pytest --cov=talkgenerator tests/ 48 | codecov --token=e25ce249-b3da-4d80-b8d3-074f7d288969 49 | 50 | - store_artifacts: 51 | path: test-reports 52 | destination: test-reports -------------------------------------------------------------------------------- /talkgenerator/data/text-templates/conclusion_tuple.json: -------------------------------------------------------------------------------- 1 | { 2 | "two_conclusions": [ 3 | "Conclusion 1|Conclusion 2", 4 | "|", 5 | "Conclusion 1|{title}", 6 | "#first_description#|#punchline_description#", 7 | "#first_description#|#punchline_description#", 8 | "#first_description#|#punchline_description#" 9 | ], 10 | "three_conclusions": [ 11 | "Conclusion 1|Conclusion 2|Conclusion 3", 12 | "||", 13 | "Conclusion 1|Conclusion 2|{title}", 14 | "#first_description#|#first_description#|#punchline_description#", 15 | "#first_description#|#first_description#|#punchline_description#", 16 | "#first_description#|#first_description#|#punchline_description#" 17 | ], 18 | "first_description": [ 19 | "#random_nice_conclusion_word#", 20 | "#random_nice_conclusion_word#", 21 | "{seed.title}", 22 | "#positive_word.capitalize#", 23 | "{seed.plural.title} are #positive_word#", 24 | "The importance of {seed.wikihow_action.ing.lower}", 25 | "" 26 | ], 27 | "random_nice_conclusion_word": [ 28 | "Passion", 29 | "Love", 30 | "Remember", 31 | "Take care", 32 | "Be free", 33 | "Don't forget" 34 | ], 35 | "punchline_description": [ 36 | "Key Idea", 37 | "Life Lesson", 38 | "{title}", 39 | "Watch out", 40 | "Remember", 41 | "Not #positive_word#", 42 | "Do not {seed.wikihow_action}", 43 | "Avoid {seed.wikihow_action.ing.lower}", 44 | "" 45 | ], 46 | "positive_word": [ 47 | "great", 48 | "amazing", 
49 | "wonderful", 50 | "our best friends", 51 | "necessary", 52 | "fabulous", 53 | "genius", 54 | "champions", 55 | "admirable", 56 | "a blessing", 57 | "original", 58 | "skilled" 59 | ] 60 | } -------------------------------------------------------------------------------- /talkgenerator/slide/slide_deck.py: -------------------------------------------------------------------------------- 1 | import logging 2 | from typing import List 3 | 4 | from talkgenerator.slide.slides import Slide 5 | 6 | logger = logging.getLogger("talkgenerator") 7 | 8 | 9 | class SlideDeck: 10 | """ Represents a deck of Slide objects """ 11 | 12 | def __init__(self, size): 13 | self._size = size 14 | self._slides : List[Slide] = [None] * size 15 | 16 | def add_slide(self, slide_index: int, slide): 17 | self._slides[slide_index] = slide 18 | 19 | def is_complete(self): 20 | return len(self._slides) >= self._size and (None not in self._slides) 21 | 22 | def save_to_powerpoint(self, prs_template): 23 | """ Should generate a slide in the powerpoint """ 24 | if not self.is_complete(): 25 | logger.error( 26 | "ERROR: SOME SLIDES WERE NOT GENERATED: {}".format(self._slides) 27 | ) 28 | self._slides = [slide for slide in self._slides if slide is not None] 29 | return [slide.create_powerpoint_slide(prs_template) for slide in self._slides] 30 | 31 | def to_slide_deck_dictionary(self) -> List[dict]: 32 | return [slide.to_slide_dictionary() for slide in self._slides] 33 | 34 | def get_structured_data(self): 35 | """ Return slide deck as structured data for alternative presentation """ 36 | if not self.is_complete(): 37 | logger.error( 38 | "ERROR: SOME SLIDES WERE NOT GENERATED: {}".format(self._slides) 39 | ) 40 | self._slides = [slide for slide in self._slides if slide is not None] 41 | return [slide for slide in self._slides] 42 | 43 | def has_slide_nr(self, index): 44 | return 0 <= index < self._size and self._slides[index] is not None 45 | -------------------------------------------------------------------------------- /tests/test_random_util.py: -------------------------------------------------------------------------------- 1 | import random 2 | import unittest 3 | 4 | from talkgenerator.util import random_util 5 | 6 | 7 | class RandomUtilTest(unittest.TestCase): 8 | def setUp(self): 9 | random.seed(1) 10 | 11 | def test_weighted_random_all_appear(self): 12 | possibilities = (1, "one"), (4, "four"), (6, "six"), (7, "seven") 13 | results = set() 14 | for i in range(10000): 15 | if len(results) == len(possibilities): 16 | break 17 | results.add(random_util.weighted_random(possibilities)) 18 | self.assertEqual({"one", "four", "six", "seven"}, results) 19 | 20 | def test_weighted_random_all_appear_double_values(self): 21 | possibilities = (0.1, "one"), (0.4, "four"), (0.6, "six"), (0.7, "seven") 22 | results = set() 23 | for _ in range(1000): 24 | if len(results) == len(possibilities): 25 | break 26 | results.add(random_util.weighted_random(possibilities)) 27 | self.assertEqual({"one", "four", "six", "seven"}, results) 28 | 29 | def test_weighted_random_all_appear_double_values_appearances(self): 30 | possibilities = (0.1, "one"), (0.4, "four") 31 | ones = 0 32 | fours = 0 33 | for _ in range(1000): 34 | generated = random_util.weighted_random(possibilities) 35 | if generated == "one": 36 | ones += 1 37 | elif generated == "four": 38 | fours += 1 39 | 40 | # Ones should appear 1/5 * 1000 ~ 200 times 41 | self.assertTrue(150 < ones < 250) 42 | # Ones should appear 4/5 * 1000 ~ 800 times 43 | self.assertTrue(750 < fours < 
850) 44 | 45 | 46 | if __name__ == "__main__": 47 | unittest.main() 48 | -------------------------------------------------------------------------------- /talkgenerator/sources/reddit.py: -------------------------------------------------------------------------------- 1 | import datetime 2 | import logging 3 | from functools import lru_cache 4 | from pathlib import Path 5 | 6 | import praw 7 | # from cachier import cachier 8 | from prawcore import ResponseException 9 | from prawcore import RequestException 10 | 11 | from talkgenerator import settings 12 | 13 | singleton_reddit = None 14 | 15 | logger = logging.getLogger("talkgenerator") 16 | 17 | 18 | def get_reddit(): 19 | reddit = singleton_reddit 20 | if not bool(reddit): 21 | reddit = praw.Reddit(**settings.reddit_auth()) 22 | return reddit 23 | 24 | 25 | def has_reddit_access(): 26 | return bool(get_reddit()) 27 | 28 | 29 | def get_subreddit(name): 30 | if has_reddit_access(): 31 | subreddit = get_reddit().subreddit(name) 32 | if subreddit: 33 | return subreddit 34 | 35 | 36 | @lru_cache(maxsize=20) 37 | # @cachier(cache_dir=Path("..", "tmp").absolute(), stale_after=datetime.timedelta(weeks=2)) 38 | def search_subreddit(name, query, sort="relevance", limit=500, filter_nsfw=True): 39 | if has_reddit_access(): 40 | try: 41 | submissions = list( 42 | get_subreddit(name).search(query, sort=sort, limit=limit) 43 | ) 44 | 45 | if filter_nsfw: 46 | submissions = [ 47 | submission for submission in submissions if not submission.over_18 48 | ] 49 | return submissions 50 | 51 | except ResponseException as err: 52 | logger.error("Exception with accessing Reddit: {}".format(err)) 53 | except RequestException as err: 54 | logger.error("Exception with accessing Reddit: {}".format(err)) 55 | else: 56 | logger.warning("WARNING: No reddit access!") 57 | -------------------------------------------------------------------------------- /talkgenerator/sources/pexels.py: -------------------------------------------------------------------------------- 1 | import logging 2 | from pathlib import Path 3 | from typing import List 4 | 5 | # from cachier import cachier 6 | from pexels_api import API 7 | from talkgenerator import settings 8 | from talkgenerator.datastructures.image_data import ImageData 9 | 10 | logging.getLogger("pexels").setLevel(logging.DEBUG) 11 | logger = logging.getLogger("talkgenerator") 12 | 13 | 14 | def get_pexels_session(): 15 | creds = settings.pexels_auth() 16 | api = API(creds["pexels_key"]) 17 | return api 18 | 19 | 20 | pexels_session = get_pexels_session() 21 | 22 | 23 | # @cachier(cache_dir=Path("..", "tmp").absolute()) 24 | def _search_pexels(query): 25 | return pexels_session.search(query) 26 | 27 | 28 | def search_photos(query) -> List[ImageData]: 29 | if pexels_session and query: 30 | results = _search_pexels(query) 31 | if results and results["photos"]: 32 | images = [] 33 | for photo in results["photos"]: 34 | source = photo["src"] 35 | # link_download = ( 36 | # source["large"] 37 | # if "large" in source 38 | # else (source["original"] if "original" in source else photo["url"]) 39 | # ) 40 | link_download = source["original"] 41 | creator = ( 42 | (photo["photographer"] + " (via Pexels)") 43 | if "photographer" in photo 44 | else None 45 | ) 46 | images.append(ImageData(image_url=link_download, source=creator)) 47 | return images 48 | else: 49 | logger.warning( 50 | 'pexels could not find results for "{}", which might be due to missing/erroneous access keys'.format( 51 | query 52 | ) 53 | ) 54 | else: 55 | 
logger.warning("No active pexels session due to missing/wrong credentials.") 56 | -------------------------------------------------------------------------------- /tests/test_slide_topic_generators.py: -------------------------------------------------------------------------------- 1 | import random 2 | import unittest 3 | 4 | from talkgenerator.schema.slide_topic_generators import SideTrackingTopicGenerator 5 | 6 | 7 | class SlideTopicGeneratorsTest(unittest.TestCase): 8 | def setUp(self) -> None: 9 | random.seed(123) 10 | 11 | def test_conceptnet_sidetracking_standard(self): 12 | generator = SideTrackingTopicGenerator(["cat"], 5) 13 | self.assertTrue( 14 | len([seed for seed in generator.all_seeds() if seed is None]) == 0 15 | ) 16 | 17 | def test_conceptnet_sidetracking_non_existing_topic(self): 18 | non_existing_word = "nonexistingword-bla-bla" 19 | generator = SideTrackingTopicGenerator([non_existing_word], 10) 20 | self.assertTrue( 21 | len([seed for seed in generator.all_seeds() if seed == non_existing_word]) 22 | == 10 23 | ) 24 | 25 | def test_conceptnet_sidetracking_hard_topic(self): 26 | generator = SideTrackingTopicGenerator(["scratch furniture"], 10) 27 | self.assertTrue( 28 | len([seed for seed in generator.all_seeds() if seed is None]) == 0 29 | ) 30 | 31 | def test_conceptnet_sidetracking_multi_topic(self): 32 | generator = SideTrackingTopicGenerator(["cat", "house", "dog"], 6) 33 | seeds = generator.all_seeds() 34 | print("multi_topic", seeds) 35 | self.assertTrue(seeds[0] == "cat") 36 | self.assertTrue(seeds[2] == "house") 37 | self.assertTrue(seeds[4] == "dog") 38 | # Nothing is none 39 | self.assertTrue( 40 | len([seed for seed in generator.all_seeds() if seed is None]) == 0 41 | ) 42 | 43 | def test_conceptnet_sidetracking_multi_topic_one_each(self): 44 | topics = ["cat", "house", "chicken", "horse", "dog"] 45 | generator = SideTrackingTopicGenerator(topics, len(topics)) 46 | self.assertEqual(topics, generator.all_seeds()) 47 | 48 | 49 | if __name__ == "__main__": 50 | unittest.main() 51 | -------------------------------------------------------------------------------- /talkgenerator/sources/pixabay.py: -------------------------------------------------------------------------------- 1 | import logging 2 | import requests 3 | from typing import List 4 | 5 | from talkgenerator import settings 6 | from talkgenerator.datastructures.image_data import ImageData 7 | 8 | logging.getLogger("pixabay").setLevel(logging.DEBUG) 9 | logger = logging.getLogger("talkgenerator") 10 | 11 | 12 | def get_pixabay_session(): 13 | creds = settings.pixabay_auth() 14 | api_key = creds["pixabay_key"] 15 | return api_key 16 | 17 | 18 | def search_horizontal(query): 19 | return search_photos(query, orientation="horizontal") 20 | 21 | 22 | def search_vertical(query): 23 | return search_photos(query, orientation="vertical") 24 | 25 | 26 | def search_photos(query, orientation="all") -> List[ImageData]: 27 | api_key = get_pixabay_session() 28 | logger.debug("pixabay_api_key: {}".format(api_key)) 29 | query = query.replace(' ', '+') 30 | logger.debug("pixabay.search_photos called with query: {}".format(query)) 31 | url_query = f"https://pixabay.com/api/?key={api_key}&q={query}&image_type=photo&orientation={orientation}" 32 | logger.debug("pixabay url_query: {}".format(url_query)) 33 | if api_key and url_query: 34 | results = requests.get(url=url_query) 35 | logger.debug("request response results: {}".format(results)) 36 | response_data = results.json() 37 | if results.status_code == 200 and 
response_data["hits"]: 38 | images = [] 39 | for photo in response_data["hits"]: 40 | link_download = photo["largeImageURL"] 41 | creator = photo["user"] + " (via Pixabay)" if "user" in photo else None 42 | images.append(ImageData(image_url=link_download, source=creator)) 43 | return images 44 | else: 45 | logger.warning( 46 | 'Pixabay could not find results for "{}", which might be due to missing/erroneous access keys'.format( 47 | query 48 | ) 49 | ) 50 | else: 51 | logger.warning("No active Pixabay session due to missing/wrong credentials.") 52 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | from os import listdir 2 | from os.path import isfile, join 3 | 4 | from setuptools import setup 5 | from setuptools import find_packages 6 | 7 | # Build a list of text-templates to install 8 | DATA_PATH = "talkgenerator/data/" 9 | text_templates_path = DATA_PATH + "text-templates/" 10 | text_template_files = [ 11 | f for f in listdir(text_templates_path) if isfile(join(text_templates_path, f)) 12 | ] 13 | all_text_templates = [] 14 | for f in text_template_files: 15 | all_text_templates.append(text_templates_path + f) 16 | 17 | 18 | prohibited_images_path = DATA_PATH + "prohibited_images/" 19 | prohibited_images_files = [ 20 | f 21 | for f in listdir(prohibited_images_path) 22 | if isfile(join(prohibited_images_path, f)) 23 | ] 24 | prohibited_images = [] 25 | for f in prohibited_images_files: 26 | prohibited_images.append(prohibited_images_path + f) 27 | 28 | with open('requirements.txt') as f: 29 | required = f.read().splitlines() 30 | 31 | setup( 32 | name="talkgenerator", 33 | version="3.0", 34 | description="Automatically generating presentation slide decks based on a given topic for improvised presentations", 35 | long_description="Check our GitHub repository on https://github.com/korymath/talk-generator for more information!", 36 | author="Thomas Winters, Kory Mathewson", 37 | author_email="info@thomaswinters.be", 38 | url="https://github.com/korymath/talk-generator", 39 | license="MIT License", 40 | platforms=["Mac", "Linux"], 41 | packages=find_packages(), # auto-discovery submodules ["talkgenerator"], 42 | package_dir={"talkgenerator": "talkgenerator"}, 43 | data_files=[ 44 | ("images", [DATA_PATH + "images/black-transparent.png"]), 45 | ("images", [DATA_PATH + "images/error_placeholder.png"]), 46 | ("powerpoint", [DATA_PATH + "powerpoint/template.pptx"]), 47 | ("prohibited_images", prohibited_images), 48 | ("text-templates", all_text_templates), 49 | ], 50 | include_package_data=True, 51 | install_requires=required, 52 | entry_points={"console_scripts": ["talkgenerator = talkgenerator.run:main_cli"]}, 53 | ) 54 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | 2 | # VSCode Files (IDE) 3 | .vscode 4 | *.code-workspace 5 | 6 | # Mac hygiene 7 | .DS_Store 8 | 9 | # AWS Keys 10 | .aws 11 | 12 | # Runtime generated files 13 | downloads/* 14 | output/*.pptx 15 | data/eval/timings.txt 16 | 17 | # Byte-compiled / optimized / DLL files 18 | __pycache__/ 19 | *.py[cod] 20 | *$py.class 21 | 22 | # C extensions 23 | *.so 24 | 25 | # Distribution / packaging 26 | .Python 27 | build/ 28 | develop-eggs/ 29 | dist/ 30 | downloads/ 31 | eggs/ 32 | .eggs/ 33 | lib/ 34 | lib64/ 35 | parts/ 36 | sdist/ 37 | var/ 38 | wheels/ 39 | *.egg-info/ 40 | 
.installed.cfg 41 | *.egg 42 | MANIFEST 43 | 44 | # PyInstaller 45 | # Usually these files are written by a python script from a template 46 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 47 | *.manifest 48 | *.spec 49 | 50 | # Installer logs 51 | pip-log.txt 52 | pip-delete-this-directory.txt 53 | 54 | # Unit test / coverage reports 55 | htmlcov/ 56 | .tox/ 57 | .coverage 58 | .coverage.* 59 | .cache 60 | nosetests.xml 61 | coverage.xml 62 | *.cover 63 | .hypothesis/ 64 | .pytest_cache/ 65 | 66 | # Translations 67 | *.mo 68 | *.pot 69 | 70 | # Django stuff: 71 | *.log 72 | local_settings.py 73 | db.sqlite3 74 | 75 | # Flask stuff: 76 | instance/ 77 | .webassets-cache 78 | 79 | # Scrapy stuff: 80 | .scrapy 81 | 82 | # Sphinx documentation 83 | docs/_build/ 84 | 85 | # PyBuilder 86 | target/ 87 | 88 | # Jupyter Notebook 89 | .ipynb_checkpoints 90 | 91 | # pyenv 92 | .python-version 93 | 94 | # celery beat schedule file 95 | celerybeat-schedule 96 | 97 | # SageMath parsed files 98 | *.sage.py 99 | 100 | # Environments 101 | .env 102 | .venv 103 | env/ 104 | venv/ 105 | ENV/ 106 | env.bak/ 107 | venv.bak/ 108 | 109 | # Spyder project settings 110 | .spyderproject 111 | .spyproject 112 | 113 | # Rope project settings 114 | .ropeproject 115 | 116 | # mkdocs documentation 117 | /site 118 | 119 | # mypy 120 | .mypy_cache/ 121 | .idea/ 122 | output/ 123 | data/auth/reddit.json 124 | test_output/ 125 | data/auth/wikihow.json 126 | data/auth/pexels.json 127 | venv2/ 128 | venv3/ 129 | 130 | tmp/ 131 | -------------------------------------------------------------------------------- /talkgenerator/sources/phrasefinder.py: -------------------------------------------------------------------------------- 1 | from json import JSONDecodeError 2 | from pathlib import Path 3 | 4 | import requests 5 | # from cachier import cachier 6 | 7 | from talkgenerator.util import language_util 8 | 9 | URL = "https://api.phrasefinder.io/search?corpus=eng-us&query={}&nmax=1" 10 | 11 | 12 | # @cachier(cache_dir=Path("..", "tmp").absolute()) 13 | def _search(word): 14 |     # Percent-encode spaces so multi-word queries form a valid URL 15 |     word = word.replace(" ", "%20") 16 |     url = URL.format(word) 17 |     try: 18 |         result = requests.get(url) 19 |         result = result.json() 20 |         if result: 21 |             return result["phrases"] 22 |     except JSONDecodeError: 23 |         return None 24 | 25 | 26 | def _get_absolute_frequencies(word): 27 |     pf_results = _search(word) 28 |     if pf_results: 29 |         absolute_frequencies = [] 30 |         for word_count in pf_results: 31 |             word = word_count["tks"][0]["tt"] 32 |             count = word_count["mc"] 33 |             absolute_frequencies.append((word, count)) 34 |         return absolute_frequencies 35 | 36 | 37 | def get_absolute_frequency(word): 38 |     absolute_frequencies = _get_absolute_frequencies(word) 39 |     absolute_frequency = list( 40 |         filter(lambda word_count: word_count[0] == word, absolute_frequencies) 41 |     ) 42 |     if len(absolute_frequency) == 1: 43 |         return absolute_frequency[0][1] 44 | 45 | 46 | def get_absolute_frequency_any_casing(word): 47 |     absolute_frequencies = _get_absolute_frequencies(word) 48 |     if absolute_frequencies: 49 |         return sum(map(lambda word_count: word_count[1], absolute_frequencies)) 50 | 51 | 52 | def get_rarest_word(sentence): 53 |     words = [ 54 |         language_util.replace_non_alphabetical_characters(word) 55 |         for word in sentence.split(" ") 56 |     ] 57 |     words = filter(lambda word: word is not None and len(word.strip()) > 0, words) 58 | 59 |     min_word = None 60 |     min_freq = -1 61 |     for word in words: 62 |         freq = get_absolute_frequency_any_casing(word) 63 |         if freq is
not None and (min_freq == -1 or freq < min_freq): 63 | min_word = word 64 | min_freq = freq 65 | return min_word 66 | # return min(words, key=lambda word: get_absolute_frequency_any_casing(word)) 67 | -------------------------------------------------------------------------------- /talkgenerator/util/os_util.py: -------------------------------------------------------------------------------- 1 | import ntpath 2 | import os 3 | import logging 4 | import pathlib 5 | import sys 6 | from functools import lru_cache 7 | from typing import Union 8 | 9 | import requests 10 | from PIL import Image 11 | from PIL.Image import DecompressionBombError 12 | 13 | # import tempfile 14 | from talkgenerator.datastructures.image_data import ImageData 15 | 16 | logger = logging.getLogger("talkgenerator") 17 | 18 | 19 | def to_actual_file(filename=""): 20 | """Return the path to the filename specified. 21 | This is used most often to get the path of data files.""" 22 | 23 | util_folder = os.path.dirname((os.path.dirname(os.path.abspath(__file__)))) 24 | return os.path.join(util_folder, filename) 25 | 26 | 27 | @lru_cache(maxsize=20) 28 | def read_lines(filename): 29 | actual_file = to_actual_file(filename) 30 | return [line.rstrip("\n") for line in open(actual_file)] 31 | 32 | 33 | @lru_cache(maxsize=20) 34 | def open_image(filename): 35 | try: 36 | return Image.open(filename) 37 | except DecompressionBombError: 38 | return None 39 | 40 | 41 | _PROHIBITED_IMAGES_DIR = "data/prohibited_images/" 42 | 43 | 44 | @lru_cache(maxsize=1) 45 | def get_prohibited_images(): 46 | actual_dir = to_actual_file(_PROHIBITED_IMAGES_DIR) 47 | return list( 48 | [open_image(os.path.join(actual_dir, url)) for url in os.listdir(actual_dir)] 49 | ) 50 | 51 | 52 | @lru_cache(maxsize=20) 53 | def is_image(content: Union[str, ImageData]): 54 | if isinstance(content, ImageData): 55 | return True 56 | else: 57 | return _is_image_path(content) 58 | 59 | 60 | def _is_image_path(content: str): 61 | if not bool(content) or bool(content) is content or not content.lower: 62 | return False 63 | lower_url = content.lower() 64 | return ( 65 | ".jpg" in lower_url 66 | or ".gif" in lower_url 67 | or ".png" in lower_url 68 | or ".jpeg" in lower_url 69 | ) 70 | 71 | 72 | def show_logs(given_logger: logging.Logger): 73 | given_logger.setLevel(logging.DEBUG) 74 | handler = logging.StreamHandler(sys.stdout) 75 | handler.setLevel(logging.DEBUG) 76 | formatter = logging.Formatter( 77 | "[%(levelname)s %(asctime)s %(filename)s:%(lineno)s] %(message)s" 78 | ) 79 | handler.setFormatter(formatter) 80 | given_logger.addHandler(handler) 81 | -------------------------------------------------------------------------------- /talkgenerator/data/text-templates/chart_texts.json: -------------------------------------------------------------------------------- 1 | { 2 | "origin": [ 3 | "#yes_no_question#" 4 | ], 5 | "yes_no_question": [ 6 | "Do you know what {seed.a} is?", 7 | "Do you like {seed.wikihow_action.ing.lower}?", 8 | "Would you like to {seed.wikihow_action.lower}?", 9 | "Are you afraid of {seed.wikihow_action.ing.lower}?", 10 | "Would you accept it if your #family_member# started to {seed.wikihow_action.lower}?", 11 | "Do you want to learn how to {seed.wikihow_action.lower}?", 12 | "Do you know how to {seed.wikihow_action.lower}?", 13 | "Do you agree that {seed.a} belongs in {seed.conceptnet_location}" 14 | ], 15 | "location_question": [ 16 | "Places where you can find {seed.singular.a.lower}", 17 | "Places you will find {seed.plural.lower}", 18 | 
"{seed.singular.a.title}'s Favourite Location" 19 | ], 20 | "property_question": [ 21 | "My favourite properties of {seed.a}", 22 | "What I admire most about {seed.plural}", 23 | "What {seed.plural} mostly are" 24 | ], 25 | "correlation_title": [ 26 | "Correlation between {x_label.get_last_noun_and_article.plural} and {y_label.plural}", 27 | "Relation between {x_label.get_last_noun_and_article.plural} and {y_label.plural}", 28 | "Influence of {x_label.get_last_noun_and_article.plural} on {y_label.plural}", 29 | "Correlation between {x_label.is_verb.ing} and {y_label.plural}", 30 | "Relation between {x_label.is_verb.ing} and {y_label.plural}", 31 | "Influence of {x_label.is_verb.ing} on {y_label.plural}", 32 | "{x_label} VS {y_label.plural}" 33 | ], 34 | "funny_yes_no_answer": [ 35 | "Absolutely", 36 | "Absolutely not", 37 | "I'd rather die", 38 | "Can you repeat the question?", 39 | "Can I instead just {seed.wikihow_action.2_to_1_pronouns.lower}?", 40 | "I'd rather {seed.wikihow_action.2_to_1_pronouns.lower}", 41 | "But sir, I *AM* {seed.a}", 42 | "{chart_title.get_last_noun_and_article.2_to_1_pronouns.title}#?#", 43 | "HOW DARE YOU SPEAK ABOUT {chart_title.get_last_noun_and_article.2_to_1_pronouns.upper.plural}#?#", 44 | "I'd rather not talk about {chart_title.get_last_noun_and_article.2_to_1_pronouns.lower}", 45 | "My beliefs don't allow that", 46 | "My #family_member# doesn't allow me that", 47 | "Who wouldn't?" 48 | ], 49 | "?": [ 50 | "?", 51 | "??", 52 | "???" 53 | ], 54 | "family_member": [ 55 | "kid", 56 | "daughter", 57 | "son", 58 | "loved one" 59 | ] 60 | } -------------------------------------------------------------------------------- /tests/test_specific_text_generators.py: -------------------------------------------------------------------------------- 1 | import random 2 | import unittest 3 | 4 | from talkgenerator.schema.content_generator_structures import ( 5 | create_tracery_generator, 6 | create_templated_text_generator, 7 | ) 8 | 9 | default_number_of_generations = 100 10 | default_arguments = {"seed": "house", "presenter": "A. 
Nonymous", "topic": "house"} 11 | 12 | 13 | class SpecificTextGeneratorTest(unittest.TestCase): 14 | def setUp(self): 15 | random.seed(1) 16 | 17 | def _tracery_tester( 18 | self, file_location, grammar_element="origin", print_generations=False 19 | ): 20 | tracery_generator = create_tracery_generator(file_location, grammar_element) 21 | generations = [ 22 | tracery_generator(default_arguments) 23 | for _ in range(0, default_number_of_generations) 24 | ] 25 | if print_generations: 26 | print("\n".join(generations)) 27 | self.assertEqual(len(generations), default_number_of_generations) 28 | 29 | def _templated_text_generator_tester( 30 | self, 31 | file_location, 32 | print_generations=False, 33 | number_of_generations=default_number_of_generations, 34 | seed=default_arguments["seed"], 35 | ): 36 | templated_generator = create_templated_text_generator(file_location) 37 | arguments = dict(default_arguments) 38 | arguments["seed"] = seed 39 | generations = [ 40 | templated_generator(arguments) for _ in range(0, number_of_generations) 41 | ] 42 | if print_generations: 43 | print("\n".join(generations)) 44 | self.assertEqual(len(generations), number_of_generations) 45 | return generations 46 | 47 | def test_talk_title_generator(self): 48 | self._tracery_tester("data/text-templates/talk_title.json") 49 | 50 | def test_talk_subtitle_generator(self): 51 | self._tracery_tester("data/text-templates/talk_subtitle.json", "job") 52 | 53 | def test_anecdote_prompt_generator(self): 54 | self._templated_text_generator_tester( 55 | "data/text-templates/anecdote_prompt.txt", True 56 | ) 57 | 58 | def test_captions_generator(self): 59 | generations = self._templated_text_generator_tester( 60 | "data/text-templates/double_captions.txt", 61 | True, 62 | number_of_generations=100, 63 | seed="cat", 64 | ) 65 | 66 | for generation in generations: 67 | self.assertTrue(" List[ImageData]: 60 | if unsplash_session and query: 61 | results = unsplash_session.search(type_="photos", query=query) 62 | if results and results.body: 63 | images = [] 64 | for photo in results.entries: 65 | images.append(_map_to_image_data(photo)) 66 | return images 67 | else: 68 | logger.warning( 69 | 'Unsplash could not find results for "{}", which might be due to missing/erroneous access keys'.format( 70 | query 71 | ) 72 | ) 73 | elif unsplash_session and not query: 74 | return random_as_list() 75 | else: 76 | logger.warning("No active Unsplash session due to missing/wrong credentials.") 77 | -------------------------------------------------------------------------------- /tests/test_talkgenerator.py: -------------------------------------------------------------------------------- 1 | import random 2 | import logging 3 | import unittest 4 | from unittest import mock 5 | 6 | from talkgenerator.schema import slide_schemas 7 | from talkgenerator import generator 8 | from talkgenerator.slide import powerpoint_slide_creator 9 | from talkgenerator.util import os_util 10 | 11 | 12 | class TestTalkGenerator(unittest.TestCase): 13 | def setUp(self): 14 | random.seed(1) 15 | self.default_args = mock.Mock() 16 | self.default_args.configure_mock(topic="cat") 17 | self.default_args.configure_mock(num_slides=3) 18 | self.default_args.configure_mock(schema="default") 19 | self.default_args.configure_mock(title=None) 20 | self.default_args.configure_mock(parallel=True) 21 | self.default_args.configure_mock( 22 | output_folder=os_util.to_actual_file("../output/test/") 23 | ) 24 | self.default_args.configure_mock(open_ppt=False) 25 | 
self.default_args.configure_mock(save_ppt=True) 26 | self.default_args.configure_mock(int_seed=123) 27 | 28 | def test_serial(self): 29 | self.default_args.configure_mock(parallel=False) 30 | ppt, _, _ = generator.generate_presentation_using_cli_arguments( 31 | self.default_args 32 | ) 33 | 34 | self.assertEqual(3, len(ppt.slides)) 35 | 36 | def test_to_dictionary(self): 37 | _, slide_deck, _ = generator.generate_presentation( 38 | schema="default", 39 | slides=3, 40 | topic="cat", 41 | title=None, 42 | presenter=None, 43 | parallel=True, 44 | int_seed=123, 45 | save_ppt=False, 46 | open_ppt=False, 47 | print_logs=False, 48 | ) 49 | slides_dict = slide_deck.to_slide_deck_dictionary() 50 | logging.info(slides_dict) 51 | self.assertIsNotNone(slides_dict) 52 | 53 | def test_all_slide_generators(self): 54 | basic_presentation_context = { 55 | "topic": "dog", 56 | "seed": "cat", 57 | "presenter": "An O. Nymous", 58 | "title": "Mock title", 59 | } 60 | 61 | presentation = powerpoint_slide_creator.create_new_powerpoint() 62 | 63 | for slide_generator in slide_schemas.all_slide_generators: 64 | logging.info("Testing Slide Generator: {}".format(slide_generator)) 65 | random.seed(123) 66 | slide, _ = slide_generator.generate( 67 | basic_presentation_context, [] 68 | ) 69 | slide.create_powerpoint_slide(presentation) 70 | 71 | 72 | if __name__ == "__main__": 73 | unittest.main() 74 | -------------------------------------------------------------------------------- /talkgenerator/data/text-templates/talk_subtitle.json: -------------------------------------------------------------------------------- 1 | { 2 | "origin": [ 3 | "#subtitle#" 4 | ], 5 | "subtitle": [ 6 | "A presentation by #name_and_job#", 7 | "By #name_and_job#", 8 | "Presented by #name_and_job#" 9 | ], 10 | "name_and_job": [ 11 | "{presenter}, #job#", 12 | "{presenter},\n#job#" 13 | ], 14 | "title": [ 15 | "professor ", 16 | "dr ", 17 | "ir ", 18 | "", 19 | "" 20 | ], 21 | "knowledge_person": [ 22 | "Researcher", 23 | "Expert", 24 | "Professor", 25 | "Doctor", 26 | "Master", 27 | "Guru", 28 | "PhD", 29 | "Fanatic" 30 | ], 31 | "job": [ 32 | "#knowledge_person# in #science#", 33 | "#knowledge_person# of #science#", 34 | "#science# #knowledge_person#", 35 | "#science# #knowledge_person#" 36 | ], 37 | "science": [ 38 | "{seed.title} #science_type#", 39 | "{seed.title} #science_type#", 40 | "#science_specifier# {seed.title} #science_type#", 41 | "#science_specifier# {seed.title} #science_type#", 42 | "#science_specifier# {seed.title} #science_type#", 43 | "#science_prefix#{seed.title.last_letter_is_vowel}#science_suffix#", 44 | "#science_prefix#{seed.title.last_letter_is_consonant}o#science_suffix#", 45 | "#science_prefix#{seed.title} #science_type#", 46 | "#science_specifier# #science_prefix#{seed.title} #science_type#" 47 | ], 48 | "science_prefix": [ 49 | "Meta-", 50 | "Aero", 51 | "Bio", 52 | "Anti-", 53 | "Eco", 54 | "Electro", 55 | "Micro", 56 | "Macro", 57 | "Nano", 58 | "Poly" 59 | ], 60 | "science_suffix": [ 61 | "graphy", 62 | "logy", 63 | "nomy", 64 | "nomics", 65 | "mony", 66 | "nymy" 67 | ], 68 | "science_specifier": [ 69 | "Advanced", 70 | "Applied", 71 | "Comparative", 72 | "Descriptive", 73 | "Digital", 74 | "Dynamical", 75 | "Ecological", 76 | "Electronic", 77 | "Empirical", 78 | "Ethical", 79 | "Executive", 80 | "Exploratory", 81 | "Fundamental", 82 | "Instrumental", 83 | "Logical", 84 | "Molecular", 85 | "Moral", 86 | "Neural", 87 | "Nuclear", 88 | "Observational", 89 | "Philosophy of", 90 | "Physical", 91 | "Pure", 92 | 
"Quantum", 93 | "Renewable", 94 | "Social", 95 | "Statistical", 96 | "Systematic", 97 | "Theoretical" 98 | ], 99 | "science_type": [ 100 | "Biology", 101 | "Chemistry", 102 | "Design", 103 | "Dynamics", 104 | "Engineering", 105 | "Fusion", 106 | "Literature", 107 | "Logic", 108 | "Mechanics", 109 | "Medicine", 110 | "Methodology", 111 | "Philosophy", 112 | "Physics", 113 | "Science", 114 | "Statistics", 115 | "Studies", 116 | "Systems", 117 | "Technology", 118 | "Theory" 119 | ] 120 | } -------------------------------------------------------------------------------- /talkgenerator/sources/shitpostbot.py: -------------------------------------------------------------------------------- 1 | import random 2 | from functools import lru_cache 3 | from pathlib import Path 4 | 5 | import requests 6 | from bs4 import BeautifulSoup 7 | # from cachier import cachier 8 | 9 | from talkgenerator.util import scraper_util 10 | 11 | _MAX_RANDOM_PAGE = 150 12 | _SEARCH_URL = ( 13 | "https://www.shitpostbot.com/gallery/sourceimages?query={" 14 | "}&review_state=accepted&order=total_rating&direction=DESC&page={} " 15 | ) 16 | 17 | 18 | def _search_shitpostbot_page(search_term, page): 19 | return [element[1] for element in _search_shitpostbot_page_rated(search_term, page)] 20 | 21 | 22 | @lru_cache(maxsize=20) 23 | # @cachier(cache_dir=Path("..", "tmp").absolute()) 24 | def _search_shitpostbot_page_rated(search_term, page): 25 | url = _SEARCH_URL.format(search_term, page, search_term.replace(" ", "+")) 26 | page = requests.get(url) 27 | if page: 28 | soup = BeautifulSoup(page.content, "html.parser") 29 | 30 | post_entries = soup.find_all("div", class_="col-md-4") 31 | image_urls = [] 32 | for entry in post_entries: 33 | # Check if author doesn't have the search term (False positive) 34 | user = ( 35 | entry.find("div", class_="caption") 36 | .find_all("p")[1] 37 | .find("a") 38 | .get_text() 39 | ) 40 | if bool(search_term) and search_term in user: 41 | continue 42 | 43 | # Get real image url 44 | image_url = entry.find("img").get("src") 45 | image_url = _get_source_image(image_url) 46 | rating_div = entry.find("span", class_="rating") 47 | rating = int(rating_div.text if rating_div else 1) 48 | if rating > 0: 49 | image_urls.append((rating, image_url)) 50 | 51 | return image_urls 52 | 53 | 54 | source_image_prefix = "https://www.shitpostbot.com/img/sourceimages/" 55 | 56 | 57 | def _get_source_image(image_url): 58 | image_url = image_url.replace("%2F", "/") 59 | last_slash_idx = image_url.rfind("/") 60 | image_file_name = image_url[last_slash_idx + 1 :] 61 | return source_image_prefix + image_file_name 62 | 63 | 64 | def get_random_images(_): 65 | images = _search_shitpostbot_page("", random.choice(range(_MAX_RANDOM_PAGE))) 66 | return images 67 | 68 | 69 | def get_random_images_rated(_): 70 | images = _search_shitpostbot_page_rated("", random.choice(range(_MAX_RANDOM_PAGE))) 71 | return images 72 | 73 | 74 | _search_image_function = scraper_util.create_page_scraper(_search_shitpostbot_page) 75 | _search_image_function_rated = scraper_util.create_page_scraper( 76 | _search_shitpostbot_page_rated 77 | ) 78 | 79 | 80 | def search_images(search_term, number=50): 81 | return _search_image_function(search_term, number) 82 | 83 | 84 | def search_images_rated(search_term, number=50): 85 | return _search_image_function_rated(search_term, number) 86 | -------------------------------------------------------------------------------- /talkgenerator/settings.py: 
-------------------------------------------------------------------------------- 1 | import logging 2 | from environs import Env 3 | 4 | 5 | logger = logging.getLogger("talkgenerator") 6 | env = Env() 7 | env.read_env() 8 | 9 | reddit_keys = ["REDDIT_CLIENT_ID", "REDDIT_CLIENT_SECRET", "REDDIT_USER_AGENT"] 10 | wikihow_keys = ["WIKIHOW_USERNAME", "WIKIHOW_PASSWORD"] 11 | unsplash_keys = [ 12 | "UNSPLASH_ACCESS_KEY", 13 | "UNSPLASH_SECRET_KEY", 14 | "UNSPLASH_REDIRECT_URI", 15 | "UNSPLASH_CODE", 16 | ] 17 | 18 | all_keys_to_check = { 19 | "Reddit": reddit_keys, 20 | "WikiHow": wikihow_keys, 21 | "Unsplash": unsplash_keys, 22 | } 23 | 24 | 25 | def reddit_auth(): 26 | return { 27 | "client_id": env.str("REDDIT_CLIENT_ID", ""), 28 | "client_secret": env.str("REDDIT_CLIENT_SECRET", ""), 29 | "user_agent": env.str("REDDIT_USER_AGENT", ""), 30 | } 31 | 32 | 33 | def wikihow_auth(): 34 | return { 35 | "username": env.str("WIKIHOW_USERNAME", ""), 36 | "password": env.str("WIKIHOW_PASSWORD", ""), 37 | } 38 | 39 | 40 | def unsplash_auth(): 41 | return { 42 | "unsplash_access_key": env.str("UNSPLASH_ACCESS_KEY", ""), 43 | "unsplash_secret_key": env.str("UNSPLASH_SECRET_KEY", ""), 44 | "unsplash_redirect_uri": env.str("UNSPLASH_REDIRECT_URI", ""), 45 | "unsplash_code": env.str("UNSPLASH_CODE", ""), 46 | } 47 | 48 | 49 | def pixabay_auth(): 50 | return {"pixabay_key": env.str("PIXABAY_KEY", "")} 51 | 52 | 53 | def pexels_auth(): 54 | return {"pexels_key": env.str("PEXELS_KEY", "")} 55 | 56 | 57 | def _get_missing_keys(key_variables): 58 | missing = [] 59 | for key_name in key_variables: 60 | if len(env.str(key_name, "").strip()) == 0: 61 | missing.append(key_name) 62 | return missing 63 | 64 | 65 | def check_keys(key_variables, name): 66 | missing = _get_missing_keys(key_variables) 67 | if len(missing) > 0: 68 | logger.warning("Missing keys for {}: {}".format(name, missing)) 69 | return False 70 | return True 71 | 72 | 73 | def check_environment_variables(): 74 | print("CHECKING ENVIRONMENT VARIABLES") 75 | valid_env_file = all( 76 | check_keys(all_keys_to_check[element], element) for element in all_keys_to_check 77 | ) 78 | 79 | if not valid_env_file: 80 | print_env_file_warning() 81 | 82 | return valid_env_file 83 | 84 | 85 | def print_env_file_warning(): 86 | env_message = """ 87 | Hi! Before you can run talkgenerator you need to set some secret keys in an .env file. 88 | 89 | Which keys? 90 | ------------- 91 | Take a look at https://github.com/korymath/talk-generator#setting-up-required-authentication 92 | 93 | Creating an .env file 94 | ------------- 95 | $ touch .env 96 | $ echo VARIABLE_NEEDED=VALUE >> .env 97 | $ echo OTHER_VARIABLE_NEEDED=VALUE >> .env 98 | 99 | or you can use your favorite text editor (vi, nano, etc) to create it. 
100 | """ 101 | 102 | logger.error(env_message) 103 | -------------------------------------------------------------------------------- /talkgenerator/schema/presentation_schema_types.py: -------------------------------------------------------------------------------- 1 | from talkgenerator.schema.slide_schemas import * 2 | from talkgenerator.schema import slide_topic_generators 3 | from talkgenerator.schema.presentation_schema import PresentationSchema 4 | from talkgenerator.datastructures.slide_generator_data import ConstantWeightFunction 5 | from talkgenerator.datastructures.slide_generator_data import SlideGeneratorData 6 | from talkgenerator.slide import powerpoint_slide_creator 7 | from talkgenerator.slide import slide_generator_types 8 | 9 | # ================================== 10 | # ===== PRESENTATION SCHEMAS ===== 11 | # ================================== 12 | 13 | 14 | # This object holds all the information about how to generate the presentation 15 | presentation_schema = PresentationSchema( 16 | # Basic powerpoint generator 17 | powerpoint_creator=powerpoint_slide_creator.create_new_powerpoint, 18 | # Topic per slide generator 19 | seed_generator=slide_topic_generators.SideTrackingTopicGenerator, 20 | # Title of the presentation 21 | title_generator=talk_title_generator, 22 | # Slide generators 23 | slide_generators=all_slide_generators, 24 | # Max tags 25 | max_allowed_tags=default_max_allowed_tags, 26 | ) 27 | 28 | # Interview schema: Disallow about_me slides 29 | interview_max_allowed_tags = default_max_allowed_tags.copy() 30 | interview_max_allowed_tags["about_me"] = 0 31 | 32 | interview_schema = PresentationSchema( 33 | # Basic powerpoint generator 34 | powerpoint_creator=powerpoint_slide_creator.create_new_powerpoint, 35 | # Topic per slide generator 36 | seed_generator=slide_topic_generators.SideTrackingTopicGenerator, 37 | # Title of the presentation 38 | title_generator=talk_title_generator, 39 | # Slide generators 40 | slide_generators=all_slide_generators, 41 | # Max tags 42 | max_allowed_tags=interview_max_allowed_tags, 43 | ) 44 | 45 | # Test schema: for testing purposes 46 | 47 | test_schema = PresentationSchema( 48 | # Basic powerpoint generator 49 | powerpoint_slide_creator.create_new_powerpoint, 50 | # Title of the presentation 51 | title_generator=talk_title_generator, 52 | # Topic per slide generator 53 | # seed_generator=slide_topic_generators.SideTrackingTopicGenerator, 54 | seed_generator=slide_topic_generators.IdentityTopicGenerator, 55 | # Slide generators 56 | slide_generators=title_slide_generators 57 | + [ 58 | SlideGeneratorData( 59 | # slide_templates.generate_image_slide( 60 | slide_generator_types.ImageSlideGenerator.of( 61 | inspiration_title_generator, generate_unsplash_image 62 | ), 63 | weight_function=ConstantWeightFunction(8), 64 | allowed_repeated_elements=10, 65 | name="Test sourcing", 66 | ) 67 | ], 68 | # ignore_weights=True, 69 | ) 70 | 71 | 72 | # TED schema: using only images from approved sources 73 | ted_schema = PresentationSchema( 74 | # Basic powerpoint generator 75 | powerpoint_creator=powerpoint_slide_creator.create_new_powerpoint, 76 | # Topic per slide generator 77 | seed_generator=slide_topic_generators.SideTrackingTopicGenerator, 78 | # Title of the presentation 79 | title_generator=talk_ted_title_generator, 80 | # Slide generators 81 | slide_generators=title_slide_generators 82 | + history_slide_generators_copyright_free 83 | + single_image_slide_generators_copyright_free 84 | + statement_slide_generators_copyright_free 
85 | + captioned_images_slide_generators_copyright_free 86 | + own_chart_generators 87 | + conclusion_slide_generators_copyright_free, 88 | # Max tags 89 | max_allowed_tags={ 90 | # Absolute maxima 91 | "title": 1, 92 | "history": 1, 93 | "anecdote": 1, 94 | "location_chart": 1, 95 | "chart": 1, 96 | "deep": 2, 97 | # Relative (procentual) maxima 98 | "two_captions": 0.3, 99 | "three_captions": 0.2, 100 | "multi_captions": 0.3, 101 | "gif": 0.5, 102 | "quote": 0.2, 103 | "statement": 0.2, 104 | }, 105 | ) 106 | 107 | schemas = { 108 | "default": presentation_schema, 109 | "interview": interview_schema, 110 | "test": test_schema, 111 | "ted": ted_schema, 112 | } 113 | 114 | 115 | def get_schema(name): 116 | return schemas[name] 117 | -------------------------------------------------------------------------------- /tests/test_language_util.py: -------------------------------------------------------------------------------- 1 | import random 2 | import unittest 3 | 4 | from talkgenerator.util import language_util 5 | 6 | 7 | class LanguageUtilTest(unittest.TestCase): 8 | def setUp(self) -> None: 9 | random.seed(123) 10 | 11 | def test_check_and_download_no_exception(self): 12 | language_util.check_and_download() 13 | 14 | def test_to_plural(self): 15 | self.assertEqual("cats", language_util.to_plural("a cat")) 16 | self.assertEqual("cats", language_util.to_plural("cat")) 17 | self.assertEqual("cats", language_util.to_plural("cats")) 18 | 19 | def test_is_noun(self): 20 | self.assertTrue(language_util.is_noun("cat")) 21 | self.assertFalse(language_util.is_noun("see")) 22 | self.assertFalse(language_util.is_noun("because")) 23 | 24 | def test_is_verb(self): 25 | self.assertTrue(language_util.is_verb("see")) 26 | self.assertFalse(language_util.is_verb("cat")) 27 | self.assertFalse(language_util.is_verb("because")) 28 | 29 | def test_to_singular(self): 30 | self.assertEqual("cat", language_util.to_singular("cat")) 31 | self.assertEqual("cat", language_util.to_singular("cats")) 32 | 33 | def test_ing(self): 34 | self.assertEqual("toying", language_util.to_ing_form("toy")) 35 | self.assertEqual("playing", language_util.to_ing_form("play")) 36 | self.assertEqual("lying", language_util.to_ing_form("lie")) 37 | self.assertEqual("flying", language_util.to_ing_form("fly")) 38 | self.assertEqual("fleeing", language_util.to_ing_form("flee")) 39 | self.assertEqual("making", language_util.to_ing_form("make")) 40 | 41 | def test_verb_detection(self): 42 | self.assertEqual( 43 | "ACT like a cat", 44 | language_util.apply_function_to_verb("act like a cat", str.upper), 45 | ) 46 | # self.assertEqual("kitten PROOF your house", 47 | # language_util.apply_function_to_verb("kitten proof your house", str.upper)) 48 | 49 | def test_to_present_participle(self): 50 | self.assertEqual( 51 | "acting like a cat", language_util.to_present_participle("act like a cat") 52 | ) 53 | self.assertEqual( 54 | "quitly acting like a cat", 55 | language_util.to_present_participle("quitly act like a cat"), 56 | ) 57 | 58 | def test_replace(self): 59 | self.assertEqual( 60 | "this is your test", 61 | language_util.replace_word("this is my test", "my", "your"), 62 | ) 63 | self.assertEqual( 64 | "test if morphed, before comma", 65 | language_util.replace_word( 66 | "test if changed, before comma", "changed", "morphed" 67 | ), 68 | ) 69 | self.assertEqual( 70 | "Success capital", 71 | language_util.replace_word("Test capital", "test", "success"), 72 | ) 73 | self.assertEqual( 74 | "Your test is testing if your, is changed", 75 | 
language_util.replace_word( 76 | "My test is testing if my, is changed", "my", "your" 77 | ), 78 | ) 79 | self.assertEqual( 80 | "Last word is morphed", 81 | language_util.replace_word("Last word is changed", "changed", "morphed"), 82 | ) 83 | 84 | def test_get_last_noun_and_article(self): 85 | self.assertEqual( 86 | "a cat", 87 | language_util.get_last_noun_and_article("introduce your family to a cat"), 88 | ) 89 | self.assertEqual( 90 | "the family", 91 | language_util.get_last_noun_and_article("show your cat to the family"), 92 | ) 93 | self.assertEqual( 94 | "my cat", language_util.get_last_noun_and_article("What to do with my cat") 95 | ) 96 | self.assertEqual( 97 | "your cat", language_util.get_last_noun_and_article("do you like your cat") 98 | ) 99 | 100 | def test_replace_pronouns(self): 101 | self.assertEqual( 102 | "I care about me and my family", 103 | language_util.second_to_first_pronouns("I care about you and your family"), 104 | ) 105 | 106 | # def test_is_noun(self): 107 | # self.assertTrue(language_util.is_noun("cat")) 108 | # self.assertTrue(language_util.is_noun("dog")) 109 | # self.assertTrue(language_util.is_noun("food")) 110 | # self.assertTrue(language_util.is_noun("pet")) 111 | 112 | # def test_is_verb(self): 113 | # self.assertTrue(language_util.is_verb("act")) 114 | # self.assertTrue(language_util.is_verb("pet")) 115 | # self.assertTrue(language_util.is_verb("kiss")) 116 | 117 | # def test_is_verb_action(self): 118 | # self.assertTrue(language_util.is_verb("kill a mockingbird")) 119 | # self.assertTrue(language_util.is_verb("act like a cat")) 120 | # self.assertTrue(language_util.is_verb("speak English")) 121 | 122 | if __name__ == "__main__": 123 | unittest.main() 124 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Talk Powerpoint Generator 2 | 3 | [![CircleCI](https://circleci.com/gh/korymath/talk-generator.svg?style=svg&circle-token=dcba7d5a9ff7953cff0526e201990c0b811b3aae)](https://circleci.com/gh/korymath/talk-generator) 4 | [![codecov](https://codecov.io/gh/korymath/talk-generator/branch/master/graph/badge.svg?token=gqkCyuXop0)](https://codecov.io/gh/korymath/talk-generator) 5 | [![License](https://img.shields.io/github/license/mashape/apistatus.svg)](https://github.com/korymath/britbot/blob/master/LICENSE.md) 6 | 7 | This program automatically generates PowerPoints about any topic. 8 | These presentation slide decks can be used by improvisers for the improvisational comedy format *"Improvised TED talk"* or *"Powerpoint Karaoke"*. 9 | In such games, the actors have to present an unseen presentation slide deck, but pretend to be an expert and explain *"their"* slide show choices. 10 | 11 | ## Demo 12 | 13 | Try out this generator on our online platform: [talkgenerator.com](http://talkgenerator.com/). 14 | 15 | ### Example 16 | 17 | ![Automatically Generated](https://media.giphy.com/media/MXXe522nIAA9JZjExI/giphy.gif) 18 | 19 | ## Easy Install and Run 20 | 21 | Our program relies on certain APIs that require authentication in order to use them. 22 | Create a file named `.env` (don't forget the period) in your project directory, and fill it with the correct API keys as described on our [wiki page about this](https://github.com/korymath/talk-generator/wiki/Setting-Up-API-Keys). 
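For example, a minimal `.env` could look like the sketch below. The variable names are the ones read by `talkgenerator/settings.py`; the values are placeholders, and any keys you leave out are only reported as missing when the generator starts.

```sh
# .env (placeholder values only; never commit this file, it is already listed in .gitignore)
REDDIT_CLIENT_ID=your-reddit-client-id
REDDIT_CLIENT_SECRET=your-reddit-client-secret
REDDIT_USER_AGENT=your-user-agent
WIKIHOW_USERNAME=your-wikihow-username
WIKIHOW_PASSWORD=your-wikihow-password
UNSPLASH_ACCESS_KEY=your-unsplash-access-key
UNSPLASH_SECRET_KEY=your-unsplash-secret-key
UNSPLASH_REDIRECT_URI=your-unsplash-redirect-uri
UNSPLASH_CODE=your-unsplash-code
PIXABAY_KEY=your-pixabay-key
PEXELS_KEY=your-pexels-key
```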
23 | 24 | ```sh 25 | # Make a new Python 3 virtual environment 26 | python3 -m venv venv; 27 | 28 | # Activate the virtual environment 29 | source venv/bin/activate; 30 | 31 | # Upgrade pip and install requirements 32 | pip install --upgrade pip setuptools; 33 | python3 -m pip install -r requirements.txt; 34 | 35 | # Download NLTK dependencies 36 | python run_nltk_download.py; 37 | 38 | # Install the Talk Generator 39 | pip install -e .; 40 | 41 | # Generate a 10 slide talk with topic peanuts 42 | talkgenerator --topic "peanuts" --num_slides 10 43 | ``` 44 | 45 | ### Run arguments 46 | 47 | | Argument | Description | 48 | | ---------------------- | ------------------------- | 49 | | `topic` | The topic of the generated talk. This works best if it is a common, well-known noun. Use comma-separated words to generate a slide deck about multiple topics | 50 | | `slides` | The number of slides in the generated presentation (*default: 10*) | 51 | | `schema` | The presentation schema to use when generating the presentation. Currently, four schemas are implemented: `default`, `interview`, `test` (for testing during development) and `ted` | 52 | | `title` | Title of the presentation. Either `topic` or this one should be set in order to generate a slide deck (just setting `topic` is usually more fun though) | 53 | | `presenter` | The name that will be present on the first slide. Leave blank for an automatically generated name | 54 | | `output_folder` | The folder to output the generated presentations (*default: `./output/`*) | 55 | | `save_ppt` | If this flag is true (*default*), the generated powerpoint will be saved in the `output_folder` | 56 | | `open_ppt` | If this flag is true (*default*), the generated powerpoint will automatically open after it is generated | 57 | | `parallel` | If this flag is true (*default*), the generator will generate all slides in parallel | 58 | 59 | ## Program structure 60 | 61 | See the [wiki](https://github.com/korymath/talk-generator/wiki/Program-structure) to learn more about the inner implementation. 62 | 63 | ## Tests 64 | 65 | Test files are `tests/*.py`, prefixed with `test_`. Test files use the `unittest` module. 66 | They can all be run together in PyCharm by right-clicking `talk-generator` and selecting *Run 'Unittests in talk-generator'*, or from the command line with coverage: 67 | 68 | ```sh 69 | coverage run -m pytest; coverage html 70 | ``` 71 | 72 | Test coverage is automatically handled by `codecov`. Tests are automatically run with CircleCI based on the `.yml` file in the `.circleci` directory. 73 | 74 | ## Credits 75 | 76 | This generator is made by 77 | [Thomas Winters](https://github.com/TWinters) 78 | and [Kory Mathewson](https://github.com/korymath), 79 | with contributions from 80 | [Shaun Farrugia](https://github.com/h0h0h0) 81 | and [Julian Faid](https://github.com/jfaid). 82 | 83 | If you would like to refer to this project in academic work, please cite the following paper: 84 | 85 | Winters T., Mathewson K.W. (2019) **Automatically Generating Engaging Presentation Slide Decks**. In: Ekárt A., Liapis A., Castro Pena M. (eds) Computational Intelligence in Music, Sound, Art and Design. EvoMUSART 2019. Lecture Notes in Computer Science, vol 11453. 
Springer, Cham 86 | 87 | ```sh 88 | @InProceedings{winters2019tedric, 89 | author="Winters, Thomas 90 | and Mathewson, Kory W.", 91 | editor="Ek{\'a}rt, Anik{\'o} 92 | and Liapis, Antonios 93 | and Castro Pena, Mar{\'i}a Luz", 94 | title="Automatically Generating Engaging Presentation Slide Decks", 95 | booktitle="Computational Intelligence in Music, Sound, Art and Design", 96 | year="2019", 97 | publisher="Springer International Publishing", 98 | address="Cham", 99 | pages="127--141", 100 | isbn="978-3-030-16667-0" 101 | } 102 | ``` 103 | 104 | ## License 105 | 106 | MIT License. Copyright (c) 2018-2020 [Kory Mathewson](https://github.com/korymath) and [Thomas Winters](https://github.com/TWinters) 107 | -------------------------------------------------------------------------------- /tests/test_text_generator.py: -------------------------------------------------------------------------------- 1 | import random 2 | import unittest 3 | 4 | from talkgenerator.sources import text_generator 5 | from talkgenerator.util import os_util 6 | 7 | 8 | class TextGeneratorTest(unittest.TestCase): 9 | def setUp(self) -> None: 10 | random.seed(123) 11 | 12 | def test_variable_extraction(self): 13 | self.assertEqual( 14 | {"test", "adjective"}, 15 | text_generator.get_format_variables("this {test} is going {adjective}"), 16 | ) 17 | self.assertEqual( 18 | {"test"}, 19 | text_generator.get_format_variables("this {test} is testing for {} empty"), 20 | ) 21 | self.assertEqual( 22 | {"test"}, 23 | text_generator.get_format_variables( 24 | "this {test} is testing if {test} only appears once" 25 | ), 26 | ) 27 | self.assertEqual( 28 | set(), 29 | text_generator.get_format_variables( 30 | "this test only has {} some {} empty names" 31 | ), 32 | ) 33 | 34 | def test_variable_extraction_with_commands(self): 35 | self.assertEqual( 36 | {"test", "adjective"}, 37 | text_generator.get_format_variables( 38 | "this {test.title.s} is going {adjective.lower}" 39 | ), 40 | ) 41 | self.assertEqual( 42 | {"test", "one"}, 43 | text_generator.get_format_variables( 44 | "this {test.title} is testing for {one}" 45 | ), 46 | ) 47 | self.assertEqual( 48 | {"test"}, 49 | text_generator.get_format_variables( 50 | "this {test.title} is testing for {} empty" 51 | ), 52 | ) 53 | 54 | def test_not_using_unusable_template(self): 55 | """ Tests if the generator is not raising an error when variables are missing to generate, and only uses other 56 | generator """ 57 | possible_templates = ["This is {adjective}", "This is {noun}"] 58 | templated_text_generator = text_generator.TemplatedTextGenerator( 59 | templates_list=possible_templates 60 | ) 61 | for _ in range(100): 62 | self.assertEqual( 63 | "This is possible", 64 | templated_text_generator.generate({"adjective": "possible"}), 65 | ) 66 | for _ in range(100): 67 | self.assertEqual( 68 | "This is a test", templated_text_generator.generate({"noun": "a test"}) 69 | ) 70 | 71 | def test_all_possible_outcomes(self): 72 | possible_templates = ["This is {adjective}", "This is {noun}"] 73 | templated_text_generator = text_generator.TemplatedTextGenerator( 74 | templates_list=possible_templates 75 | ) 76 | expected = {"This is possible", "This is a test"} 77 | all_generations = set() 78 | for _ in range(10000): 79 | if all_generations == expected: 80 | break 81 | all_generations.add( 82 | templated_text_generator.generate( 83 | {"adjective": "possible", "noun": "a test"} 84 | ) 85 | ) 86 | 87 | self.assertEqual(expected, all_generations) 88 | 89 | def 
test_variable_and_function_extraction(self): 90 | 91 | self.assertEqual( 92 | {("nice", ".title.lower.upper"), ("is", ".lower.ing"), ("test", ".title")}, 93 | text_generator.get_format_variables_and_functions( 94 | "this {is.lower.ing} a {test.title}, {nice.title.lower.upper} right?" 95 | ), 96 | ) 97 | 98 | def test_functions_on_variables(self): 99 | template_text_generator = text_generator.TemplatedTextGenerator( 100 | templates_list=["this is a {test.title}"] 101 | ) 102 | result = template_text_generator.generate({"test": "something"}) 103 | self.assertEqual("this is a Something", result) 104 | 105 | def test_functions_on_multiple_variables(self): 106 | template_text_generator = text_generator.TemplatedTextGenerator( 107 | templates_list=[ 108 | "this is a {test.title} using multiple {variable.plural.title}" 109 | ] 110 | ) 111 | result = template_text_generator.generate( 112 | {"test": "something", "variable": "instance"} 113 | ) 114 | self.assertEqual("this is a Something using multiple Instances", result) 115 | 116 | def test_tracery_grammar(self): 117 | tracery = text_generator.TraceryTextGenerator("data/text-templates/name.json") 118 | for i in range(5): 119 | self.assertTrue(tracery.generate()) 120 | 121 | def test_ted_title(self): 122 | tracery = text_generator.TraceryTextGenerator( 123 | "data/text-templates/talk_title.json", "ted_title" 124 | ) 125 | words = list(os_util.read_lines("data/eval/common_words.txt")) 126 | random.shuffle(words) 127 | words = words[0:10] 128 | generations = set() 129 | for i in range(100): 130 | topic = random.choice(words) 131 | generated = tracery.generate({"seed": topic}) 132 | generations.add(generated) 133 | self.assertTrue(generated) 134 | 135 | generations = list(generations) 136 | generations.sort() 137 | print("\n".join(generations)) 138 | 139 | 140 | if __name__ == "__main__": 141 | unittest.main() 142 | -------------------------------------------------------------------------------- /talkgenerator/data/text-templates/talk_title.json: -------------------------------------------------------------------------------- 1 | { 2 | "origin": [ 3 | "#ted_title#", 4 | "#ted_title#", 5 | "#in_between#", 6 | "#title#", 7 | "#title#", 8 | "#title#" 9 | ], 10 | "ted_title": [ 11 | "I Quit {seed.wikihow_action.ing.title} (and so should you)", 12 | "From {seed.title} to {seed.conceptnet_related_single_word.title}: {seed.wikihow_action.ing.title}", 13 | "Help, I Keep Having To {seed.wikihow_action.title}", 14 | "How many {seed.plural.lower} are \"too many {seed.plural.lower}\"?", 15 | "Never, Ever {seed.wikihow_action.title}", 16 | "Putting {seed.plural.title} On The Blockchain", 17 | "The #two_to_five# {seed.first_letter.title}'s of {seed.plural.title}", 18 | "The True Meaning of {seed.plural.title}", 19 | "The Unexpected Benefits of {seed.plural.title}", 20 | "The Way We Think about {seed.plural.title} is Dead Wrong", 21 | "The Worst Thing about {seed.plural.title} You Never Noticed", 22 | "Underwater {seed.plural.title}", 23 | "Want to be Happier? {seed.wikihow_action.title}!", 24 | "Want to Be Happy? 
{seed.wikihow_action.title}!", 25 | "Why People Call Me \"The {seed.singular.title}\"", 26 | "More {seed.plural.title}, More {seed.conceptnet_related_single_word.plural.title}", 27 | "More {seed.plural.title}, More Happiness", 28 | "Why We All Need to {seed.wikihow_action.title}" 29 | ], 30 | "in_between": [ 31 | "Less {seed.plural.title}, More Happiness", 32 | "Despite what you've heard, stupid {seed.plural} exists", 33 | "How {seed.plural.title} Can Change the World", 34 | "How {seed.plural.title} Caused My Bankruptcy", 35 | "How to {seed.wikihow_action.lower} (without getting fired)", 36 | "How {seed.plural} can save humanity and why we should legalize them everywhere now", 37 | "Why I Disallow My Children to {seed.wikihow_action.title.2_to_1_pronouns} (and so should you)", 38 | "You are {seed.wikihow_action.ing} (and Don't Even Know It)", 39 | "How I {seed.wikihow_action.title}: Confessions of {seed.title.a}" 40 | ], 41 | "title": [ 42 | "#two_or_larger# Things You Didn't Know About {seed.plural.title}", 43 | "#one_or_larger##number# Things You Didn't Know About {seed.plural.title}", 44 | "99 problems but {seed.a} ain't one", 45 | "Help, My Boyfriend Is Obsessed With {seed.plural.title}", 46 | "How {seed.singular.title.a} Made Me Feel like a Millionaire", 47 | "How I Held My {seed.plural.title} for #one_or_larger##number# Minutes", 48 | "How to Fail Miserably at {seed.wikihow_action.ing.title}", 49 | "How to Get Rid of {seed.plural.title}", 50 | "How to Let {seed.plural.title} Be Your Guide", 51 | "How to look like you're working while {seed.wikihow_action.ing.lower}", 52 | "How to {seed.wikihow_action} Most Effectively", 53 | "How to Make {seed.plural.title} Your Friend", 54 | "How to Spot {seed.singular.a.title}", 55 | "How to subtly put \"{seed.conceptnet_related_single_word}\" in every sentence", 56 | "How {seed.plural.title} Inspires Action", 57 | "How {seed.lower} might save your life", 58 | "I HATE {seed.plural.upper}", 59 | "I'm {seed.title.a} AND a U.S. Marine: ASK ME ANYTHING!", 60 | "Inside the Mind of a Master {seed.singular.title}", 61 | "Inside the Mind of {seed.singular.a.title}", 62 | "My Favourite {seed.singular.title} Lifehacks", 63 | "My Hobby: {seed.wikihow_action.ing.title}", 64 | "My Irrational Fear of {seed.plural.title}", 65 | "The Art of {seed.plural.title}", 66 | "An overview of sci-fi stories about {seed.wikihow_action.ing.lower}", 67 | "The Art of {seed.wikihow_action.ing.title}", 68 | "The Biggest Concerns About {seed.plural.title}", 69 | "The Danger of a Single {seed.singular.title}", 70 | "The Happy Secret to Better {seed.plural.title}", 71 | "The Power of {seed.plural.title}", 72 | "The Puzzle of {seed.plural.title}", 73 | "The Surprising Science of {seed.plural.title}", 74 | "The Surprising Science of {seed.wikihow_action.ing.title}", 75 | "The Thrilling Potential of {seed.plural.title}", 76 | "The {seed.title} Conundrum", 77 | "This Is What Happens When You {seed.wikihow_action.title}", 78 | "We need to talk about {seed.plural.lower}", 79 | "We. Need. More. {seed.plural.title}.", 80 | "What makes a good {seed.singular.lower}? 
Lessons from the longest study.", 81 | "What purpose do {seed.plural.lower} *really* have?", 82 | "What Your Choice in {seed.plural.title} Says About You", 83 | "What Your {seed.singular.title} Says About You", 84 | "What Your {seed.singular.title} Truly Says About You", 85 | "Why I Joined {seed.title.a} Cult and Why You Should Too", 86 | "Why We All Love {seed.wikihow_action.ing.title}", 87 | "Why We Do What We Do To {seed.wikihow_action.title}", 88 | "Why {seed.plural.title} Will Ruin Your Life", 89 | "Your {seed.plural.title} May Shape Who You Are", 90 | "{seed.plural.title} Aren't Everything. Believe Me.", 91 | "{seed.plural.title} Kill Creativity", 92 | "{seed.title.first_letter}... {seed.first_letter}... {seed.plural}?", 93 | "{seed.title}, No Matter What", 94 | "{seed.title}: The Power of {seed.wikihow_action.ing.title}", 95 | "{seed.wikihow_action.ing.title} in #one_or_larger##number##number# easy steps", 96 | "{seed.wikihow_action.ing.title}: Amazing Delight or Sign of Apocalypse?" 97 | ], 98 | "number": ["0","1","2","3","4","5","6","7","8","9"], 99 | "one_or_larger": ["1","2","3","4","5","6","7","8","9"], 100 | "two_or_larger": ["2","3","4","5","6","7","8","9"], 101 | "two_to_five": ["2","3","4","5"] 102 | } -------------------------------------------------------------------------------- /talkgenerator/schema/content_generator_structures.py: -------------------------------------------------------------------------------- 1 | """ 2 | This file contains structures that are helpful for certain content generators, but not general enough for generator_util 3 | """ 4 | import os 5 | import random 6 | from typing import Tuple 7 | 8 | from talkgenerator.sources import conceptnet 9 | from talkgenerator.sources import goodreads, text_generator, reddit, wikihow 10 | from talkgenerator.util.generator_util import ( 11 | ExternalImageListGenerator, 12 | RelatedMappingGenerator, 13 | ) 14 | from talkgenerator.util.generator_util import FromListGenerator 15 | from talkgenerator.util.generator_util import ( 16 | SeededGenerator, 17 | BackupGenerator, 18 | ) 19 | 20 | 21 | # = TEXT GENERATORS= 22 | from talkgenerator.datastructures.image_data import ImageData 23 | 24 | 25 | def create_templated_text_generator(filename): 26 | actual_file = os_util.to_actual_file(filename) 27 | return text_generator.TemplatedTextGenerator(actual_file).generate 28 | 29 | 30 | def create_tracery_generator(filename, main="origin"): 31 | actual_file = os_util.to_actual_file(filename) 32 | return text_generator.TraceryTextGenerator(actual_file, main).generate 33 | 34 | 35 | # GOODREAD QUOTES 36 | class GoodReadsQuoteGenerator(object): 37 | def __init__(self, max_quote_length): 38 | self._max_quote_length = max_quote_length 39 | 40 | def __call__(self, presentation_context): 41 | def generator(seed): 42 | return [ 43 | quote 44 | for quote in goodreads.search_quotes(seed, 50) 45 | if len(quote) <= self._max_quote_length 46 | ] 47 | 48 | return FromListGenerator(SeededGenerator(generator))(presentation_context) 49 | 50 | 51 | # REDDIT 52 | from talkgenerator.util import os_util 53 | 54 | 55 | def create_reddit_image_generator(*name): 56 | reddit_generator = RedditImageGenerator("+".join(name)) 57 | return BackupGenerator(reddit_generator.generate, reddit_generator.generate_random) 58 | 59 | 60 | class RedditImageSearcher(object): 61 | def __init__(self, subreddit: str): 62 | self._subreddit = subreddit 63 | 64 | def __call__(self, seed: str): 65 | results = reddit.search_subreddit( 66 | self._subreddit, str(seed) + " 
nsfw:no (url:.jpg OR url:.png OR url:.gif)" 67 | ) 68 | if bool(results): 69 | return [ 70 | ImageData( 71 | image_url=post.url, 72 | source="u/" 73 | + post.author.name 74 | + " (on " 75 | + post.subreddit_name_prefixed 76 | + ")", 77 | ) 78 | for post in results 79 | ] 80 | 81 | 82 | class RedditImageGenerator: 83 | def __init__(self, subreddit: str): 84 | self._subreddit = subreddit 85 | 86 | self._generate = ExternalImageListGenerator( 87 | SeededGenerator(RedditImageSearcher(self._subreddit)), 88 | ) 89 | 90 | def generate(self, presentation_context): 91 | return self._generate(presentation_context) 92 | 93 | def generate_random(self, _): 94 | return self.generate({"seed": ""}) 95 | 96 | 97 | # ABOUT ME 98 | 99 | _about_me_facts_grammar = "data/text-templates/about_me_facts.json" 100 | job_description_generator = create_tracery_generator( 101 | _about_me_facts_grammar, "job_description" 102 | ) 103 | country_description_generator = create_tracery_generator( 104 | _about_me_facts_grammar, "country_description" 105 | ) 106 | 107 | 108 | def _apply_country_prefix(country_name): 109 | if random.uniform(0, 1) < 0.55: 110 | return country_name 111 | return country_description_generator() + country_name 112 | 113 | 114 | class CountryPrefixApplier(object): 115 | def __init__(self): 116 | pass 117 | 118 | def __call__(self, x: Tuple[str, str]): 119 | return _apply_country_prefix(x[0]), x[1] 120 | 121 | 122 | def _apply_job_prefix(job_name): 123 | if random.uniform(0, 1) < 0.55: 124 | return job_name 125 | return job_description_generator() + ": " + job_name 126 | 127 | 128 | class JobPrefixApplier(object): 129 | def __init__(self): 130 | pass 131 | 132 | def __call__(self, x: Tuple[str, str]): 133 | return _apply_job_prefix(x[0]), x[1] 134 | 135 | 136 | # SPLITTER 137 | 138 | 139 | class SplitCaptionsGenerator(object): 140 | def __init__(self, generator): 141 | self._generator = generator 142 | 143 | def __call__(self, presentation_context): 144 | line = self._generator(presentation_context) 145 | parts = line.split("|") 146 | return parts 147 | 148 | 149 | # BOLD STATEMENT 150 | 151 | bold_statement_templated_file = os_util.to_actual_file( 152 | "data/text-templates/bold_statements.txt" 153 | ) 154 | bold_statement_templated_generator = create_templated_text_generator( 155 | bold_statement_templated_file 156 | ) 157 | 158 | 159 | def generate_wikihow_bold_statement(presentation_context): 160 | seed = presentation_context["seed"] 161 | template_values = presentation_context 162 | related_actions = wikihow.get_related_wikihow_actions(seed) 163 | if related_actions: 164 | action = random.choice(related_actions) 165 | template_values.update({"action": action.title(), "seed": seed}) 166 | 167 | return bold_statement_templated_generator(template_values) 168 | 169 | 170 | class ConceptNetMapper(RelatedMappingGenerator): 171 | def __init__(self, generator): 172 | super().__init__(conceptnet.weighted_related_word_generator, generator) 173 | -------------------------------------------------------------------------------- /talkgenerator/datastructures/slide_generator_data.py: -------------------------------------------------------------------------------- 1 | import logging 2 | from functools import lru_cache 3 | from typing import Collection, Union, Set, Callable, Tuple 4 | 5 | from talkgenerator.datastructures.image_data import ImageData 6 | 7 | 8 | logger = logging.getLogger("talkgenerator") 9 | 10 | 11 | class PeakedWeight(object): 12 | def __init__( 13 | self, peak_values: Tuple[int, ...], weight: 
float, other_weight: float 14 | ): 15 | self._peak_values = peak_values 16 | self._weight = weight 17 | self._other_weight = other_weight 18 | 19 | def __call__(self, slide_nr: int, num_slides: int): 20 | actual_peak_values = fix_indices(self._peak_values, num_slides) 21 | if slide_nr in actual_peak_values: 22 | return self._weight 23 | return self._other_weight 24 | 25 | 26 | @lru_cache(maxsize=30) 27 | def fix_indices(values: Collection[int], num_slides: int): 28 | return [value % num_slides if value < 0 else value for value in values] 29 | 30 | 31 | class ConstantWeightFunction(object): 32 | def __init__(self, weight): 33 | self._weight = weight 34 | 35 | def __call__(self, slide_nr, total_slides): 36 | return self._weight 37 | 38 | 39 | # Classes that are abstractly responsible for generating powerpoints 40 | 41 | 42 | class SlideGeneratorData: 43 | """ Responsible for providing the slide generator and other attributes, such as its name and weight""" 44 | 45 | def __init__( 46 | self, 47 | generator, 48 | weight_function: Callable[[int, int], float] = ConstantWeightFunction(1), 49 | retries: int = 5, 50 | allowed_repeated_elements: int = 0, 51 | tags=None, 52 | name=None, 53 | ): 54 | self._generator = generator 55 | self._weight_function = weight_function 56 | self._retries = retries 57 | self._name = name 58 | self._allowed_repeated_elements = allowed_repeated_elements 59 | if not tags: 60 | tags = set() 61 | self._tags = tags 62 | 63 | def generate(self, presentation_context, used_elements): 64 | """Generate a slide for a given presentation using the given seed.""" 65 | logger.debug('slide_generator_data.generate()') 66 | logger.debug('presentation_context: {}'.format(presentation_context)) 67 | logger.debug('used_elements: {}'.format(used_elements)) 68 | logger.debug('self._allowed_repeated_elements: {}'.format(self._allowed_repeated_elements)) 69 | 70 | # Try a certain amount of times 71 | for i in range(self._retries): 72 | logger.debug('retry: {}'.format(i)) 73 | logger.debug('self._generator: {}'.format(self._generator)) 74 | slide_results = self._generator.generate_slide( 75 | presentation_context, (used_elements, self._allowed_repeated_elements) 76 | ) 77 | logger.debug('slide_results: {}'.format(slide_results)) 78 | 79 | if slide_results: 80 | (slide, generated_elements) = slide_results 81 | logger.debug('slide: {}'.format(slide)) 82 | logger.debug('generated_elements: {}'.format(generated_elements)) 83 | 84 | # If the generated content is nothing, don't try again 85 | if _has_not_generated_something(generated_elements): 86 | return None 87 | 88 | if slide: 89 | # Add notes about the generation 90 | slide.set_note( 91 | "Seed: " 92 | + presentation_context["seed"] 93 | + "\nGenerator: " 94 | + str(self) 95 | + " \n Context: " 96 | + str(presentation_context) 97 | + " \n Generated Elements: " 98 | + str(generated_elements) 99 | ) 100 | 101 | # Add all sources of generated elements 102 | for generated_element in generated_elements: 103 | if isinstance(generated_element, ImageData): 104 | slide.add_source(generated_element.get_source()) 105 | 106 | return slide, generated_elements 107 | 108 | def get_weight_for(self, slide_nr: int, total_slides: int) -> float: 109 | """The weight of the generator for a particular slide. 
110 | Determines how much chance it has being picked for a particular slide number""" 111 | return self._weight_function(slide_nr, total_slides) 112 | 113 | def get_allowed_repeated_elements(self) -> int: 114 | return self._allowed_repeated_elements 115 | 116 | def get_tags(self) -> Set[str]: 117 | return self._tags 118 | 119 | def __str__(self): 120 | if bool(self._name): 121 | return str(self._name) 122 | name = str(self._generator.__name__) 123 | if name == "": 124 | name = "Unnamed Generator" 125 | return "SlideGenerator[" + name + "]" 126 | 127 | 128 | def _has_not_generated_something(generated_elements) -> bool: 129 | generated_elements = set(generated_elements) 130 | _filter_generated_elements(generated_elements) 131 | return len(generated_elements) == 0 132 | 133 | 134 | def _filter_generated_elements(generated_elements: Set[Union[str, bool, None]]): 135 | if "" in generated_elements: 136 | generated_elements.remove("") 137 | if None in generated_elements: 138 | generated_elements.remove(None) 139 | if True in generated_elements: 140 | generated_elements.remove(True) 141 | if False in generated_elements: 142 | generated_elements.remove(False) 143 | -------------------------------------------------------------------------------- /talkgenerator/slide/slides.py: -------------------------------------------------------------------------------- 1 | import logging 2 | from abc import ABCMeta 3 | from typing import Dict 4 | 5 | from talkgenerator.slide import powerpoint_slide_creator 6 | 7 | logger = logging.getLogger("talkgenerator") 8 | 9 | 10 | class Slide(metaclass=ABCMeta): 11 | """ Class representing a slide object that could be used to export to Powerpoint pptx or other representations later 12 | """ 13 | 14 | def __init__(self, type_name: str, ppt_slide_creator, arguments: Dict): 15 | self._type_name = type_name 16 | self._ppt_slide_creator = ppt_slide_creator 17 | self._arguments = arguments 18 | self._note = "" 19 | self._sources = [] 20 | 21 | def add_source(self, source: str): 22 | if source is not None: 23 | self._sources.append(source) 24 | 25 | def set_note(self, note: str): 26 | self._note = note 27 | 28 | def create_powerpoint_slide(self, prs): 29 | """ Should generate a slide in the powerpoint """ 30 | ppt_slide = self._ppt_slide_creator(prs, **self._arguments) 31 | try: 32 | if ppt_slide: 33 | ppt_slide.notes_slide.notes_text_frame.text = self._note 34 | 35 | if len(self._sources): 36 | powerpoint_slide_creator.add_sources_note(ppt_slide, self._sources) 37 | 38 | except AttributeError as e: 39 | logger.error("attribute error on create slide {}".format(e)) 40 | return ppt_slide 41 | 42 | def to_slide_dictionary(self) -> dict: 43 | slide_dict = dict(self._arguments) 44 | slide_dict["type"] = self._type_name 45 | slide_dict["sources"] = self._sources 46 | return slide_dict 47 | 48 | 49 | class TitleSlide(Slide): 50 | def __init__(self, title:str, subtitle:str): 51 | super().__init__( 52 | type_name="title", 53 | ppt_slide_creator=powerpoint_slide_creator.create_title_slide, 54 | arguments={"title": title, "subtitle": subtitle}, 55 | ) 56 | 57 | 58 | class LarqeQuoteSlide(Slide): 59 | def __init__(self, title:str, text:str, background_image=None): 60 | super().__init__( 61 | type_name="large_quote", 62 | ppt_slide_creator=powerpoint_slide_creator.create_large_quote_slide, 63 | arguments={ 64 | "title": title, 65 | "text": text, 66 | "background_image": background_image, 67 | }, 68 | ) 69 | 70 | 71 | class ImageSlide(Slide): 72 | def __init__(self, title=None, image_url=None, 
original_image_size=True): 73 | super().__init__( 74 | type_name="image", 75 | ppt_slide_creator=powerpoint_slide_creator.create_image_slide, 76 | arguments={ 77 | "title": title, 78 | "image_url": image_url, 79 | "original_image_size": original_image_size, 80 | }, 81 | ) 82 | 83 | 84 | class FullImageSlide(Slide): 85 | def __init__(self, title=None, image_url=None, original_image_size=True): 86 | super().__init__( 87 | type_name="full_image", 88 | ppt_slide_creator=powerpoint_slide_creator.create_full_image_slide, 89 | arguments={ 90 | "title": title, 91 | "image_url": image_url, 92 | "original_image_size": original_image_size, 93 | }, 94 | ) 95 | 96 | 97 | class TwoColumnImageSlide(Slide): 98 | def __init__( 99 | self, 100 | title=None, 101 | caption_1=None, 102 | image_or_text_1=None, 103 | caption_2=None, 104 | image_or_text_2=None, 105 | original_image_size=True, 106 | ): 107 | super().__init__( 108 | type_name="two_column_image", 109 | ppt_slide_creator=powerpoint_slide_creator.create_two_column_images_slide, 110 | arguments={ 111 | "title": title, 112 | "caption_1": caption_1, 113 | "image_or_text_1": image_or_text_1, 114 | "caption_2": caption_2, 115 | "image_or_text_2": image_or_text_2, 116 | "original_image_size": original_image_size, 117 | }, 118 | ) 119 | 120 | 121 | class ThreeColumnImageSlide(Slide): 122 | def __init__( 123 | self, 124 | title=None, 125 | caption_1=None, 126 | image_or_text_1=None, 127 | caption_2=None, 128 | image_or_text_2=None, 129 | caption_3=None, 130 | image_or_text_3=None, 131 | original_image_size=True, 132 | ): 133 | super().__init__( 134 | type_name="three_column_image", 135 | ppt_slide_creator=powerpoint_slide_creator.create_three_column_images_slide, 136 | arguments={ 137 | "title": title, 138 | "caption_1": caption_1, 139 | "image_or_text_1": image_or_text_1, 140 | "caption_2": caption_2, 141 | "image_or_text_2": image_or_text_2, 142 | "caption_3": caption_3, 143 | "image_or_text_3": image_or_text_3, 144 | "original_image_size": original_image_size, 145 | }, 146 | ) 147 | 148 | 149 | class ChartSlide(Slide): 150 | def __init__(self, title, chart_type, chart_data, chart_modifier=None): 151 | super().__init__( 152 | type_name="chart", 153 | ppt_slide_creator=powerpoint_slide_creator.create_chart_slide, 154 | arguments={ 155 | "title": title, 156 | "chart_type": chart_type, 157 | "chart_data": chart_data, 158 | "chart_modifier": chart_modifier, 159 | }, 160 | ) 161 | -------------------------------------------------------------------------------- /talkgenerator/sources/conceptnet.py: -------------------------------------------------------------------------------- 1 | import time 2 | import logging 3 | from functools import lru_cache 4 | from pathlib import Path 5 | from urllib.parse import urlencode 6 | 7 | import requests 8 | # from cachier import cachier 9 | 10 | from talkgenerator.util import generator_util, cache_util 11 | 12 | URL = "http://api.conceptnet.io/c/en/{}?" 
13 | 14 | _LOCATION_ARGUMENTS = cache_util.HashableDict(rel="/r/AtLocation", limit=100) 15 | _HASA_ARGUMENTS = cache_util.HashableDict(rel="/r/HasA", limit=200) 16 | _DEFAULT_ARGUMENTS = cache_util.HashableDict(limit=200) 17 | 18 | # HELPERS 19 | _PROHIBITED_SEARCH_TERMS = ( 20 | "a", 21 | "your", 22 | "my", 23 | "her", 24 | "his", 25 | "its", 26 | "their", 27 | "be", 28 | "an", 29 | "the", 30 | "you", 31 | "are", 32 | ) 33 | 34 | logger = logging.getLogger("talkgenerator.conceptnet") 35 | 36 | 37 | # Helpers 38 | def _remove_prohibited_words(word): 39 | return [part for part in word.split(" ") if part not in _PROHIBITED_SEARCH_TERMS] 40 | 41 | 42 | def normalise(word): 43 | return " ".join(_remove_prohibited_words(word)).lower() 44 | 45 | 46 | def remove_duplicates(entries): 47 | if entries: 48 | checked = set() 49 | result = [] 50 | for entry in entries: 51 | if entry: 52 | key = entry[1] 53 | if key in checked: 54 | continue 55 | checked.add(key) 56 | result.append(entry) 57 | return result 58 | 59 | 60 | def remove_containing(entries, prohibited_word): 61 | if entries: 62 | result = [] 63 | for entry in entries: 64 | if entry: 65 | key = entry[1] 66 | if prohibited_word in key: 67 | continue 68 | result.append(entry) 69 | return result 70 | 71 | 72 | def remove_nones(entries): 73 | if entries: 74 | result = [] 75 | for entry in entries: 76 | if entry: 77 | result.append(entry) 78 | return result 79 | return [] 80 | 81 | 82 | # RETRIEVING DATA 83 | 84 | 85 | @lru_cache(maxsize=20) 86 | # @cachier(cache_dir=Path("..", "tmp").absolute()) 87 | def _get_data(word, arguments=None): 88 | if not arguments: 89 | arguments = _DEFAULT_ARGUMENTS 90 | splitted_word = _remove_prohibited_words(word) 91 | search_term = "_".join(splitted_word) 92 | url = URL.format(search_term) + urlencode(arguments, False, "/") 93 | start = time.perf_counter() 94 | try: 95 | result = requests.get(url).json() 96 | except Exception as e: 97 | logger.warning("conceptnet _get_data timeout: {}".format(e)) 98 | result = None 99 | end = time.perf_counter() 100 | logger.info( 101 | "Took {} seconds to poll Conceptnet for '{}'".format(str(end - start), word) 102 | ) 103 | return result 104 | 105 | 106 | def _get_edges(word, arguments=None): 107 | data = _get_data(word, arguments) 108 | if data: 109 | return data["edges"] 110 | 111 | 112 | def _get_weight_and_word(edge, word): 113 | end_label = edge["end"]["label"] 114 | if not end_label == word: 115 | return edge["weight"], end_label 116 | 117 | 118 | def _get_relation_label(edge): 119 | return edge["rel"]["label"] 120 | 121 | 122 | def _get_from_relation(word, edges, relation_name): 123 | return remove_nones( 124 | [ 125 | _get_weight_and_word(edge, word) 126 | for edge in edges 127 | if _get_relation_label(edge) == relation_name 128 | ] 129 | ) 130 | 131 | 132 | # EXTRACTING INFO 133 | 134 | 135 | def is_english(node): 136 | return node and (not "language" in node or node["language"] == "en") 137 | 138 | 139 | def is_different_enough_label(edge, word): 140 | label = edge["label"].lower() 141 | word_lower = word.lower() 142 | return not label in word_lower and not word_lower in label 143 | 144 | 145 | def get_weighted_related_words(word, limit=50): 146 | edges = _get_edges(word, cache_util.HashableDict(limit=limit)) 147 | starts = [ 148 | (edge["weight"], edge["start"]["label"]) 149 | for edge in edges 150 | if is_different_enough_label(edge["start"], word) and is_english(edge["start"]) 151 | ] 152 | ends = [ 153 | (edge["weight"], edge["end"]["label"]) 154 | for edge in 
edges 155 | if is_different_enough_label(edge["end"], word) and is_english(edge["end"]) 156 | ] 157 | result = starts + ends 158 | return result 159 | 160 | 161 | def get_weighted_related_locations(word): 162 | edges = _get_edges(word, _LOCATION_ARGUMENTS) 163 | return _get_from_relation(word, edges, "AtLocation") 164 | 165 | 166 | def get_weighted_has(word): 167 | edges = _get_edges(word, _HASA_ARGUMENTS) 168 | return _get_from_relation(word, edges, "HasA") 169 | 170 | 171 | def get_weighted_properties(word): 172 | edges = _get_edges(word) 173 | return _get_from_relation(word, edges, "HasProperty") 174 | 175 | 176 | def get_weighted_antonyms(word): 177 | edges = _get_edges(word) 178 | return _get_from_relation(word, edges, "Antonym") 179 | 180 | 181 | # Weighted 182 | weighted_location_generator = generator_util.WeightedGenerator( 183 | get_weighted_related_locations 184 | ) 185 | weighted_antonym_generator = generator_util.WeightedGenerator(get_weighted_antonyms) 186 | weighted_related_word_generator = generator_util.WeightedGenerator( 187 | get_weighted_related_words 188 | ) 189 | 190 | # Unweighted 191 | unweighted_location_generator = generator_util.UnweightedGenerator( 192 | get_weighted_related_locations 193 | ) 194 | unweighted_antonym_generator = generator_util.UnweightedGenerator(get_weighted_antonyms) 195 | unweighted_related_word_generator = generator_util.UnweightedGenerator( 196 | get_weighted_related_words 197 | ) 198 | -------------------------------------------------------------------------------- /talkgenerator/sources/wikihow.py: -------------------------------------------------------------------------------- 1 | """ Module for interacting with Wikihow """ 2 | import re 3 | import time 4 | import logging 5 | from functools import lru_cache 6 | from itertools import chain 7 | from pathlib import Path 8 | 9 | import inflect 10 | import requests 11 | from bs4 import BeautifulSoup 12 | # from cachier import cachier 13 | 14 | from talkgenerator import settings 15 | 16 | logger = logging.getLogger("talkgenerator") 17 | 18 | _LOG_IN_URL = "https://www.wikihow.com/index.php?title=Special:UserLogin&action=submitlogin&type=login" 19 | _ADVANCED_SEARCH_URL = ( 20 | "https://www.wikihow.com/index.php?title=Special%3ASearch&profile=default&search={}" 21 | "&fulltext=Search&ss=relevance&so=desc&ffriy=1&ffrin=1&fft=ffta&fftsi=&profile=default" 22 | ) 23 | 24 | 25 | def _create_log_in_session(username, password): 26 | log_in_credentials = {"wpName": username, "wpPassword": password} 27 | session = requests.session() 28 | max_session_attempts = 16 29 | trial = 1 30 | success = False 31 | 32 | while not success and trial < max_session_attempts: 33 | try: 34 | resp = session.post(_LOG_IN_URL, log_in_credentials, log_in_credentials) 35 | if "Unable to continue login." in resp.text: 36 | logger.warning("Requests login failed. Unable to continue login.") 37 | return False 38 | else: 39 | success = True 40 | except requests.exceptions.ConnectionError: 41 | wait_time = 0.25 * 2 ** trial 42 | 43 | # increment the trial counter 44 | trial += 1 45 | logger.error( 46 | "Connection error with Wikihow! Retrying in " 47 | + str(wait_time) 48 | + " seconds." 
49 | ) 50 | time.sleep(wait_time) 51 | return _create_log_in_session(username, password) 52 | 53 | if trial < max_session_attempts: 54 | logger.info("Logged into Wikihow") 55 | else: 56 | logger.warning("Failed logging into Wikihow") 57 | return session 58 | 59 | 60 | def get_wikihow_session(): 61 | wikihow_credentials = settings.wikihow_auth() 62 | # if session: 63 | # logger.warning( 64 | # "Found Wikihow Session object in credentials, skipping loggin in" 65 | # ) 66 | # return wikihow_credentials["session"] 67 | # else: 68 | # logger.warning( 69 | # "No Wikihow Session object in credentials, attempting log in..." 70 | # ) 71 | session = _create_log_in_session(**wikihow_credentials) 72 | wikihow_credentials["session"] = session 73 | return session 74 | 75 | 76 | def remove_how_to(wikihow_title): 77 | index_of_to = wikihow_title.find("to") 78 | return wikihow_title[index_of_to + 3 :] 79 | 80 | 81 | def clean_wikihow_action(action): 82 | action = _remove_between_brackets(action) 83 | action = _remove_trademarks(action) 84 | action = action.strip() 85 | return action 86 | 87 | 88 | def _remove_between_brackets(sentence): 89 | while True: 90 | s_new = re.sub(r"\([^(]*?\)", r"", sentence) 91 | if s_new == sentence: 92 | break 93 | sentence = s_new 94 | return sentence 95 | 96 | 97 | def _remove_trademarks(action): 98 | if " - wikihow.com" in action: 99 | return re.sub(" - wikihow.com", "", action) 100 | return action 101 | 102 | 103 | @lru_cache(maxsize=20) 104 | # @cachier(cache_dir=Path("..", "tmp").absolute()) 105 | def basic_search_wikihow(search_words): 106 | return requests.get( 107 | "https://en.wikihow.com/wikiHowTo?search=" + search_words.replace(" ", "+") 108 | ) 109 | 110 | 111 | # wikihow_session = get_wikihow_session() 112 | wikihow_session = None 113 | 114 | 115 | @lru_cache(maxsize=20) 116 | # @cachier(cache_dir=Path("..", "tmp").absolute()) 117 | def _advanced_search_wikihow(search_words): 118 | # session = get_wikihow_session() 119 | if wikihow_session: 120 | url = _ADVANCED_SEARCH_URL.format(search_words.replace(" ", "+")) 121 | resp = wikihow_session.get(url, allow_redirects=True) 122 | if "Login Required - wikiHow" in str(resp.content): 123 | logger.warning( 124 | "WARNING: Problem logging in on Wikihow: Advanced Search disabled" 125 | ) 126 | return resp 127 | return None 128 | 129 | 130 | def get_related_wikihow_actions_basic_search(seed_word): 131 | page = basic_search_wikihow(seed_word) 132 | # Try again but with plural if nothing is found 133 | if not page: 134 | page = basic_search_wikihow(inflect.engine().plural(seed_word)) 135 | 136 | soup = BeautifulSoup(page.content, "html.parser") 137 | actions_elements = soup.find_all("a", class_="result_link") 138 | action_titles = list( 139 | chain.from_iterable( 140 | [a.find_all("div", "result_title") for a in actions_elements] 141 | ) 142 | ) 143 | actions = [ 144 | clean_wikihow_action(remove_how_to(x.get_text())) 145 | for x in action_titles 146 | if x is not None and not x.get_text().startswith("Category") 147 | ] 148 | return actions 149 | 150 | 151 | def get_related_wikihow_actions_advanced_search(seed_word): 152 | page = _advanced_search_wikihow(seed_word) 153 | # Try again but with plural if nothing is found 154 | if not page: 155 | page = _advanced_search_wikihow(inflect.engine().plural(seed_word)) 156 | if page: 157 | soup = BeautifulSoup(page.content, "html.parser") 158 | actions_elements = soup.find_all("div", class_="mw-search-result-heading") 159 | actions = [clean_wikihow_action(x.find("a")["title"]) for x in 
actions_elements] 160 | return actions 161 | return [] 162 | 163 | 164 | def get_related_wikihow_actions(seed_word): 165 | """ Uses the advanced search unless it doesn't return anything """ 166 | # actions = get_related_wikihow_actions_advanced_search(seed_word) 167 | # if actions: 168 | # return actions 169 | return get_related_wikihow_actions_basic_search(seed_word) 170 | -------------------------------------------------------------------------------- /talkgenerator/schema/slide_topic_generators.py: -------------------------------------------------------------------------------- 1 | import multiprocessing 2 | import random 3 | import logging 4 | from functools import lru_cache 5 | from typing import List, Collection 6 | 7 | from talkgenerator.sources import conceptnet, phrasefinder 8 | from talkgenerator.util import language_util, random_util 9 | 10 | # == TOPIC GENERATORS == 11 | 12 | logger = logging.getLogger("talkgenerator") 13 | 14 | 15 | class SlideSeedGenerator: 16 | def get_seed(self, slide_nr: int) -> str: 17 | raise NotImplementedError("") 18 | 19 | 20 | class SideTrackingTopicGenerator(SlideSeedGenerator): 21 | """ This generator will make small side tracks around topics, but keeps returning every X slides""" 22 | 23 | def __init__( 24 | self, topics: List[str], num_slides: int, topic_return_period_range=range(3, 6) 25 | ): 26 | self._topics = topics 27 | self._num_slides = num_slides 28 | 29 | seeds: List[str] = [None] * num_slides 30 | 31 | # Make it begin and end with the topic 32 | if num_slides > 0: 33 | # End with main topic 34 | seeds[-1] = topics[0] 35 | 36 | if len(topics) == 1: 37 | # Add the returning topic if only one topic given 38 | idx = 0 39 | while idx < num_slides: 40 | seeds[idx] = topics[0] 41 | idx += random.choice(topic_return_period_range) 42 | else: 43 | # Disperse all topics over the slides if multiple topics given 44 | _disperse(seeds, topics, 0, num_slides - 1) 45 | 46 | # Fill in the blanks with related topics 47 | previous = seeds.copy() 48 | while None in seeds: 49 | fill_in_blank_topics_with_related(seeds) 50 | logger.info("SideTrackingTopicGenerator concept seeds: {}".format(seeds)) 51 | if seeds == previous: 52 | fill_in_blanks_with(seeds, topics[0]) 53 | break 54 | previous = seeds.copy() 55 | 56 | # Convert None's to literal none's for debugging purposes 57 | seeds = [seed if seed else "None" for seed in seeds] 58 | 59 | self._seeds = seeds 60 | 61 | def get_seed(self, slide_nr: int) -> str: 62 | return self._seeds[slide_nr] 63 | 64 | def all_seeds(self): 65 | return self._seeds 66 | 67 | 68 | def _disperse(seeds, topics, min_idx, max_idx): 69 | range_size = max_idx - min_idx + 1 70 | step_size = range_size / len(topics) 71 | for i in range(len(topics)): 72 | seeds_index = int(min_idx + step_size * i) 73 | seeds[seeds_index] = topics[i] 74 | 75 | 76 | def fill_in_blank_topics_with_related(seeds, distance=1): 77 | for i in range(len(seeds)): 78 | _fill_in(seeds, i) 79 | 80 | 81 | def fill_in_blanks_with(seeds, topic): 82 | for i in range(len(seeds)): 83 | if not seeds[i]: 84 | seeds[i] = topic 85 | 86 | 87 | def normalise_weighted_word(weighted_word): 88 | return weighted_word[0], normalise_seed(weighted_word[1]) 89 | 90 | 91 | def _fill_in(seeds, i, distance=1): 92 | if seeds[i] is None: 93 | 94 | # Check for neighbours 95 | if i - distance >= 0 and seeds[i - distance]: 96 | neighbour = seeds[i - distance] 97 | 98 | try: 99 | related = conceptnet.get_weighted_related_words(neighbour, 25) 100 | if len(related) == 0: 101 | related = 
conceptnet.get_weighted_related_words( 102 | normalise_seed(neighbour), 25 103 | ) 104 | 105 | except Exception as e: 106 | logger.info("Conceptnet related words failing: {}".format(e)) 107 | related = [] 108 | 109 | normalised_related = map(normalise_weighted_word, related) 110 | # pool = multiprocessing.Pool() 111 | # normalised_related = pool.map(normalise_weighted_word, related) 112 | # pool.close() 113 | 114 | filtered_related = [ 115 | weighted_word 116 | for weighted_word in normalised_related 117 | if not weighted_word[1] in seeds and len(weighted_word[1]) > 2 118 | ] 119 | 120 | if len(filtered_related) > 0: 121 | seeds[i] = normalise_seed(random_util.weighted_random(filtered_related)) 122 | 123 | # Check if still unassigned 124 | if seeds[i] is None: 125 | _fill_in(seeds, i, distance + 1) 126 | 127 | 128 | @lru_cache(maxsize=300) 129 | def normalise_seed(seed): 130 | normalised = conceptnet.normalise(seed).lower() 131 | normalised = language_util.replace_non_alphabetical_characters(normalised) 132 | # if " " in normalised: 133 | # rarest_word = phrasefinder.get_rarest_word(normalised) 134 | # if rarest_word is not None: 135 | # normalised = rarest_word 136 | # else: 137 | # last_word = normalised.split(" ")[-1] 138 | # normalised = last_word 139 | 140 | logger.info("Mapping seed '" + seed + "' => " + normalised) 141 | return normalised 142 | 143 | 144 | class IdentityTopicGenerator(SlideSeedGenerator): 145 | """ Generates always the given topic as the seed for each slide """ 146 | 147 | def __init__(self, topics: Collection[str], _): 148 | self._topics = topics 149 | 150 | def get_seed(self, _) -> str: 151 | return random.choice(self._topics) 152 | 153 | 154 | # class SynonymTopicGenerator: 155 | # """ Generates a bunch of related words (e.g. synonyms) of a word to generate topics for a presentation""" 156 | # 157 | # def __init__(self, topic, number_of_slides): 158 | # self._topic = topic 159 | # self._slides_nr = number_of_slides 160 | # synonyms = language_util.get_synonyms(topic) 161 | # # seeds.extend(get_relations(topic)) 162 | # 163 | # # Check if enough generated 164 | # if len(synonyms) < number_of_slides: 165 | # # If nothing: big problem! 
166 | # if len(synonyms) == 0: 167 | # synonyms = [topic] 168 | # 169 | # # Now fill the seeds up with repeating topics 170 | # number_of_repeats = int(math.ceil(number_of_slides / len(synonyms))) 171 | # synonyms = numpy.tile(synonyms, number_of_repeats) 172 | # 173 | # # Take random `number_of_slides` elements 174 | # random.shuffle(synonyms) 175 | # self._seeds = synonyms[0: number_of_slides] 176 | # 177 | # def generate_seed(self, slide_nr): 178 | # return self._seeds[slide_nr] 179 | -------------------------------------------------------------------------------- /talkgenerator/util/language_util.py: -------------------------------------------------------------------------------- 1 | """ Module providing language-related operations to manipulate strings""" 2 | import logging 3 | import re 4 | import string 5 | 6 | import inflect 7 | import nltk 8 | 9 | logger = logging.getLogger("talkgenerator") 10 | 11 | 12 | def check_and_download(): 13 | required_corpus_list = ["tokenizers/punkt", "taggers/averaged_perceptron_tagger"] 14 | try: 15 | for corpus in required_corpus_list: 16 | _check_and_download_corpus(corpus, corpus.split("/")[1]) 17 | except Exception as e: 18 | logging.error(e) 19 | print_corpus_download_warning() 20 | return False 21 | 22 | return True 23 | 24 | 25 | def _check_and_download_corpus(corpus_fullname, corpus_shortname): 26 | try: 27 | nltk.data.find(corpus_fullname) 28 | except LookupError as le: 29 | logging.error(le) 30 | nltk.download(corpus_shortname) 31 | 32 | 33 | def print_corpus_download_warning(): 34 | corpus_warning = """ 35 | Hmm... 36 | --------------------- 37 | 38 | We had some trouble downloading the NLTK corpuses.. 39 | Try running the following from a command line. This should 40 | download the needed packages.. but it might also tell you if 41 | there is another issue. 42 | 43 | $ python3 -m nltk.downloader punkt averaged_perceptron_tagger 44 | """ 45 | logger.warning(corpus_warning) 46 | 47 | 48 | # Helpers 49 | 50 | 51 | def _replace_word_one_case(sentence, word, replacement, flags=0): 52 | return re.sub( 53 | r"(^|\W)" + word + r"(\W|$)", r"\1" + replacement + r"\2", sentence, flags=flags 54 | ) 55 | 56 | 57 | def replace_word(sentence, word, replacement): 58 | lowered = _replace_word_one_case(sentence, word.lower(), replacement.lower()) 59 | upper = _replace_word_one_case(lowered, word.upper(), replacement.upper()) 60 | titled = _replace_word_one_case(upper, word.title(), replacement.title()) 61 | result = _replace_word_one_case(titled, word, replacement, re.I) 62 | return result 63 | 64 | 65 | def get_pos_tags(word): 66 | """ Returns all possible POS tags for a given word according to nltk """ 67 | tags = nltk.pos_tag(nltk.word_tokenize(word)) 68 | tags_strings = [tag[1] for tag in tags] 69 | # print(word, ":", tags_strings) 70 | return tags_strings 71 | 72 | 73 | # Verbs 74 | 75 | 76 | def get_verb_index(words): 77 | seen_adverb = False 78 | for i in range(len(words)): 79 | tags = get_pos_tags(words[i]) 80 | # Is verb: return 81 | if "VB" in tags: 82 | return i 83 | # Is adverb: return next non adverb 84 | if "RB" in tags: 85 | seen_adverb = True 86 | continue 87 | # Something following an adverb thats not an adverb? 
See as verb 88 | if seen_adverb: 89 | return i 90 | return 0 91 | 92 | 93 | def apply_function_to_verb(action, func): 94 | words = action.split(" ") 95 | verb_index = get_verb_index(words) 96 | first_word = func(words[verb_index]) 97 | if len(words) == 1: 98 | return first_word 99 | return ( 100 | " ".join(words[:verb_index]) 101 | + " " 102 | + first_word 103 | + " " 104 | + " ".join(words[verb_index + 1 :]) 105 | ).strip() 106 | 107 | 108 | def to_present_participle(action): 109 | return apply_function_to_verb(action, to_ing_form) 110 | 111 | 112 | # From https://github.com/arsho/46-Simple-Python-Exercises-Solutions/blob/master/problem_25.py 113 | def _make_ing_form(passed_string): 114 | passed_string = passed_string.lower() 115 | letter = list(string.ascii_lowercase) 116 | vowel = ["a", "e", "i", "o", "u"] 117 | consonant = [c for c in letter if c not in vowel] 118 | exception = ["be", "see", "flee", "knee", "lie"] 119 | 120 | if passed_string.endswith("ie"): 121 | passed_string = passed_string[:-2] 122 | return passed_string + "ying" 123 | 124 | elif passed_string.endswith("e"): 125 | if passed_string in exception: 126 | return passed_string + "ing" 127 | else: 128 | passed_string = passed_string[:-1] 129 | return passed_string + "ing" 130 | 131 | elif passed_string.endswith("y") or passed_string.endswith("w"): 132 | return passed_string + "ing" 133 | 134 | elif ( 135 | len(passed_string) >= 3 136 | and passed_string[-1] in consonant 137 | and passed_string[-2] in vowel 138 | and passed_string[-3] in consonant 139 | ): 140 | passed_string += passed_string[-1] 141 | return passed_string + "ing" 142 | else: 143 | return passed_string + "ing" 144 | 145 | 146 | def to_ing_form(passed_string): 147 | result = _make_ing_form(passed_string) 148 | if passed_string.islower(): 149 | return result.lower() 150 | if passed_string.isupper(): 151 | return result.upper() 152 | if passed_string.istitle(): 153 | return result.title() 154 | return result 155 | 156 | 157 | inflect_engine = inflect.engine() 158 | 159 | 160 | def is_singular(word): 161 | return inflect_engine.singular_noun(word) is False 162 | 163 | 164 | def is_plural(word): 165 | return bool(inflect_engine.singular_noun(word)) 166 | 167 | 168 | def to_plural(word): 169 | if is_singular(word): 170 | if word.startswith("a "): 171 | word = word[2:] 172 | return inflect_engine.plural(word) 173 | return word 174 | 175 | 176 | def to_singular(word): 177 | if is_plural(word): 178 | return inflect_engine.singular_noun(word) 179 | return word 180 | 181 | 182 | def add_article(word): 183 | # TODO: Maybe more checks, some u's cause "an", or some big letters in case it's an abbreviation 184 | word_lower = word.lower() 185 | article = "a" 186 | if ( 187 | word_lower.startswith("a") 188 | or word_lower.startswith("e") 189 | or word_lower.startswith("i") 190 | or word_lower.startswith("o") 191 | ): 192 | article = "an" 193 | return article + " " + word 194 | 195 | 196 | # Pronouns 197 | 198 | 199 | def second_to_first_pronouns(sentence): 200 | sentence = replace_word(sentence, "yours", "mine") 201 | sentence = replace_word(sentence, "your", "my") 202 | sentence = replace_word(sentence, "you", "me") 203 | return sentence 204 | 205 | 206 | # POS tag checkers 207 | 208 | # TODO: These don't work well, but might be useful features in our text generation language 209 | def is_noun(word): 210 | return "NN" in get_pos_tags(word) 211 | 212 | 213 | def is_verb(word): 214 | return "VB" in get_pos_tags(word) 215 | 216 | 217 | # Special operators 218 | 219 | 220 | 
def get_last_noun_and_article(sentence): 221 | tokens = nltk.word_tokenize(sentence) 222 | tags = nltk.pos_tag(tokens) 223 | 224 | noun = None 225 | for tag in reversed(tags): 226 | if "NN" in tag[1]: 227 | if noun: 228 | noun = (tag[0] + " " + noun).strip() 229 | else: 230 | noun = tag[0] 231 | 232 | # If encountering an article while there is a noun found 233 | elif bool(noun): 234 | if "DT" in tag[1] or "PRP$" in tag[1]: 235 | return tag[0] + " " + noun 236 | return noun 237 | 238 | return None 239 | 240 | 241 | def replace_non_alphabetical_characters(text): 242 | return re.sub(r"[^A-Za-z\s\b -]+", "", text) 243 | 244 | 245 | def is_vowel(character): 246 | return character in ["a", "e", "i", "o", "u"] 247 | 248 | 249 | def is_consonant(character): 250 | return not is_vowel(character) 251 | -------------------------------------------------------------------------------- /talkgenerator/generator.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import os 3 | import pathlib 4 | import random 5 | import subprocess 6 | import sys 7 | import logging 8 | from typing import List, Union, Tuple, Optional 9 | 10 | from pptx import Presentation 11 | 12 | from talkgenerator.slide.slide_deck import SlideDeck 13 | from talkgenerator.schema.content_generators import full_name_generator 14 | from talkgenerator.schema.presentation_schema_types import get_schema 15 | from talkgenerator import runtime_checker 16 | from talkgenerator.sources import phrasefinder 17 | from talkgenerator.util import os_util 18 | 19 | DEFAULT_PRESENTATION_TOPIC = "cat" 20 | MAX_PRESENTATION_SAVE_TRIES = 100 21 | 22 | logger = logging.getLogger("talkgenerator") 23 | 24 | 25 | def generate_presentation_using_cli_arguments(args) -> Tuple[Presentation, SlideDeck, str]: 26 | """Make a talk with the given topic.""" 27 | 28 | runtime_checker.check_runtime_environment() 29 | 30 | # Print status details 31 | logger.info("******************************************") 32 | logger.info("Making {} slide talk on: {}".format(args.num_slides, args.topic)) 33 | 34 | return generate_presentation( 35 | schema=args.schema, 36 | slides=args.num_slides, 37 | topic=args.topic, 38 | title=args.title, 39 | presenter=args.presenter, 40 | parallel=args.parallel, 41 | int_seed=args.int_seed, 42 | print_logs=args.print_logs, 43 | save_ppt=args.save_ppt, 44 | open_ppt=args.open_ppt, 45 | ) 46 | 47 | 48 | def generate_presentation( 49 | schema: str, 50 | slides: int, 51 | topic: Union[str, List[str]] = None, 52 | title: str = None, 53 | presenter: str = None, 54 | parallel: bool = True, 55 | int_seed: int = None, 56 | save_ppt: bool = True, 57 | output_folder: str = "../output/", 58 | open_ppt: bool = False, 59 | print_logs=False, 60 | ) -> Tuple[Presentation, SlideDeck, str]: 61 | 62 | logger.info('**************************') 63 | logger.info('Generating presentation...') 64 | if print_logs: 65 | os_util.show_logs(logger) 66 | 67 | if int_seed is not None: 68 | random.seed(int_seed) 69 | 70 | # Retrieve the schema to generate the presentation with 71 | presentation_schema = get_schema(schema) 72 | logger.info('Presentation schema: {}'.format(presentation_schema)) 73 | 74 | # Generate random presenter name if no presenter name given 75 | if not presenter: 76 | presenter = full_name_generator() 77 | 78 | if not topic: 79 | if title: 80 | topic = phrasefinder.get_rarest_word(title) 81 | else: 82 | topic = DEFAULT_PRESENTATION_TOPIC 83 | 84 | # Extract topics from given (possibly comma separated) topic
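# (e.g. the single string "bike, dog" becomes the topic list ["bike", "dog"],
#  while a list or tuple passed in programmatically is used as-is)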
85 | if type(topic) in [list, tuple]: 86 | topics = topic 87 | else: 88 | topics = [topic.strip() for topic in topic.split(",")] 89 | 90 | logger.info('Presentation topics: {}'.format(topics)) 91 | logger.info('Presentation num_slides: {}'.format(slides)) 92 | logger.info('Presentation presenter: {}'.format(presenter)) 93 | logger.info('Presentation title: {}'.format(title)) 94 | logger.info('Presentation parallel: {}'.format(parallel)) 95 | logger.info('Presentation int_seed: {}'.format(int_seed)) 96 | logger.info('Presentation save_ppt: {}'.format(save_ppt)) 97 | 98 | # Generate the presentation object 99 | presentation, slide_deck = presentation_schema.generate_presentation( 100 | topics=topics, 101 | num_slides=slides, 102 | presenter=presenter, 103 | title=title, 104 | parallel=parallel, 105 | int_seed=int_seed, 106 | save_ppt=save_ppt, 107 | ) 108 | 109 | logger.info('**************************') 110 | logger.info('Presentation generated: {}'.format(presentation)) 111 | logger.info('Slide deck generated: {}'.format(slide_deck)) 112 | 113 | cleaned_topics = ",".join(topics).replace(" ", "").replace(",", "_") 114 | file_name = "".join(e for e in cleaned_topics if e.isalnum() or e == "_") 115 | 116 | logger.info( 117 | "Slide deck structured data: {}".format(slide_deck.get_structured_data()) 118 | ) 119 | 120 | # Save presentation 121 | presentation_file = None 122 | if save_ppt: 123 | presentation_file = save_presentation_to_pptx( 124 | output_folder, file_name, presentation 125 | ) 126 | 127 | # Open the presentation 128 | if open_ppt and presentation_file is not None: 129 | path = os.path.realpath(presentation_file) 130 | _open_file(path) 131 | 132 | return presentation, slide_deck, presentation_file 133 | 134 | 135 | def save_presentation_to_pptx(output_folder: str, file_name: str, prs, index=0) -> Optional[str]: 136 | """Save the talk.""" 137 | if index > MAX_PRESENTATION_SAVE_TRIES: 138 | return None 139 | 140 | suffix = "_" + str(index) if index > 0 else "" 141 | fp: str = os.path.join(output_folder, str(file_name) + str(suffix) + ".pptx") 142 | 143 | # If file already exists, don't overwrite it: 144 | if pathlib.Path(fp).is_file(): 145 | return save_presentation_to_pptx(output_folder, file_name, prs, index + 1) 146 | 147 | # Create the parent folder if it doesn't exist 148 | pathlib.Path(os.path.dirname(fp)).mkdir(parents=True, exist_ok=True) 149 | 150 | try: 151 | prs.save(fp) 152 | logger.info("Saved talk to {}".format(fp)) 153 | return fp 154 | except PermissionError: 155 | return save_presentation_to_pptx(output_folder, file_name, prs, index + 1) 156 | 157 | 158 | def _open_file(filename: str): 159 | """Platform independent open method to cover different OS.""" 160 | if sys.platform == "win32": 161 | os.startfile(filename) 162 | else: 163 | opener = "open" if sys.platform == "darwin" else "xdg-open" 164 | subprocess.call([opener, filename]) 165 | 166 | 167 | def str2bool(v): 168 | # stackoverflow.com/questions/15008758/parsing-boolean-values-with-argparse 169 | if v.lower() in ("yes", "true", "t", "y", "1"): 170 | return True 171 | elif v.lower() in ("no", "false", "f", "n", "0"): 172 | return False 173 | else: 174 | raise argparse.ArgumentTypeError("Boolean value expected.") 175 | 176 | 177 | def get_argument_parser(): 178 | parser = argparse.ArgumentParser(description="Quickly build a slide deck.") 179 | parser.add_argument("--topic", default="", type=str, help="Topic of presentation.") 180 | parser.add_argument( 181 | "--num_slides", 182 | "--slides", 183 | default=10, 
184 | type=int, 185 | help="Number of slides to create.", 186 | ) 187 | parser.add_argument( 188 | "--int_seed", 189 | default=None, 190 | type=int, 191 | help="Seed used for random.seed(int_seed). Fill in any number to add more consistency between runs.", 192 | ) 193 | parser.add_argument( 194 | "--schema", 195 | default="default", 196 | type=str, 197 | help="The presentation schema to generate the presentation with", 198 | ) 199 | parser.add_argument( 200 | "--presenter", 201 | default=None, 202 | type=str, 203 | help="The full name of the presenter, leave blank to randomise", 204 | ) 205 | parser.add_argument( 206 | "--title", 207 | default=None, 208 | type=str, 209 | help="The title of the talk, leave blank to randomise", 210 | ) 211 | parser.add_argument( 212 | "--parallel", 213 | default=True, 214 | type=str2bool, 215 | help=( 216 | "Generate the powerpoint in parallel " 217 | + "(faster, but drops some conditions)" 218 | ), 219 | ) 220 | parser.add_argument( 221 | "--print_logs", 222 | default=True, 223 | type=str2bool, 224 | help="Print logs about the generation process.", 225 | ) 226 | parser.add_argument( 227 | "--output_folder", 228 | default="../output/", 229 | type=str, 230 | help="The folder to output the generated presentations", 231 | ) 232 | parser.add_argument( 233 | "--save_ppt", 234 | default=True, 235 | type=str2bool, 236 | help="If this flag is true, the generated powerpoint will be saved", 237 | ) 238 | parser.add_argument( 239 | "--open_ppt", 240 | default=True, 241 | type=str2bool, 242 | help="Generated powerpoint will automatically open", 243 | ) 244 | return parser 245 | -------------------------------------------------------------------------------- /talkgenerator/sources/text_generator.py: -------------------------------------------------------------------------------- 1 | """ This module helps out with generating text using templates """ 2 | import json 3 | import random 4 | import re 5 | from functools import lru_cache 6 | 7 | import tracery 8 | from tracery.modifiers import base_english 9 | 10 | from talkgenerator.sources import conceptnet 11 | from talkgenerator.sources import phrasefinder 12 | from talkgenerator.sources import wikihow 13 | from talkgenerator.util import language_util 14 | from talkgenerator.util import os_util 15 | from talkgenerator.util import random_util 16 | 17 | known_functions = { 18 | "title": str.title, 19 | "lower": str.lower, 20 | "upper": str.upper, 21 | "dashes": lambda words: words.replace(" ", "-"), 22 | "first_letter": lambda words: words[0], 23 | "last_letter_is_vowel": lambda word: word 24 | if language_util.is_vowel(word[-1]) 25 | else None, 26 | "last_letter_is_consonant": lambda word: word 27 | if language_util.is_consonant(word[-1]) 28 | else None, 29 | "a": lambda word: language_util.add_article(word), 30 | "ing": language_util.to_present_participle, 31 | "plural": language_util.to_plural, 32 | "singular": language_util.to_singular, 33 | # "synonym": generator_util.FromListGenerator(language_util.get_synonyms), 34 | "2_to_1_pronouns": language_util.second_to_first_pronouns, 35 | "wikihow_action": lambda seed: random_util.choice_optional( 36 | wikihow.get_related_wikihow_actions(seed) 37 | ), 38 | "get_last_noun_and_article": language_util.get_last_noun_and_article, 39 | # Conceptnet 40 | "conceptnet_location": conceptnet.weighted_location_generator, 41 | "conceptnet_related": conceptnet.weighted_related_word_generator, 42 | "conceptnet_related_single_word": lambda word: phrasefinder.get_rarest_word( 43 |
conceptnet.weighted_related_word_generator(word) 44 | ), 45 | # Checkers 46 | "is_noun": lambda word: word if language_util.is_noun(word) else None, 47 | "is_verb": lambda word: word if language_util.is_verb(word) else None, 48 | # Unique: To make a variable not be the same as something else with the same parameters 49 | "unique": lambda x: x, 50 | } 51 | 52 | 53 | class AbstractTextGenerator(object): 54 | def generate(self, variables_dictionary): 55 | raise NotImplementedError() 56 | 57 | def generate_with_seed(self, seed): 58 | return self.generate({"seed": seed}) 59 | 60 | 61 | class TemplatedTextGenerator(AbstractTextGenerator): 62 | def __init__(self, template_file=None, templates_list=None): 63 | templates = [] 64 | if template_file: 65 | templates.extend(read_lines(template_file)) 66 | if templates_list: 67 | templates.extend(templates_list) 68 | # Create a tuple so no templates can accidentally be deleted from the generator 69 | self._templates = tuple(templates) 70 | 71 | def generate(self, variables_dictionary=None): 72 | """ Generates a text from the templates using the given variables dictionary""" 73 | # Set empty dictionary if none is given 74 | if not bool(variables_dictionary): 75 | variables_dictionary = {} 76 | # Create a mutable copy of the templates list 77 | possible_templates = list(self._templates) 78 | for i in range(len(possible_templates)): 79 | template = random.choice(possible_templates) 80 | if can_format_with(template, variables_dictionary): 81 | result = apply_variables_to_template(template, variables_dictionary) 82 | if result: 83 | return result 84 | # Remove the template from the possible templates list, such that it won't be picked again 85 | possible_templates.remove(template) 86 | 87 | 88 | class TraceryTextGenerator(AbstractTextGenerator): 89 | def __init__(self, tracery_json, variable="origin"): 90 | with open(os_util.to_actual_file(tracery_json)) as grammar_file: 91 | grammar = get_tracery_grammar(grammar_file) 92 | grammar.add_modifiers(base_english) 93 | self._grammar = grammar 94 | self._variable = variable 95 | 96 | def generate(self, variables_dictionary=None): 97 | """ Generates a text from internal tracery grammar using the given variables dictionary""" 98 | # Set empty dictionary if none is given 99 | if not bool(variables_dictionary): 100 | variables_dictionary = {} 101 | 102 | # Generate 103 | for i in range(100): # TODO prune the grammar instead of retrying 104 | template = self._grammar.flatten("#" + self._variable + "#") 105 | if can_format_with(template, variables_dictionary): 106 | result = apply_variables_to_template(template, variables_dictionary) 107 | if result: 108 | return result 109 | 110 | 111 | @lru_cache(maxsize=20) 112 | def get_tracery_grammar(grammar_file): 113 | return tracery.Grammar(json.load(grammar_file)) 114 | 115 | 116 | def can_format_with(template, variables_dictionary): 117 | """ Checks if the template can be fully formatted by the given variable dictionary without errors""" 118 | format_variables = get_format_variables(template) 119 | return (len(format_variables) == 0 and len(variables_dictionary) == 0) or set( 120 | format_variables 121 | ) <= set(variables_dictionary.keys()) 122 | 123 | 124 | def get_format_variables(template): 125 | """ Finds all the names of the variables used in the template """ 126 | return {x[0] for x in get_format_variables_and_functions(template)} 127 | 128 | 129 | def get_format_variables_and_functions(template): 130 | """ Finds all the names of the variables used in the template with their functions
in a large tuple""" 131 | matches = re.findall(r"{(\w+)((?:[.]\w+)*)}", template) 132 | return set(matches) 133 | 134 | 135 | def apply_variables_to_template(template, variables_dictionary): 136 | variables_and_functions = get_format_variables_and_functions(template) 137 | applied = apply_functions_to_variables( 138 | template, variables_dictionary, variables_and_functions 139 | ) 140 | if applied: 141 | (template, variables_dictionary) = applied 142 | return template.format(**variables_dictionary) 143 | 144 | 145 | def apply_functions(variable, functions): 146 | """ Applies a list of functions to a variable """ 147 | result = variable 148 | for func in functions: 149 | # Check if it transformed the result into None 150 | if result is None: 151 | return None 152 | 153 | if func in known_functions: 154 | result = known_functions[func](result) 155 | # Check if it is a dictionary, as is allowed in real str.format 156 | elif isinstance(result, dict) and func in result: 157 | result = result[func] 158 | # Unique identifier to make similar functions on a variable have different effects 159 | elif func.isdigit(): 160 | result = result 161 | else: 162 | raise ValueError("Unknown function:", func) 163 | 164 | return result 165 | 166 | 167 | def apply_functions_to_variables( 168 | template, variables_dictionary, variables_and_functions 169 | ): 170 | """ Applies the functions of the variables_and_functions tuple and stores them in the variable dictionary and 171 | updates the template """ 172 | variables_and_functions = list(variables_and_functions) 173 | variables_and_functions.sort(key=lambda a: len(a), reverse=True) 174 | 175 | for var_func in variables_and_functions: 176 | # Check if it has functions to apply 177 | if len(var_func) > 1 and len(var_func[1]) > 0: 178 | old_var_name = var_func[0] + var_func[1] 179 | functions = var_func[1][1:].split(".") 180 | variable_name = var_func[0] 181 | variable = variables_dictionary[variable_name] 182 | applied_functions = apply_functions(variable, functions) 183 | if applied_functions is not None: 184 | applied_var_name = old_var_name.replace(".", "_") 185 | # Replace all occurrences with the dot to the underscore notation 186 | template = template.replace(old_var_name, applied_var_name) 187 | # Store in dictionary 188 | variables_dictionary[applied_var_name] = applied_functions 189 | else: 190 | return None 191 | 192 | return template, variables_dictionary 193 | 194 | 195 | def read_lines(filename): 196 | """ Reads all the string lines from a file """ 197 | return os_util.read_lines(filename) 198 | -------------------------------------------------------------------------------- /talkgenerator/slide/powerpoint_slide_creator.py: -------------------------------------------------------------------------------- 1 | import os 2 | import sys 3 | import logging 4 | from functools import lru_cache 5 | from io import BytesIO 6 | from pathlib import Path 7 | from typing import List 8 | 9 | import requests 10 | import PIL 11 | from PIL import Image 12 | from PIL import UnidentifiedImageError 13 | from lxml.etree import XMLSyntaxError 14 | from pptx import Presentation 15 | 16 | from talkgenerator.datastructures.image_data import ImageData 17 | from talkgenerator.util import os_util 18 | 19 | # Location of powerpoint template 20 | _POWERPOINT_TEMPLATE_FILE = "data/powerpoint/template.pptx" 21 | 22 | logger = logging.getLogger("talkgenerator") 23 | 24 | 25 | @lru_cache(maxsize=1) 26 | def get_powerpoint_template_file(): 27 | return 
os_util.to_actual_file(_POWERPOINT_TEMPLATE_FILE) 28 | 29 | 30 | SOURCES_PLACEHOLDER = 10 31 | 32 | # Layouts index in template 33 | LAYOUT_TITLE_SLIDE = 0 34 | LAYOUT_TITLE_AND_CONTENT = 1 35 | LAYOUT_SECTION_HEADER = 2 36 | LAYOUT_TWO_CONTENT = 3 37 | LAYOUT_TWO_TITLE_AND_CONTENT = 4 38 | LAYOUT_TITLE_ONLY = 5 39 | LAYOUT_BLANK = 6 40 | LAYOUT_CONTENT_CAPTION = 7 41 | LAYOUT_PICTURE_CAPTION = 8 42 | LAYOUT_FULL_PICTURE = 11 43 | LAYOUT_TITLE_AND_PICTURE = 12 44 | LAYOUT_LARGE_QUOTE = 13 45 | LAYOUT_TWO_TITLE_AND_IMAGE = 14 46 | LAYOUT_THREE_TITLE_AND_IMAGE = 15 47 | LAYOUT_TITLE_AND_CHART = 16 48 | 49 | 50 | # = HELPERS = 51 | class FileLikeImage: 52 | def get_file_like(self): 53 | raise NotImplementedError() 54 | 55 | def image(self): 56 | raise NotImplementedError() 57 | 58 | 59 | class ExternalImage(FileLikeImage): 60 | def __init__(self, url): 61 | self._url = url 62 | 63 | @lru_cache() 64 | def get_bytes_io(self): 65 | response = requests.get(self._url) 66 | tmp_img = BytesIO(response.content) 67 | return tmp_img 68 | 69 | def get_file_like(self): 70 | return self.get_bytes_io() 71 | 72 | def image(self): 73 | open_image = None 74 | try: 75 | open_image = Image.open(self.get_bytes_io()) 76 | except PIL.UnidentifiedImageError as e: 77 | logger.error(e) 78 | logger.error('PIL.UnidentifiedImageError') 79 | return open_image 80 | 81 | 82 | class InternalImage(FileLikeImage): 83 | def __init__(self, file_location): 84 | self._file_location = file_location 85 | 86 | def get_file_like(self): 87 | return self._file_location 88 | 89 | def image(self): 90 | return Image.open(self._file_location) 91 | 92 | # CREATION 93 | def _create_slide(prs, slide_type): 94 | """ Creates a new slide in the given presentation using the slide_type template """ 95 | return prs.slides.add_slide(prs.slide_layouts[slide_type]) 96 | 97 | 98 | def _add_title(slide, title): 99 | """ Adds the given title to the slide if the title is present""" 100 | if title: 101 | title_object = slide.shapes.title 102 | title_object.text = title 103 | return True 104 | 105 | 106 | def _add_text(slide, placeholder_id, text): 107 | if text: 108 | placeholder = slide.placeholders[placeholder_id] 109 | placeholder.text = str(text) 110 | return True 111 | 112 | 113 | def is_external_url(url: str): 114 | return url.startswith("http") 115 | 116 | 117 | def _add_image( 118 | slide, placeholder_id: int, image: ImageData, original_image_size: bool = True 119 | ): 120 | if isinstance(image, ImageData): 121 | image_url = image.get_image_url() 122 | else: 123 | image_url = image 124 | 125 | if is_external_url(image_url): 126 | image_ref = ExternalImage(image_url) 127 | else: 128 | path = Path(image_url).absolute() 129 | image_ref = InternalImage(str(path)) 130 | 131 | placeholder = slide.placeholders[placeholder_id] 132 | if original_image_size: 133 | # Calculate the image size of the image 134 | try: 135 | # Insert the picture 136 | try: 137 | width, height = image_ref.image().size 138 | # Make sure the placeholder doesn't zoom in 139 | placeholder.height = height 140 | placeholder.width = width 141 | placeholder = placeholder.insert_picture(image_ref.get_file_like()) 142 | # Calculate ratios and compare 143 | image_ratio = width / height 144 | placeholder_ratio = placeholder.width / placeholder.height 145 | ratio_difference = placeholder_ratio - image_ratio 146 | # Placeholder width too wide: 147 | if ratio_difference > 0: 148 | difference_on_each_side = ratio_difference / 2 149 | placeholder.crop_left = -difference_on_each_side
placeholder.crop_right = -difference_on_each_side 151 | # Placeholder height too high 152 | else: 153 | difference_on_each_side = -ratio_difference / 2 154 | placeholder.crop_bottom = -difference_on_each_side 155 | placeholder.crop_top = -difference_on_each_side 156 | return placeholder 157 | except (ValueError, XMLSyntaxError, AttributeError) as e: 158 | logger.error("_add_image error: {}".format(e)) 159 | return None 160 | 161 | except FileNotFoundError as fnfe: 162 | logger.error("_add_image file not found: {}".format(fnfe)) 163 | return None 164 | else: 165 | try: 166 | return placeholder.insert_picture(image_ref.get_file_like()) 167 | except (OSError, ValueError) as e: 168 | logger.error(e) 169 | logger.error("Unexpected error inserting image: {}:{}".format(image, sys.exc_info()[0])) 170 | return None 171 | 172 | 173 | def _add_chart(slide, placeholder_id, chart_type, chart_data): 174 | placeholder = slide.placeholders[placeholder_id] 175 | return placeholder.insert_chart(chart_type, chart_data) 176 | 177 | 178 | def _add_image_or_text(slide, placeholder_id, image_url_or_text, original_image_size): 179 | if os_util.is_image(image_url_or_text): 180 | return _add_image(slide, placeholder_id, image_url_or_text, original_image_size) 181 | else: 182 | return _add_text(slide, placeholder_id, image_url_or_text) 183 | 184 | 185 | def _print_all_placeholders(slide): 186 | for shape in slide.placeholders: 187 | print("%d %s" % (shape.placeholder_format.idx, shape.name)) 188 | 189 | 190 | def add_sources_note(slide, _sources: List[str]): 191 | return _add_text( 192 | slide, SOURCES_PLACEHOLDER, "[Image sources: " + ", ".join(_sources) + "]" 193 | ) 194 | 195 | 196 | # FORMAT GENERATORS 197 | # These are functions that get some inputs (texts, images...) 198 | # and create layouted slide with these inputs 199 | 200 | 201 | def create_new_powerpoint() -> Presentation: 202 | return Presentation(get_powerpoint_template_file()) 203 | 204 | 205 | def create_title_slide(prs, title, subtitle): 206 | slide = _create_slide(prs, LAYOUT_TITLE_SLIDE) 207 | _add_title(slide, title) 208 | _add_text(slide, 1, subtitle) 209 | return slide 210 | 211 | 212 | def create_large_quote_slide(prs, title, text, background_image=None): 213 | if bool(text): 214 | slide = _create_slide(prs, LAYOUT_LARGE_QUOTE) 215 | if title: 216 | _add_title(slide, title) 217 | _add_text(slide, 1, text) 218 | if background_image: 219 | _add_image(slide, 11, background_image, False) 220 | 221 | # Add black transparent image for making other image behind it transparent (missing feature in python-pptx) 222 | data_folder = Path(__file__).parent.parent / "data" / "images" / "black-transparent.png" 223 | _add_image(slide, 12, ImageData(str(data_folder.absolute())), False) 224 | 225 | return slide 226 | 227 | 228 | def create_image_slide(prs, title=None, image_url=None, original_image_size=True): 229 | """ Creates a slide with an image covering the whole slide""" 230 | # TODO debug this: the image can not be set! 
231 | return _create_single_image_slide( 232 | prs, title, image_url, LAYOUT_TITLE_AND_PICTURE, original_image_size 233 | ) 234 | 235 | 236 | def create_full_image_slide(prs, title=None, image_url=None, original_image_size=True): 237 | """ Creates a slide with an image covering the whole slide""" 238 | return _create_single_image_slide( 239 | prs, title, image_url, LAYOUT_FULL_PICTURE, original_image_size 240 | ) 241 | 242 | 243 | def create_two_column_images_slide( 244 | prs, 245 | title=None, 246 | caption_1=None, 247 | image_or_text_1=None, 248 | caption_2=None, 249 | image_or_text_2=None, 250 | original_image_size=True, 251 | ): 252 | # if _is_valid_content(image_or_text_1) and _is_valid_content(image_or_text_2): 253 | slide = _create_slide(prs, LAYOUT_TWO_TITLE_AND_IMAGE) 254 | _add_title(slide, title) 255 | _add_text(slide, 1, caption_1) 256 | _add_image_or_text(slide, 13, image_or_text_1, original_image_size) 257 | _add_text(slide, 3, caption_2) 258 | _add_image_or_text(slide, 14, image_or_text_2, original_image_size) 259 | return slide 260 | 261 | 262 | def create_three_column_images_slide( 263 | prs, 264 | title=None, 265 | caption_1=None, 266 | image_or_text_1=None, 267 | caption_2=None, 268 | image_or_text_2=None, 269 | caption_3=None, 270 | image_or_text_3=None, 271 | original_image_size=True, 272 | ): 273 | # if ( 274 | # _is_valid_content(image_or_text_1) 275 | # and _is_valid_content(image_or_text_2) 276 | # and _is_valid_content(image_or_text_3) 277 | # ): 278 | slide = _create_slide(prs, LAYOUT_THREE_TITLE_AND_IMAGE) 279 | _add_title(slide, title) 280 | _add_text(slide, 1, caption_1) 281 | _add_image_or_text(slide, 13, image_or_text_1, original_image_size) 282 | _add_text(slide, 3, caption_2) 283 | _add_image_or_text(slide, 14, image_or_text_2, original_image_size) 284 | _add_text(slide, 15, caption_3) 285 | _add_image_or_text(slide, 16, image_or_text_3, original_image_size) 286 | return slide 287 | 288 | 289 | # def create_two_column_images_slide_text_second(prs, title=None, caption_1=None, image_1=None, caption_2=None, 290 | # quote=None, 291 | # original_image_size=True): 292 | # if bool(image_1): 293 | # slide = _create_slide(prs, LAYOUT_TWO_TITLE_AND_IMAGE) 294 | # _add_title(slide, title) 295 | # _add_text(slide, 1, caption_1) 296 | # _add_image_or_text(slide, 13, image_1, original_image_size) 297 | # _add_text(slide, 3, caption_2) 298 | # _add_image_or_text(slide, 14, quote) 299 | # return slide 300 | 301 | 302 | def _create_single_image_slide(prs, title, image_url, slide_template_idx, fit_image): 303 | # if _is_valid_content(image_url): 304 | slide = _create_slide(prs, slide_template_idx) 305 | _add_title(slide, title) 306 | _add_image_or_text(slide, 1, image_url, fit_image) 307 | return slide 308 | 309 | 310 | def create_chart_slide(prs, title, chart_type, chart_data, chart_modifier=None): 311 | slide = _create_slide(prs, LAYOUT_TITLE_AND_CHART) 312 | _add_title(slide, title) 313 | chart = _add_chart(slide, 10, chart_type, chart_data).chart 314 | if chart_modifier: 315 | chart_modifier(chart, chart_data) 316 | return slide 317 | -------------------------------------------------------------------------------- /talkgenerator/schema/content_generators.py: -------------------------------------------------------------------------------- 1 | from typing import Collection, Union 2 | 3 | from talkgenerator.sources import pixabay, pexels 4 | from talkgenerator.schema.content_generator_structures import * 5 | from talkgenerator.sources import inspirobot 6 | from 
talkgenerator.sources import shitpostbot 7 | from talkgenerator.sources import unsplash 8 | from talkgenerator.util.generator_util import * 9 | 10 | # =============================== 11 | # ===== CONTENT GENERATORS ===== 12 | # =============================== 13 | 14 | # === TEXT GENERATORS === 15 | 16 | # TITLES 17 | talk_title_generator = create_tracery_generator("data/text-templates/talk_title.json") 18 | talk_ted_title_generator = create_tracery_generator( 19 | "data/text-templates/talk_title.json", "ted_title" 20 | ) 21 | talk_subtitle_generator = create_tracery_generator( 22 | "data/text-templates/talk_subtitle.json" 23 | ) 24 | 25 | 26 | def talk_title_generator_if_not_generated(presentation_context): 27 | if presentation_context["title"] is not None: 28 | return presentation_context["title"] 29 | return talk_title_generator(presentation_context) 30 | 31 | 32 | default_slide_title_generator = create_templated_text_generator( 33 | "data/text-templates/default_slide_title.txt" 34 | ) 35 | deep_abstract_generator = create_templated_text_generator( 36 | "data/text-templates/deep_abstract.txt" 37 | ) 38 | 39 | default_or_no_title_generator = CombinedGenerator( 40 | (1, default_slide_title_generator), (1, NoneGenerator()) 41 | ) 42 | 43 | anticipation_title_generator = create_templated_text_generator( 44 | "data/text-templates/anticipation_title.txt" 45 | ) 46 | 47 | conclusion_title_generator = create_templated_text_generator( 48 | "data/text-templates/conclusion_title.txt" 49 | ) 50 | inspiration_title_generator = create_templated_text_generator( 51 | "data/text-templates/inspiration.txt" 52 | ) 53 | anecdote_title_generator = create_templated_text_generator( 54 | "data/text-templates/anecdote_title.txt" 55 | ) 56 | history_title_generator = create_templated_text_generator( 57 | "data/text-templates/history.txt" 58 | ) 59 | history_person_title_generator = create_templated_text_generator( 60 | "data/text-templates/history_person.txt" 61 | ) 62 | history_and_history_person_title_generator = CombinedGenerator( 63 | (4, history_title_generator), (6, history_person_title_generator) 64 | ) 65 | about_me_title_generator = create_templated_text_generator( 66 | "data/text-templates/about_me_title.txt" 67 | ) 68 | 69 | # NAMES 70 | historical_name_generator = create_tracery_generator( 71 | "data/text-templates/name.json", "title_name" 72 | ) 73 | full_name_generator = create_tracery_generator( 74 | "data/text-templates/name.json", "full_name" 75 | ) 76 | 77 | # ABOUT ME 78 | _about_me_facts_grammar = "data/text-templates/about_me_facts.json" 79 | book_description_generator = create_tracery_generator( 80 | _about_me_facts_grammar, "book_description" 81 | ) 82 | location_description_generator = create_tracery_generator( 83 | _about_me_facts_grammar, "location_description" 84 | ) 85 | hobby_description_generator = create_tracery_generator( 86 | _about_me_facts_grammar, "hobby_description" 87 | ) 88 | job_generator = create_tracery_generator(_about_me_facts_grammar, "job") 89 | country_generator = create_tracery_generator(_about_me_facts_grammar, "country") 90 | 91 | # PROMPTS & CHALLENGES 92 | 93 | anecdote_prompt_generator = create_templated_text_generator( 94 | "data/text-templates/anecdote_prompt.txt" 95 | ) 96 | 97 | # QUOTES 98 | goodreads_quote_generator = GoodReadsQuoteGenerator(250) 99 | goodreads_short_quote_generator = GoodReadsQuoteGenerator(140) 100 | 101 | # DOUBLE CAPTIONS 102 | 103 | _double_image_captions_generator = create_templated_text_generator( 104 | 
"data/text-templates/double_captions.txt" 105 | ) 106 | _triple_image_captions_generator = create_templated_text_generator( 107 | "data/text-templates/triple_captions.txt" 108 | ) 109 | _historic_double_captions_generator = create_templated_text_generator( 110 | "data/text-templates/historic_double_captions.txt" 111 | ) 112 | 113 | double_image_captions_generator = SplitCaptionsGenerator( 114 | _double_image_captions_generator 115 | ) 116 | triple_image_captions_generator = SplitCaptionsGenerator( 117 | _triple_image_captions_generator 118 | ) 119 | historic_double_captions_generator = SplitCaptionsGenerator( 120 | _historic_double_captions_generator 121 | ) 122 | 123 | # Conclusions 124 | _conclusions_tuple_grammar = "data/text-templates/conclusion_tuple.json" 125 | conclusion_two_captions_tuple_generator = SplitCaptionsGenerator( 126 | create_tracery_generator(_conclusions_tuple_grammar, "two_conclusions") 127 | ) 128 | 129 | conclusion_three_captions_tuple_generator = SplitCaptionsGenerator( 130 | create_tracery_generator(_conclusions_tuple_grammar, "three_conclusions") 131 | ) 132 | 133 | # === IMAGE GENERATORS === 134 | 135 | # INSPIROBOT 136 | inspirobot_image_generator = inspirobot.get_random_inspirobot_image 137 | 138 | # GIFS 139 | 140 | reddit_gif_generator = create_reddit_image_generator( 141 | "gifs", "gif", "gifextra", "nonononoYES" 142 | ) 143 | 144 | combined_gif_generator = CombinedGenerator((1, reddit_gif_generator)) 145 | 146 | # REDDIT 147 | 148 | meme_reddit_image_generator = create_reddit_image_generator( 149 | "meme", 150 | "memes", 151 | "MemeEconomy", 152 | "wholesomememes", 153 | "dankmemes", 154 | "AdviceAnimals", 155 | "comics", 156 | ) 157 | weird_reddit_image_generator = create_reddit_image_generator( 158 | "hmmm", 159 | "hmm", 160 | "wtf", 161 | "wtfstockphotos", 162 | "weirdstockphotos", 163 | "darkstockphotos", 164 | "photoshopbattles", 165 | "confusing_perspective", 166 | "cursedimages", 167 | "HybridAnimals", 168 | "EyeBleach", 169 | "natureismetal", 170 | "195", 171 | ) 172 | 173 | neutral_reddit_image_generator = create_reddit_image_generator( 174 | "Cinemagraphs", 175 | "itookapicture", 176 | "Art", 177 | "artstore", 178 | "pics", 179 | "analog", 180 | "ExposurePorn", 181 | "Illustration", 182 | ) 183 | 184 | shitpostbot_image_generator = ExternalImageListGenerator( 185 | SeededGenerator( 186 | BackupGenerator( 187 | shitpostbot.search_images_rated, shitpostbot.get_random_images_rated 188 | ) 189 | ), 190 | weighted=True, 191 | ) 192 | 193 | weird_punchline_static_image_generator = CombinedGenerator( 194 | (4, weird_reddit_image_generator), 195 | (6, shitpostbot_image_generator), 196 | (1, meme_reddit_image_generator), 197 | ) 198 | 199 | weird_punchline_image_generator = CombinedGenerator( 200 | (10, weird_reddit_image_generator), 201 | (8, shitpostbot_image_generator), 202 | (6, combined_gif_generator), 203 | (1, meme_reddit_image_generator), 204 | ) 205 | 206 | 207 | # UNSPLASH 208 | generate_unsplash_image_from_word = ExternalImageListGenerator( 209 | unsplash.search_photos, check_image_validness=False 210 | ) 211 | generate_random_unsplash_image_from_word = ExternalImageListGenerator( 212 | unsplash.random_as_list, check_image_validness=False 213 | ) 214 | generate_unsplash_image = SeededGenerator(generate_unsplash_image_from_word) 215 | generate_random_unsplash_image = SeededGenerator( 216 | generate_random_unsplash_image_from_word 217 | ) 218 | 219 | # PIXABAY 220 | generate_pixabay_image_from_word = 
ExternalImageListGenerator(pixabay.search_photos) 221 | generate_horizontal_pixabay_image_from_word = ExternalImageListGenerator( 222 | pixabay.search_horizontal 223 | ) 224 | generate_pixabay_image = SeededGenerator(generate_pixabay_image_from_word) 225 | # PEXELS 226 | 227 | generate_pexels_image_from_word = ExternalImageListGenerator(pexels.search_photos) 228 | generate_pexels_image = SeededGenerator(generate_pexels_image_from_word) 229 | 230 | # COPYRIGHT FREE 231 | 232 | copyright_free_generator = CombinedGenerator( 233 | (1, generate_unsplash_image), 234 | (1, generate_pixabay_image), 235 | (1, generate_pexels_image), 236 | (0.01, generate_random_unsplash_image), 237 | ) 238 | copyright_free_generator_from_word = CombinedGenerator( 239 | (1, generate_unsplash_image_from_word), 240 | (1, generate_pixabay_image_from_word), 241 | (1, generate_pexels_image_from_word), 242 | (0.01, generate_random_unsplash_image_from_word), 243 | ) 244 | 245 | generate_horizontal_pixabay_image = CombinedGenerator( 246 | (100, SeededGenerator(generate_horizontal_pixabay_image_from_word)), 247 | # Backup: 248 | (0.01, copyright_free_generator), 249 | ) 250 | 251 | copyright_free_related_generator_from_word = ConceptNetMapper( 252 | copyright_free_generator_from_word 253 | ) 254 | copyright_free_related_generator = SeededGenerator( 255 | copyright_free_related_generator_from_word 256 | ) 257 | 258 | 259 | def copyright_free_prefixed_generator(prefixes: Union[str, Collection[str]]): 260 | return SeededGenerator(copyright_free_prefixed_generator_from_word(prefixes)) 261 | 262 | 263 | def copyright_free_prefixed_generator_from_word(prefixes: Union[str, Collection[str]]): 264 | if isinstance(prefixes, str): 265 | return PrefixedGenerator(prefixes, copyright_free_generator_from_word) 266 | generators = [ 267 | (1, PrefixedGenerator(p, copyright_free_generator_from_word)) for p in prefixes 268 | ] 269 | return CombinedGenerator(*generators) 270 | 271 | 272 | weird_copyright_free_generator = copyright_free_prefixed_generator( 273 | ["weird", "humor", "funny"] 274 | ) 275 | normal_or_weird_copyright_free_generator = CombinedGenerator( 276 | (1, copyright_free_generator), (1, weird_copyright_free_generator) 277 | ) 278 | 279 | # NEUTRAL 280 | 281 | neutral_image_generator = CombinedGenerator( 282 | (1000, copyright_free_generator), (300, neutral_reddit_image_generator), 283 | ) 284 | 285 | neutral_image_generator_from_word = CombinedGenerator( 286 | (1000, copyright_free_generator_from_word), 287 | (300, UnseededGenerator(neutral_reddit_image_generator)), 288 | ) 289 | 290 | neutral_or_weird_image_generator = CombinedGenerator( 291 | (1, neutral_image_generator), (1, weird_punchline_image_generator) 292 | ) 293 | 294 | # OLD/VINTAGE 295 | vintage_person_generator = create_reddit_image_generator("OldSchoolCool") 296 | vintage_picture_generator = create_reddit_image_generator( 297 | "TheWayWeWere", "100yearsago", "ColorizedHistory" 298 | ) 299 | 300 | reddit_book_cover_generator = create_reddit_image_generator( 301 | "BookCovers", "fakebookcovers", "coverdesign", "bookdesign" 302 | ) 303 | 304 | reddit_location_image_generator = create_reddit_image_generator( 305 | "evilbuildings", "itookapicture", "SkyPorn", "EarthPorn" 306 | ) 307 | 308 | # TUPLED ABOUT ME 309 | 310 | about_me_hobby_tuple_generator = TupledGenerator( 311 | hobby_description_generator, weird_punchline_image_generator 312 | ) 313 | about_me_book_tuple_generator = TupledGenerator( 314 | book_description_generator, reddit_book_cover_generator 315 | ) 316 
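# --- Illustrative usage sketch, not part of the repository source. The "about me"
# tuple generators in this section are plain callables over the shared
# presentation_context dict; TupledGenerator (defined in
# talkgenerator/util/generator_util.py further below) calls every wrapped generator
# with the same context and returns the results as a tuple, roughly:
#
#     slide_context = {"seed": "cats"}  # illustrative; "seed" is the key the seeded generators in this file rely on
#     caption, cover_image = about_me_book_tuple_generator(slide_context)
#
# where caption is a tracery-generated book description and cover_image is an image
# drawn from the book-cover subreddits.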
| about_me_location_tuple_generator = TupledGenerator( 317 | location_description_generator, reddit_location_image_generator 318 | ) 319 | 320 | about_me_job_tuple_generator = MappedGenerator( 321 | InspiredTupleGenerator( 322 | MappedGenerator(job_generator, str.title), neutral_image_generator_from_word 323 | ), 324 | JobPrefixApplier(), 325 | ) 326 | 327 | about_me_country_tuple_generator = MappedGenerator( 328 | InspiredTupleGenerator(country_generator, neutral_image_generator_from_word), 329 | CountryPrefixApplier(), 330 | ) 331 | 332 | about_me_location_or_country_tuple_generator = CombinedGenerator( 333 | (3, about_me_country_tuple_generator), (1, about_me_location_tuple_generator) 334 | ) 335 | 336 | # Charts 337 | 338 | reddit_chart_generator = create_reddit_image_generator( 339 | "dataisbeautiful", "funnycharts", "charts" 340 | ) 341 | -------------------------------------------------------------------------------- /talkgenerator/sources/chart.py: -------------------------------------------------------------------------------- 1 | import math 2 | import random 3 | 4 | from pptx.chart.data import ChartData 5 | from pptx.chart.data import XyChartData 6 | from pptx.enum.chart import XL_CHART_TYPE 7 | from pptx.enum.chart import XL_LABEL_POSITION 8 | from pptx.enum.chart import XL_TICK_MARK 9 | 10 | from talkgenerator.sources import conceptnet, text_generator 11 | from talkgenerator.util import generator_util 12 | 13 | yes_no_question_generator = text_generator.TraceryTextGenerator( 14 | "data/text-templates/chart_texts.json", "yes_no_question" 15 | ).generate 16 | funny_yes_no_answer_generator = text_generator.TraceryTextGenerator( 17 | "data/text-templates/chart_texts.json", "funny_yes_no_answer" 18 | ).generate 19 | location_question_generator = text_generator.TraceryTextGenerator( 20 | "data/text-templates/chart_texts.json", "location_question" 21 | ).generate 22 | property_question_generator = text_generator.TraceryTextGenerator( 23 | "data/text-templates/chart_texts.json", "property_question" 24 | ).generate 25 | correlation_title_generator = text_generator.TraceryTextGenerator( 26 | "data/text-templates/chart_texts.json", "correlation_title" 27 | ).generate 28 | 29 | 30 | # DATA POINTS HELPERS 31 | 32 | 33 | def add_noise_to_points(max_noise_ratio, datapoints): 34 | return [add_noise_to_point(max_noise_ratio, point) for point in datapoints] 35 | 36 | 37 | def add_noise_to_point(max_noise_ratio, datapoint): 38 | return max( 39 | 0, datapoint + (datapoint * random.uniform(-max_noise_ratio, max_noise_ratio)) 40 | ) 41 | 42 | 43 | def add_gaussian_noise_to_multidim_points(max_noise_ratio, datapoints): 44 | return [ 45 | _add_gaussian_noise_to_multidim_point(max_noise_ratio, point) 46 | for point in datapoints 47 | ] 48 | 49 | 50 | def _add_gaussian_noise_to_multidim_point(max_noise_ratio, datapoint): 51 | return [value * random.gauss(1, max_noise_ratio) for value in datapoint] 52 | 53 | 54 | def normalise_data(datapoints): 55 | total_sum = sum(datapoints) 56 | return [datapoint / total_sum for datapoint in datapoints] 57 | 58 | 59 | def is_too_similar_for_axes(word1, word2): 60 | """ Checks if the words contain each other """ 61 | return word1 in word2 or word2 in word1 62 | 63 | 64 | def create_interesting_curve_function(): 65 | # Build an optional list 66 | 67 | # random small integer 68 | a = random.uniform(-10, 10) 69 | b = random.uniform(0.001, 10) 70 | 71 | # random relative 72 | r = random.uniform(0, 1) 73 | 74 | interesting_functions = [ 75 | lambda x: a * x, 76 | lambda x: 
a / x, 77 | lambda x: a + x, 78 | lambda x: a - x, 79 | # lambda x: min(float(5e8), float(a ** math.log(x))), 80 | # lambda x: min(float(5e8), float(x ** math.log(a))), 81 | lambda x: math.sin(x), 82 | ] 83 | 84 | chosen = random.choice(interesting_functions) 85 | 86 | # Add chance of adding another function 87 | # random_number = random.uniform(0, 1) 88 | # if random_number < 0.4: 89 | # chosen = lambda x: random.choice(interesting_functions)(chosen(x)) 90 | # elif random_number < 0.8: 91 | # chosen = lambda x: random.choice(interesting_functions)(x) * chosen(x) 92 | # else: 93 | # chosen = lambda x: random.choice(interesting_functions)(x) + chosen(x) 94 | 95 | return chosen 96 | 97 | 98 | # DATA SET CREATION 99 | 100 | 101 | def create_equal_data_with_outlier_end( 102 | size, noise_factor, normal_min, normal_max, outlier_min_size, outlier_max_size 103 | ): 104 | # Create data with same number between normal_min and normal_max everywhere 105 | datapoints = [random.uniform(normal_min, normal_max) for _ in range(0, size)] 106 | 107 | # Make last number an outlier 108 | datapoints[-1] = random.uniform(outlier_min_size, outlier_max_size) 109 | 110 | # Apply noise 111 | datapoints = add_noise_to_points(noise_factor, datapoints) 112 | 113 | return datapoints 114 | 115 | 116 | def generate_random_x(lower_bound, upper_bound, number): 117 | return [random.uniform(lower_bound, upper_bound) for _ in range(number)] 118 | 119 | 120 | def generate_y(xs, function): 121 | return [(x, function(x)) for x in xs] 122 | 123 | 124 | # CHART TYPES PROPERTIES SETTING 125 | 126 | 127 | def add_data_to_series(serie, data_points): 128 | for data_point in data_points: 129 | x, y = data_point 130 | serie.add_data_point(x, y) 131 | 132 | 133 | def _set_pie_label_positions(chart, series, chart_data, label_position): 134 | chart.plots[0].has_data_labels = True 135 | for i in range(len(chart_data.categories)): 136 | point = series.points[i] 137 | value = series.values[i] 138 | point.data_label.text_frame.text = "{} ({:.0%})".format( 139 | chart_data.categories[i].label, value 140 | ) 141 | if label_position: 142 | point.data_label.position = label_position 143 | 144 | 145 | def set_histogram_properties(chart, chart_data): 146 | value_axis = chart.value_axis 147 | value_axis.major_tick_mark = XL_TICK_MARK.NONE 148 | value_axis.minor_tick_mark = XL_TICK_MARK.NONE 149 | value_axis.has_major_gridlines = False 150 | value_axis.has_minor_gridlines = False 151 | # value_axis.visible = False 152 | 153 | tick_labels = value_axis.tick_labels 154 | tick_labels.number_format = "0%" 155 | 156 | return chart 157 | 158 | 159 | def set_pie_properties(chart, chart_data): 160 | if chart and chart_data: 161 | chart.has_legend = False 162 | chart.has_title = False 163 | 164 | # Data points 165 | series = chart.series[0] 166 | # Check if there are small values that can't be contained on the pie piece 167 | label_position = ( 168 | XL_LABEL_POSITION.OUTSIDE_END 169 | if any(t < 0.10 for t in series.values) 170 | else XL_LABEL_POSITION.CENTER 171 | ) 172 | 173 | # set labels to contain category and value 174 | _set_pie_label_positions(chart, series, chart_data, label_position) 175 | 176 | 177 | def set_doughnut_properties(chart, chart_data): 178 | if chart and chart_data: 179 | chart.has_legend = False 180 | series = chart.series[0] 181 | _set_pie_label_positions(chart, series, chart_data, None) 182 | 183 | 184 | def create_set_scatter_properties(x_label, y_label): 185 | def set_scatter_properties(chart, chart_data): 186 | chart.has_legend = 
False 187 | x_axis = chart.category_axis 188 | y_axis = chart.value_axis 189 | 190 | # TODO: Fix it so that this actually has a title 191 | # x_axis.has_title = True 192 | # y_axis.has_title = True 193 | 194 | return set_scatter_properties 195 | 196 | 197 | # CHART TYPES 198 | PIE = XL_CHART_TYPE.PIE, set_pie_properties 199 | PROCENT_HISTOGRAM = XL_CHART_TYPE.COLUMN_CLUSTERED, set_histogram_properties 200 | DOUGHNUT = XL_CHART_TYPE.DOUGHNUT, set_doughnut_properties 201 | 202 | # CHART DATA GENERATOR 203 | 204 | _YES_NO_CHART_TYPES = PIE, PROCENT_HISTOGRAM, DOUGHNUT 205 | 206 | 207 | def generate_yes_no_large_funny_answer_chart_data(presentation_context): 208 | title = yes_no_question_generator(presentation_context) 209 | 210 | presentation_context["chart_title"] = title 211 | 212 | categories = ["Yes", "No", funny_yes_no_answer_generator(presentation_context)] 213 | series_data = normalise_data( 214 | create_equal_data_with_outlier_end(len(categories), 0.2, 1, 2.5, 1, 20) 215 | ) 216 | 217 | chart_data = ChartData() 218 | chart_data.categories = categories 219 | chart_data.add_series("", series_data) 220 | return title, chart_data 221 | 222 | 223 | def _generate_conceptnet_data( 224 | presentation_context, title_generator, conceptnet_function 225 | ): 226 | seed = presentation_context["seed"] 227 | title = title_generator(presentation_context) 228 | 229 | presentation_context["chart_title"] = title 230 | 231 | conceptnet_relations = conceptnet_function(seed) 232 | 233 | if conceptnet_relations: 234 | conceptnet_relations = conceptnet.remove_duplicates(conceptnet_relations) 235 | conceptnet_relations = conceptnet.remove_containing(conceptnet_relations, seed) 236 | random.shuffle(conceptnet_relations) 237 | 238 | conceptnet_relations = conceptnet_relations[0 : random.randint(2, 5)] 239 | categories = [location[1] for location in conceptnet_relations] 240 | values = [float(location[0]) ** 2 for location in conceptnet_relations] 241 | 242 | if len(categories) == 0: 243 | return None 244 | series_data = normalise_data(values) 245 | 246 | chart_data = ChartData() 247 | chart_data.categories = categories 248 | chart_data.add_series("", series_data) 249 | return title, chart_data 250 | 251 | 252 | def generate_location_data(presentation_context): 253 | return _generate_conceptnet_data( 254 | presentation_context, 255 | location_question_generator, 256 | conceptnet.get_weighted_related_locations, 257 | ) 258 | 259 | 260 | def generate_property_data(presentation_context): 261 | return _generate_conceptnet_data( 262 | presentation_context, 263 | property_question_generator, 264 | conceptnet.get_weighted_properties, 265 | ) 266 | 267 | 268 | # FULL CHART GENERATORS 269 | 270 | 271 | def generate_yes_no_pie(presentation_context): 272 | title, chart_data = generate_yes_no_large_funny_answer_chart_data( 273 | presentation_context 274 | ) 275 | chart_type, chart_modifier = random.choice(_YES_NO_CHART_TYPES) 276 | return title, chart_type, chart_data, chart_modifier 277 | 278 | 279 | def generate_location_pie(presentation_context): 280 | result = generate_location_data(presentation_context) 281 | if result: 282 | title, chart_data = result 283 | chart_type, chart_modifier = random.choice(_YES_NO_CHART_TYPES) 284 | return title, chart_type, chart_data, chart_modifier 285 | 286 | 287 | def generate_property_pie(presentation_context): 288 | result = generate_property_data(presentation_context) 289 | if result: 290 | title, chart_data = result 291 | chart_type, chart_modifier = 
random.choice(_YES_NO_CHART_TYPES) 292 | return title, chart_type, chart_data, chart_modifier 293 | 294 | 295 | _CORRELATION_WORD_GENERATOR = generator_util.WalkingGenerator( 296 | generator_util.CombinedGenerator( 297 | (2, conceptnet.unweighted_antonym_generator), 298 | (1, conceptnet.unweighted_related_word_generator), 299 | ), 300 | steps=5, 301 | ) 302 | 303 | 304 | def generate_correlation_curve(presentation_context): 305 | x_label = presentation_context["topic"] 306 | y_label = presentation_context["seed"] 307 | 308 | if is_too_similar_for_axes(x_label, y_label): 309 | x_label = _CORRELATION_WORD_GENERATOR(y_label) 310 | if is_too_similar_for_axes(x_label, y_label): 311 | x_label = "time" 312 | presentation_context.update({"x_label": x_label, "y_label": y_label}) 313 | 314 | title = correlation_title_generator(presentation_context) 315 | 316 | if not title: 317 | return None 318 | 319 | chart_data = XyChartData() 320 | 321 | serie = chart_data.add_series("Model") 322 | 323 | # Generate some Xs, with chance of exponential differences in size between generated x axes 324 | xs = generate_random_x( 325 | 0, 2 ** random.uniform(1, 10), int(2 ** random.uniform(3, 8)) 326 | ) 327 | 328 | # Generate y 329 | data_points = generate_y(xs, create_interesting_curve_function()) 330 | 331 | max_x = max(xs) 332 | 333 | data_points = add_gaussian_noise_to_multidim_points( 334 | 1.5 * random.uniform(0, max_x / 10), data_points 335 | ) 336 | 337 | # Remove negatives 338 | data_points = [(abs(datapoint[0]), abs(datapoint[1])) for datapoint in data_points] 339 | 340 | add_data_to_series(serie, data_points) 341 | 342 | return ( 343 | title, 344 | XL_CHART_TYPE.XY_SCATTER, 345 | chart_data, 346 | create_set_scatter_properties(x_label, y_label), 347 | ) 348 | -------------------------------------------------------------------------------- /talkgenerator/util/generator_util.py: -------------------------------------------------------------------------------- 1 | """ 2 | Light, commonly used, non-specific generators that are helpful shortcuts for creating 3 | certain types of (content) generators 4 | """ 5 | import os 6 | import sys 7 | import random 8 | import logging 9 | import inspect 10 | from typing import Callable, Optional, Dict, Union, Tuple 11 | 12 | import requests 13 | 14 | from talkgenerator.datastructures.image_data import ImageData 15 | from talkgenerator.util import random_util, os_util 16 | 17 | logger = logging.getLogger("talkgenerator") 18 | 19 | 20 | def fullname(o): 21 | # o.__module__ + "." + o.__class__.__qualname__ is an example in 22 | # this context of H.L. Mencken's "neat, plausible, and wrong." 23 | # Python makes no guarantees as to whether the __module__ special 24 | # attribute is defined, so we take a more circumspect approach. 25 | # Alas, the module name is explicitly excluded from __qualname__ 26 | # in Python 3. 27 | 28 | module = o.__class__.__module__ 29 | if module is None or module == str.__class__.__module__: 30 | return o.__class__.__name__ # Avoid reporting __builtin__ 31 | else: 32 | return module + '.' 
+ o.__class__.__name__ 33 | 34 | 35 | class Generator(object): 36 | def __call__(self, seed: str): 37 | raise NotImplementedError( 38 | str(self) + " has not provided an implementation for the generator" 39 | ) 40 | 41 | 42 | class PrefixedGenerator(Generator): 43 | def __init__(self, prefix: str, generator: Generator): 44 | self._prefix = prefix 45 | self._generator = generator 46 | 47 | def __call__(self, seed: str): 48 | return self._generator(self._prefix + " " + seed) 49 | 50 | 51 | class PrefixedPresentationContextGenerator(Generator): 52 | def __init__(self, prefix: str, generator): 53 | self._prefix = prefix 54 | self._generator = generator 55 | 56 | def __call__(self, presentation_context): 57 | presentation_context = dict(presentation_context) 58 | presentation_context["seed"] = self._prefix + " " + presentation_context["seed"] 59 | return self._generator(presentation_context) 60 | 61 | 62 | class CombinedGenerator(Generator): 63 | def __init__(self, *weighted_generators: Tuple[Union[int, float], Generator]): 64 | self._weighted_generators = weighted_generators 65 | 66 | def __call__(self, seed: Union[str, Dict[str, str]]): 67 | logger.debug('Calling generator_util.CombinedGenerator') 68 | current_weighted_generators = list(self._weighted_generators) 69 | logger.debug("current_weighted_generators: {}".format(current_weighted_generators)) 70 | while len(current_weighted_generators) > 0: 71 | generator = random_util.weighted_random(current_weighted_generators) 72 | logger.debug("current generator: {}".format(generator)) 73 | logger.debug("generator seed: {}".format(seed)) 74 | generated = generator(seed) 75 | logger.debug("generated: {}".format(generated)) 76 | if generated is not None: 77 | return generated 78 | _remove_object_from_weighted_list(current_weighted_generators, generator) 79 | 80 | 81 | def _remove_object_from_weighted_list(current_weighted_generators, generator): 82 | for i in current_weighted_generators: 83 | if i and i[1] == generator: 84 | current_weighted_generators.remove(i) 85 | 86 | 87 | class MappedGenerator(Generator): 88 | def __init__(self, generator, *functions): 89 | self._generator = generator 90 | self._functions = functions 91 | 92 | def __call__(self, presentation_context): 93 | # print("MappedGenerator generator using", presentation_context) 94 | generated = self._generator(presentation_context) 95 | for func in self._functions: 96 | generated = func(generated) 97 | return generated 98 | 99 | 100 | class TupledGenerator(Generator): 101 | """ Creates a tuple generator that generates every tuple value independently from the others""" 102 | 103 | def __init__(self, *generators): 104 | self._generators = generators 105 | 106 | def __call__(self, presentation_context): 107 | # print("TupledGenerator generator using", presentation_context) 108 | return tuple( 109 | [generator(presentation_context) for generator in self._generators] 110 | ) 111 | 112 | 113 | class InspiredTupleGenerator(Generator): 114 | """ The second generator will get the output of generator 1 as input, outputting the tuple """ 115 | 116 | def __init__(self, generator_1, generator_2): 117 | self._generator_1 = generator_1 118 | self._generator_2 = generator_2 119 | 120 | def __call__(self, presentation_context): 121 | # print("InspiredTupleGenerator generator using", presentation_context) 122 | gen_1 = self._generator_1(presentation_context) 123 | gen_2 = self._generator_2(gen_1) 124 | return gen_1, gen_2 125 | 126 | 127 | # == TRIVIAL GENERATORS == 128 | 129 | 130 | class SeededGenerator(Generator): 
131 | def __init__(self, simple_generator): 132 | self._simple_generator = simple_generator 133 | 134 | def __call__(self, presentation_context): 135 | logger.debug('Calling generator_util.SeededGenerator') 136 | logger.debug('presentation_context: {}'.format(presentation_context)) 137 | logger.debug('self._simple_generator: {}'.format(self._simple_generator)) 138 | return self._simple_generator(presentation_context["seed"]) 139 | 140 | 141 | class UnseededGenerator(Generator): 142 | def __init__(self, simple_generator): 143 | self._simple_generator = simple_generator 144 | 145 | def __call__(self, seed): 146 | presentation_context = {"seed": seed} 147 | return self._simple_generator(presentation_context) 148 | 149 | 150 | class NoneGenerator(Generator): 151 | def __init__(self): 152 | pass 153 | 154 | def __call__(self, presentation_context): 155 | return None 156 | 157 | 158 | class IdentityGenerator(Generator): 159 | def __init__(self, input_word): 160 | self._input_word = input_word 161 | 162 | def __call__(self, presentation_context): 163 | return self._input_word 164 | 165 | 166 | class TitledIdentityGenerator(Generator): 167 | def __init__(self, input_word): 168 | self._input_word = input_word 169 | 170 | def __call__(self, presentation_context): 171 | if self._input_word: 172 | return self._input_word.title() 173 | 174 | 175 | class StaticGenerator(Generator): 176 | def __init__(self, always_generate_this): 177 | self._always_generate_this = always_generate_this 178 | 179 | def __call__(self, presentation_context=None): 180 | return self._always_generate_this 181 | 182 | 183 | class FromListGenerator(Generator): 184 | def __init__(self, list_generator): 185 | self._list_generator = list_generator 186 | 187 | def __call__(self, presentation_context): 188 | return random_util.choice_optional(self._list_generator(presentation_context)) 189 | 190 | 191 | seeded_identity_generator = SeededGenerator(IdentityGenerator) 192 | seeded_titled_identity_generator = SeededGenerator(TitledIdentityGenerator) 193 | 194 | 195 | class ExternalImageListGenerator(Generator): 196 | def __init__( 197 | self, image_generator, check_image_validness=True, weighted=False, 198 | ): 199 | self._image_generator = image_generator 200 | self._check_image_validness = check_image_validness 201 | self._weighted = weighted 202 | 203 | def __call__(self, presentation_context) -> Optional[ImageData]: 204 | logger.debug('Calling generator_util.ExternalImageListGenerator') 205 | logger.debug('self._image_generator: {}'.format(self._image_generator)) 206 | logger.debug('self._check_image_validness: {}'.format(self._check_image_validness)) 207 | logger.debug('self._weighted: {}'.format(self._weighted)) 208 | logger.debug('module where function def: {}'.format(self._image_generator.__module__)) 209 | logger.debug('****************************************************************') 210 | images = self._image_generator(presentation_context) 211 | # logger.debug('images: {}'.format(images)) 212 | logger.debug('****************************************************************') 213 | 214 | while bool(images) and len(images) > 0: 215 | original_chosen_image = ( 216 | random_util.weighted_random([image for image in images if image[0] > 0]) 217 | if self._weighted 218 | else random.choice(images) 219 | ) 220 | if isinstance(original_chosen_image, str): 221 | chosen_image = ImageData(image_url=original_chosen_image) 222 | elif isinstance(original_chosen_image, ImageData): 223 | chosen_image = original_chosen_image 224 | else: 225 | 
logger.warning( 226 | "INVALID IMAGE INPUT FOR EXTERNAL IMAGE GENERATOR / " 227 | + str(original_chosen_image) 228 | + " / " 229 | + str(type(original_chosen_image)) 230 | ) 231 | images.remove(original_chosen_image) 232 | continue 233 | 234 | return chosen_image 235 | return None 236 | 237 | 238 | class BackupGenerator(Generator): 239 | def __init__(self, *generator_list): 240 | self._generator_list = generator_list 241 | 242 | def __call__(self, context): 243 | for generator in self._generator_list: 244 | generated = generator(context) 245 | if generated: 246 | return generated 247 | 248 | 249 | class WeightedGenerator(Generator): 250 | def __init__(self, weighted_list_creator): 251 | self._weighted_list_creator = weighted_list_creator 252 | 253 | def __call__(self, argument): 254 | weighted_list = self._weighted_list_creator(argument) 255 | if weighted_list: 256 | return random_util.weighted_random(weighted_list) 257 | 258 | 259 | class UnweightedGenerator(Generator): 260 | def __init__(self, weighted_list_creator): 261 | self._weighted_list_creator = weighted_list_creator 262 | 263 | def __call__(self, argument): 264 | weighted_list = self._weighted_list_creator(argument) 265 | if weighted_list: 266 | return random_util.choice_optional( 267 | [element[1] for element in weighted_list] 268 | ) 269 | 270 | 271 | class WalkingGenerator(Generator): 272 | """ This type of generator uses its output as input for a next step, taking concepts a few steps away """ 273 | 274 | def __init__(self, inner_generator, steps): 275 | self._inner_generator = inner_generator 276 | self._steps = steps 277 | 278 | def __call__(self, seed): 279 | history = set() 280 | history.add(seed) 281 | current = seed 282 | for i in range(self._steps): 283 | generated = self._inner_generator(current) 284 | if generated: 285 | current = generated 286 | history.add(current) 287 | 288 | return current 289 | 290 | 291 | class ImageGenerator(Generator): 292 | def __call__(self, seed: str) -> ImageData: 293 | raise NotImplementedError("Not implemented image generator") 294 | 295 | 296 | class UnsourcedImageGenerator(ImageGenerator): 297 | def __init__(self, image_url_generator: Callable[[str], str]): 298 | self._image_url_generator = image_url_generator 299 | 300 | def __call__(self, seed: str) -> ImageData: 301 | return ImageData(image_url=self._image_url_generator(seed)) 302 | 303 | 304 | class RelatedMappingGenerator(Generator): 305 | def __init__( 306 | self, related_word_generator: Callable[[str], str], generator: Generator 307 | ): 308 | self._related_word_generator = related_word_generator 309 | self._generator = generator 310 | 311 | def __call__(self, seed: str) -> Optional[str]: 312 | mapped_seed = self._related_word_generator(seed) 313 | return self._generator(mapped_seed) 314 | -------------------------------------------------------------------------------- /talkgenerator/data/eval/common_words.txt: -------------------------------------------------------------------------------- 1 | time 2 | way 3 | year 4 | work 5 | government 6 | day 7 | man 8 | world 9 | life 10 | part 11 | house 12 | course 13 | case 14 | system 15 | place 16 | end 17 | group 18 | company 19 | party 20 | information 21 | school 22 | fact 23 | money 24 | point 25 | example 26 | state 27 | business 28 | night 29 | area 30 | water 31 | thing 32 | family 33 | head 34 | hand 35 | order 36 | john 37 | side 38 | home 39 | development 40 | week 41 | power 42 | country 43 | council 44 | use 45 | service 46 | room 47 | market 48 | problem 49 | court 50 | 
lot 51 | a 52 | war 53 | police 54 | interest 55 | car 56 | law 57 | road 58 | form 59 | face 60 | education 61 | policy 62 | research 63 | sort 64 | office 65 | body 66 | person 67 | health 68 | mother 69 | question 70 | period 71 | name 72 | book 73 | level 74 | child 75 | control 76 | society 77 | minister 78 | view 79 | door 80 | line 81 | community 82 | south 83 | city 84 | god 85 | father 86 | centre 87 | effect 88 | staff 89 | position 90 | kind 91 | job 92 | woman 93 | action 94 | management 95 | act 96 | process 97 | north 98 | age 99 | evidence 100 | idea 101 | west 102 | support 103 | moment 104 | sense 105 | report 106 | mind 107 | church 108 | morning 109 | death 110 | change 111 | industry 112 | land 113 | care 114 | century 115 | range 116 | table 117 | back 118 | trade 119 | history 120 | study 121 | street 122 | committee 123 | rate 124 | word 125 | food 126 | language 127 | experience 128 | result 129 | team 130 | other 131 | sir 132 | section 133 | programme 134 | air 135 | authority 136 | role 137 | reason 138 | price 139 | town 140 | class 141 | nature 142 | subject 143 | department 144 | union 145 | bank 146 | member 147 | value 148 | need 149 | east 150 | practice 151 | type 152 | paper 153 | date 154 | decision 155 | figure 156 | right 157 | wife 158 | president 159 | university 160 | friend 161 | club 162 | quality 163 | voice 164 | lord 165 | stage 166 | king 167 | us 168 | situation 169 | light 170 | tax 171 | production 172 | march 173 | secretary 174 | art 175 | board 176 | may 177 | hospital 178 | month 179 | music 180 | cost 181 | field 182 | award 183 | issue 184 | bed 185 | project 186 | chapter 187 | girl 188 | game 189 | amount 190 | basis 191 | knowledge 192 | approach 193 | series 194 | love 195 | top 196 | news 197 | front 198 | future 199 | manager 200 | account 201 | computer 202 | security 203 | rest 204 | labour 205 | structure 206 | hair 207 | bill 208 | heart 209 | force 210 | attention 211 | movement 212 | success 213 | letter 214 | agreement 215 | capital 216 | analysis 217 | population 218 | environment 219 | performance 220 | model 221 | material 222 | theory 223 | growth 224 | fire 225 | chance 226 | boy 227 | relationship 228 | son 229 | sea 230 | record 231 | size 232 | property 233 | space 234 | term 235 | director 236 | plan 237 | behaviour 238 | treatment 239 | energy 240 | st 241 | peter 242 | income 243 | cup 244 | scheme 245 | design 246 | response 247 | association 248 | choice 249 | pressure 250 | hall 251 | couple 252 | technology 253 | defence 254 | list 255 | chairman 256 | loss 257 | activity 258 | contract 259 | county 260 | wall 261 | paul 262 | difference 263 | army 264 | hotel 265 | sun 266 | product 267 | summer 268 | set 269 | village 270 | colour 271 | floor 272 | season 273 | unit 274 | park 275 | hour 276 | investment 277 | test 278 | garden 279 | husband 280 | employment 281 | style 282 | science 283 | look 284 | deal 285 | charge 286 | help 287 | economy 288 | new 289 | page 290 | risk 291 | advice 292 | event 293 | picture 294 | commission 295 | fish 296 | college 297 | oil 298 | doctor 299 | opportunity 300 | film 301 | conference 302 | operation 303 | application 304 | press 305 | extent 306 | addition 307 | station 308 | window 309 | shop 310 | access 311 | region 312 | doubt 313 | majority 314 | degree 315 | television 316 | blood 317 | statement 318 | sound 319 | election 320 | parliament 321 | site 322 | mark 323 | importance 324 | title 325 | species 326 | increase 327 | return 328 | concern 329 | public 330 
| competition 331 | software 332 | glass 333 | lady 334 | answer 335 | earth 336 | daughter 337 | purpose 338 | responsibility 339 | leader 340 | river 341 | eye 342 | ability 343 | appeal 344 | opposition 345 | campaign 346 | respect 347 | task 348 | instance 349 | sale 350 | whole 351 | officer 352 | method 353 | division 354 | source 355 | piece 356 | pattern 357 | lack 358 | disease 359 | equipment 360 | surface 361 | oxford 362 | demand 363 | post 364 | mouth 365 | radio 366 | provision 367 | attempt 368 | sector 369 | firm 370 | status 371 | peace 372 | variety 373 | teacher 374 | show 375 | speaker 376 | baby 377 | arm 378 | base 379 | miss 380 | safety 381 | trouble 382 | culture 383 | direction 384 | context 385 | character 386 | box 387 | discussion 388 | past 389 | weight 390 | organisation 391 | start 392 | brother 393 | league 394 | condition 395 | machine 396 | argument 397 | sex 398 | budget 399 | english 400 | transport 401 | share 402 | mum 403 | cash 404 | principle 405 | exchange 406 | aid 407 | library 408 | version 409 | rule 410 | tea 411 | balance 412 | afternoon 413 | reference 414 | protection 415 | truth 416 | district 417 | turn 418 | smith 419 | review 420 | minute 421 | duty 422 | survey 423 | presence 424 | influence 425 | stone 426 | dog 427 | benefit 428 | collection 429 | executive 430 | speech 431 | function 432 | queen 433 | marriage 434 | stock 435 | failure 436 | kitchen 437 | student 438 | effort 439 | holiday 440 | career 441 | attack 442 | length 443 | horse 444 | progress 445 | plant 446 | visit 447 | relation 448 | ball 449 | memory 450 | bar 451 | opinion 452 | quarter 453 | impact 454 | scale 455 | race 456 | image 457 | trust 458 | justice 459 | edge 460 | gas 461 | railway 462 | expression 463 | advantage 464 | gold 465 | wood 466 | network 467 | text 468 | forest 469 | sister 470 | chair 471 | cause 472 | foot 473 | rise 474 | half 475 | winter 476 | corner 477 | insurance 478 | step 479 | damage 480 | credit 481 | pain 482 | possibility 483 | legislation 484 | strength 485 | speed 486 | crime 487 | hill 488 | debate 489 | will 490 | supply 491 | present 492 | confidence 493 | mary 494 | patient 495 | wind 496 | solution 497 | band 498 | museum 499 | farm 500 | pound 501 | henry 502 | match 503 | assessment 504 | message 505 | football 506 | no 507 | animal 508 | skin 509 | scene 510 | article 511 | stuff 512 | introduction 513 | play 514 | administration 515 | fear 516 | dad 517 | proportion 518 | island 519 | contact 520 | japan 521 | claim 522 | kingdom 523 | video 524 | tv 525 | existence 526 | telephone 527 | move 528 | traffic 529 | distance 530 | relief 531 | cabinet 532 | unemployment 533 | reality 534 | target 535 | trial 536 | rock 537 | concept 538 | spirit 539 | accident 540 | organization 541 | construction 542 | coffee 543 | phone 544 | distribution 545 | train 546 | sight 547 | difficulty 548 | factor 549 | exercise 550 | weekend 551 | battle 552 | prison 553 | grant 554 | aircraft 555 | tree 556 | bridge 557 | strategy 558 | contrast 559 | communication 560 | background 561 | shape 562 | wine 563 | star 564 | hope 565 | selection 566 | detail 567 | user 568 | path 569 | client 570 | search 571 | master 572 | rain 573 | offer 574 | goal 575 | dinner 576 | freedom 577 | attitude 578 | while 579 | agency 580 | seat 581 | manner 582 | favour 583 | fig. 
584 | pair 585 | crisis 586 | smile 587 | prince 588 | danger 589 | call 590 | capacity 591 | output 592 | note 593 | procedure 594 | theatre 595 | tour 596 | recognition 597 | middle 598 | absence 599 | sentence 600 | package 601 | track 602 | card 603 | sign 604 | commitment 605 | player 606 | threat 607 | weather 608 | element 609 | conflict 610 | notice 611 | victory 612 | bottom 613 | finance 614 | fund 615 | violence 616 | file 617 | profit 618 | standard 619 | jack 620 | route 621 | china 622 | expenditure 623 | second 624 | discipline 625 | cell 626 | pp. 627 | reaction 628 | castle 629 | congress 630 | individual 631 | lead 632 | consideration 633 | debt 634 | option 635 | payment 636 | exhibition 637 | reform 638 | emphasis 639 | spring 640 | audience 641 | feature 642 | touch 643 | estate 644 | assembly 645 | volume 646 | youth 647 | contribution 648 | curriculum 649 | appearance 650 | martin 651 | tom 652 | boat 653 | institute 654 | membership 655 | branch 656 | bus 657 | waste 658 | heat 659 | neck 660 | object 661 | captain 662 | driver 663 | challenge 664 | conversation 665 | occasion 666 | code 667 | crown 668 | birth 669 | silence 670 | literature 671 | faith 672 | hell 673 | entry 674 | transfer 675 | gentleman 676 | bag 677 | coal 678 | investigation 679 | leg 680 | belief 681 | total 682 | major 683 | document 684 | description 685 | murder 686 | aim 687 | manchester 688 | flight 689 | conclusion 690 | drug 691 | tradition 692 | pleasure 693 | connection 694 | owner 695 | treaty 696 | tony 697 | alan 698 | desire 699 | professor 700 | copy 701 | ministry 702 | acid 703 | palace 704 | address 705 | institution 706 | lunch 707 | generation 708 | partner 709 | engine 710 | newspaper 711 | cross 712 | reduction 713 | welfare 714 | definition 715 | key 716 | release 717 | vote 718 | examination 719 | judge 720 | atmosphere 721 | leadership 722 | sky 723 | breath 724 | creation 725 | row 726 | guide 727 | milk 728 | cover 729 | screen 730 | intention 731 | criticism 732 | jones 733 | silver 734 | customer 735 | journey 736 | explanation 737 | green 738 | measure 739 | brain 740 | significance 741 | phase 742 | injury 743 | run 744 | coast 745 | technique 746 | valley 747 | drink 748 | magazine 749 | potential 750 | drive 751 | revolution 752 | bishop 753 | settlement 754 | christ 755 | metal 756 | motion 757 | index 758 | adult 759 | inflation 760 | sport 761 | surprise 762 | pension 763 | factory 764 | tape 765 | flow 766 | iron 767 | trip 768 | lane 769 | pool 770 | independence 771 | hole 772 | un 773 | flat 774 | content 775 | pay 776 | noise 777 | combination 778 | session 779 | appointment 780 | fashion 781 | consumer 782 | accommodation 783 | temperature 784 | mike 785 | religion 786 | author 787 | nation 788 | northern 789 | sample 790 | assistance 791 | interpretation 792 | aspect 793 | display 794 | shoulder 795 | agent 796 | gallery 797 | republic 798 | cancer 799 | proposal 800 | sequence 801 | simon 802 | ship 803 | interview 804 | vehicle 805 | democracy 806 | improvement 807 | involvement 808 | general 809 | enterprise 810 | van 811 | meal 812 | breakfast 813 | motor 814 | channel 815 | impression 816 | tone 817 | sheet 818 | pollution 819 | bob 820 | beauty 821 | square 822 | vision 823 | spot 824 | distinction 825 | brown 826 | crowd 827 | fuel 828 | desk 829 | sum 830 | decline 831 | revenue 832 | fall 833 | diet 834 | bedroom 835 | soil 836 | reader 837 | shock 838 | fruit 839 | behalf 840 | deputy 841 | roof 842 | nose 843 | steel 844 | co 845 | artist 
846 | graham 847 | plate 848 | song 849 | maintenance 850 | formation 851 | grass 852 | spokesman 853 | ice 854 | talk 855 | program 856 | link 857 | ring 858 | expert 859 | establishment 860 | plastic 861 | candidate 862 | rail 863 | passage 864 | joe 865 | parish 866 | ref 867 | emergency 868 | liability 869 | identity 870 | location 871 | framework 872 | strike 873 | countryside 874 | map 875 | lake 876 | household 877 | approval 878 | border 879 | bottle 880 | bird 881 | constitution 882 | autumn 883 | cat 884 | agriculture 885 | concentration 886 | guy 887 | dress 888 | victim 889 | mountain 890 | editor 891 | theme 892 | error 893 | loan 894 | stress 895 | recovery 896 | electricity 897 | recession 898 | wealth 899 | request 900 | comparison 901 | lewis 902 | white 903 | walk 904 | focus 905 | chief 906 | parent 907 | sleep 908 | mass 909 | jane 910 | bush 911 | foundation 912 | bath 913 | item 914 | lifespan 915 | lee 916 | publication 917 | decade 918 | beach 919 | sugar 920 | height 921 | charity 922 | writer 923 | panel 924 | struggle 925 | dream 926 | outcome 927 | efficiency 928 | offence 929 | resolution 930 | reputation 931 | specialist 932 | taylor 933 | pub 934 | co-operation 935 | port 936 | incident 937 | representation 938 | bread 939 | chain 940 | initiative 941 | clause 942 | resistance 943 | mistake 944 | worker 945 | advance 946 | empire 947 | notion 948 | mirror 949 | delivery 950 | chest 951 | licence 952 | frank 953 | average 954 | awareness 955 | travel 956 | expansion 957 | block 958 | alternative 959 | chancellor 960 | meat 961 | store 962 | self 963 | break 964 | drama 965 | corporation 966 | currency 967 | extension 968 | convention 969 | partnership 970 | skill 971 | furniture 972 | round 973 | regime 974 | inquiry 975 | rugby 976 | philosophy 977 | scope 978 | gate 979 | minority 980 | intelligence 981 | restaurant 982 | consequence 983 | mill 984 | golf 985 | retirement 986 | priority 987 | plane 988 | gun 989 | gap 990 | core 991 | uncle 992 | thatcher 993 | fun 994 | arrival 995 | snow 996 | no 997 | command 998 | abuse 999 | limit 1000 | championship --------------------------------------------------------------------------------
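A minimal usage sketch of the combinators defined in talkgenerator/util/generator_util.py above (illustration only, not part of the repository; the weights, URLs, and seed value are made-up):

from talkgenerator.util.generator_util import (
    CombinedGenerator,
    SeededGenerator,
    StaticGenerator,
)

# Weighted 3:1 choice between two constant generators. CombinedGenerator keeps
# falling back to the remaining entries whenever the chosen generator returns None.
cat_or_dog = CombinedGenerator(
    (3, StaticGenerator("https://example.com/cat.png")),
    (1, StaticGenerator("https://example.com/dog.png")),
)

# SeededGenerator adapts a seed-based callable so it can be called with the
# presentation_context dict that schema/content_generators.py passes around.
image_for_slide = SeededGenerator(cat_or_dog)

if __name__ == "__main__":
    print(image_for_slide({"seed": "cats"}))  # prints one of the two URLs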