├── tests ├── __init__.py ├── test_inspirobot.py ├── test_pexels.py ├── test_pixabay.py ├── test_shitpostbot.py ├── test_unsplash.py ├── test_wikihow.py ├── test_phrasefinder.py ├── test_goodreads.py ├── test_reddit.py ├── test_conceptnet.py ├── time_test.py ├── test_talkgenerator_multipletopics.py ├── test_random_util.py ├── test_slide_topic_generators.py ├── test_specific_text_generators.py ├── test_talkgenerator.py ├── test_language_util.py └── test_text_generator.py ├── talkgenerator ├── schema │ ├── __init__.py │ ├── presentation_schema_types.py │ ├── content_generator_structures.py │ ├── slide_topic_generators.py │ └── content_generators.py ├── slide │ ├── __init__.py │ ├── slide_deck.py │ ├── slides.py │ └── powerpoint_slide_creator.py ├── sources │ ├── __init__.py │ ├── inspirobot.py │ ├── goodreads.py │ ├── reddit.py │ ├── pexels.py │ ├── pixabay.py │ ├── phrasefinder.py │ ├── unsplash.py │ ├── shitpostbot.py │ ├── conceptnet.py │ ├── wikihow.py │ ├── text_generator.py │ └── chart.py ├── util │ ├── __init__.py │ ├── scraper_util.py │ ├── cache_util.py │ ├── random_util.py │ ├── os_util.py │ ├── language_util.py │ └── generator_util.py ├── datastructures │ ├── __init__.py │ ├── image_data.py │ └── slide_generator_data.py ├── __init__.py ├── data │ ├── powerpoint │ │ └── template.pptx │ ├── text-templates │ │ ├── triple_captions.txt │ │ ├── about_me_title.txt │ │ ├── history.txt │ │ ├── conclusion_title.txt │ │ ├── anecdote_title.txt │ │ ├── deep_abstract.txt │ │ ├── history_person.txt │ │ ├── historic_double_captions.txt │ │ ├── anticipation_title.txt │ │ ├── inspiration.txt │ │ ├── default_slide_title.txt │ │ ├── bold_statements.txt │ │ ├── anecdote_prompt.txt │ │ ├── double_captions.txt │ │ ├── conclusion_tuple.json │ │ ├── chart_texts.json │ │ ├── talk_subtitle.json │ │ └── talk_title.json │ ├── images │ │ ├── black-transparent.png │ │ ├── black-transparent.psd │ │ └── error_placeholder.png │ ├── prohibited_images │ │ ├── denied.jpg │ │ ├── imgur_removed.jpg │ │ ├── huge_domains_ad.gif │ │ ├── imgur_removed_2.jpg │ │ ├── tinypic_removed.png │ │ └── tinypic_removed2.jpg │ └── eval │ │ └── common_words.txt ├── run.py ├── runtime_checker.py ├── settings.py └── generator.py ├── .pre-commit-config.yaml ├── SECURITY.md ├── pytest.ini ├── run_nltk_download.py ├── requirements.txt ├── LICENSE ├── .circleci └── config.yml ├── setup.py ├── .gitignore └── README.md /tests/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /talkgenerator/schema/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /talkgenerator/slide/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /talkgenerator/sources/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /talkgenerator/util/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /talkgenerator/datastructures/__init__.py: -------------------------------------------------------------------------------- 1 | 
-------------------------------------------------------------------------------- /talkgenerator/__init__.py: -------------------------------------------------------------------------------- 1 | name = "talkgenerator" 2 | -------------------------------------------------------------------------------- /talkgenerator/data/powerpoint/template.pptx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/korymath/talk-generator/HEAD/talkgenerator/data/powerpoint/template.pptx -------------------------------------------------------------------------------- /talkgenerator/data/text-templates/triple_captions.txt: -------------------------------------------------------------------------------- 1 | Past|Currently|Future 2 | Danger 1|Danger 2|Danger 3 3 | Good|Better|Best 4 | Bad|Worse|Worst -------------------------------------------------------------------------------- /talkgenerator/data/images/black-transparent.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/korymath/talk-generator/HEAD/talkgenerator/data/images/black-transparent.png -------------------------------------------------------------------------------- /talkgenerator/data/images/black-transparent.psd: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/korymath/talk-generator/HEAD/talkgenerator/data/images/black-transparent.psd -------------------------------------------------------------------------------- /talkgenerator/data/images/error_placeholder.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/korymath/talk-generator/HEAD/talkgenerator/data/images/error_placeholder.png -------------------------------------------------------------------------------- /talkgenerator/data/prohibited_images/denied.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/korymath/talk-generator/HEAD/talkgenerator/data/prohibited_images/denied.jpg -------------------------------------------------------------------------------- /.pre-commit-config.yaml: -------------------------------------------------------------------------------- 1 | repos: 2 | - repo: https://github.com/ambv/black 3 | rev: stable 4 | hooks: 5 | - id: black 6 | language_version: python3.7 -------------------------------------------------------------------------------- /talkgenerator/data/prohibited_images/imgur_removed.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/korymath/talk-generator/HEAD/talkgenerator/data/prohibited_images/imgur_removed.jpg -------------------------------------------------------------------------------- /talkgenerator/data/prohibited_images/huge_domains_ad.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/korymath/talk-generator/HEAD/talkgenerator/data/prohibited_images/huge_domains_ad.gif -------------------------------------------------------------------------------- /talkgenerator/data/prohibited_images/imgur_removed_2.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/korymath/talk-generator/HEAD/talkgenerator/data/prohibited_images/imgur_removed_2.jpg 
-------------------------------------------------------------------------------- /talkgenerator/data/prohibited_images/tinypic_removed.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/korymath/talk-generator/HEAD/talkgenerator/data/prohibited_images/tinypic_removed.png -------------------------------------------------------------------------------- /talkgenerator/data/prohibited_images/tinypic_removed2.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/korymath/talk-generator/HEAD/talkgenerator/data/prohibited_images/tinypic_removed2.jpg -------------------------------------------------------------------------------- /SECURITY.md: -------------------------------------------------------------------------------- 1 | # Security Policy 2 | 3 | ## Supported Versions 4 | 5 | | Version | Supported | 6 | | ------- | ------------------ | 7 | | 3.0 | :white_check_mark: | 8 | -------------------------------------------------------------------------------- /pytest.ini: -------------------------------------------------------------------------------- 1 | [pytest] 2 | log_cli=true 3 | log_cli_level=DEBUG 4 | log_date_format=%Y-%m-%d %H:%M:%S 5 | log_format=[%(levelname)s %(asctime)s %(filename)s:%(lineno)s] %(message)s 6 | -------------------------------------------------------------------------------- /talkgenerator/data/text-templates/about_me_title.txt: -------------------------------------------------------------------------------- 1 | About me 2 | A little bit about me 3 | Who am I? 4 | Some things about me 5 | My background 6 | Personal background 7 | {presenter} 8 | {presenter}'s bio 9 | Short bio -------------------------------------------------------------------------------- /talkgenerator/data/text-templates/history.txt: -------------------------------------------------------------------------------- 1 | History 2 | Some {seed.singular.title} History 3 | Historic Background 4 | Important Figures in {seed.title} History 5 | Quick Historical Note 6 | History of {seed.plural.title} -------------------------------------------------------------------------------- /talkgenerator/data/text-templates/conclusion_title.txt: -------------------------------------------------------------------------------- 1 | Conclusions 2 | Conclusions 3 | Conclusion 4 | Key Points to End 5 | Ending Points 6 | To Wrap Up: 7 | Remember: 8 | Final points 9 | {title}? 10 | "{title}" 11 | To end: "{title}" -------------------------------------------------------------------------------- /talkgenerator/data/text-templates/anecdote_title.txt: -------------------------------------------------------------------------------- 1 | My story about {seed.title} 2 | Funny thing that once happened with {seed.a} 3 | My {seed.title} Experience 4 | How I got into {topic.title}? 5 | Anecdote Time! 6 | Little Anecdote 7 | Anecdote 8 | -------------------------------------------------------------------------------- /talkgenerator/data/text-templates/deep_abstract.txt: -------------------------------------------------------------------------------- 1 | Why? 2 | But... Why? 3 | What if ... ? 4 | And? 5 | Where to look? 6 | Let's go! 7 | What? 8 | How? 9 | Interaction 10 | Now what? 11 | What should we do about this? 12 | What can YOU do about this? 13 | How can YOU help? 
-------------------------------------------------------------------------------- /talkgenerator/data/text-templates/history_person.txt: -------------------------------------------------------------------------------- 1 | Historical {seed.title} People 2 | The First Person to {seed.wikihow_action} 3 | The Inventor of {seed.wikihow_action.ing} 4 | First Olympic "{seed.wikihow_action.ing.dashes}" Champion 5 | First Winner of the "{seed.wikihow_action.ing.dashes}" Championships -------------------------------------------------------------------------------- /talkgenerator/data/text-templates/historic_double_captions.txt: -------------------------------------------------------------------------------- 1 | Before|Afterwards 2 | First|Then 3 | Initially|Afterwards 4 | Early 1990's|Couple of years later 5 | Before the invention of {seed.plural.lower}|After 6 | Before the invention of {seed.wikihow_action.ing.lower}|After 7 | What people looked like before they knew how to {seed.wikihow_action.lower}|After they realised the importance of {seed.plural} -------------------------------------------------------------------------------- /tests/test_inspirobot.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | 3 | from talkgenerator.sources.inspirobot import get_random_inspirobot_image 4 | 5 | 6 | class InspirobotTest(unittest.TestCase): 7 | def test_something(self): 8 | image = get_random_inspirobot_image() 9 | self.assertIsNotNone(image) 10 | print(image) 11 | 12 | 13 | if __name__ == '__main__': 14 | unittest.main() 15 | -------------------------------------------------------------------------------- /talkgenerator/data/text-templates/anticipation_title.txt: -------------------------------------------------------------------------------- 1 | Let's look at this 2 | Look at this! 3 | Now this: 4 | This amazed me earlier 5 | Can you see it? 6 | How does {seed.singular} help? 7 | Getting {seed.plural.title} Involved 8 | How to {seed.wikihow_action} 9 | "But what about {seed.plural}?" 10 | "But what do you do with {seed.plural}?", well... 11 | What I say to people complaining about {seed.plural} -------------------------------------------------------------------------------- /talkgenerator/run.py: -------------------------------------------------------------------------------- 1 | from talkgenerator import generator 2 | 3 | 4 | def main(args): 5 | """Main run method for command line talk generation.""" 6 | presentations, slide_deck, output_file = generator.generate_presentation_using_cli_arguments( 7 | args 8 | ) 9 | 10 | 11 | def main_cli(): 12 | args = generator.get_argument_parser().parse_args() 13 | main(args) 14 | 15 | 16 | if __name__ == "__main__": 17 | main_cli() 18 | -------------------------------------------------------------------------------- /talkgenerator/data/text-templates/inspiration.txt: -------------------------------------------------------------------------------- 1 | Something to think about 2 | Some Inspiration 3 | Something my father always told me 4 | My mother always told me 5 | My grandma always said: 6 | Think about it... 7 | Consequence: 8 | Thus... 
9 | Logical Implication 10 | Logical Implication for {seed.plural.title} 11 | Implication 12 | Something that motivated me 13 | Food for Thought 14 | My Life Motto 15 | Best Thing About {seed.plural.title} 16 | What {seed.plural.title} Really Think: -------------------------------------------------------------------------------- /talkgenerator/util/scraper_util.py: -------------------------------------------------------------------------------- 1 | def create_page_scraper(scraping_function): 2 | def scrape_pages(search_term, amount): 3 | results = [] 4 | page = 1 5 | while len(results) < amount: 6 | new_quotes = scraping_function(search_term, page) 7 | if not new_quotes: 8 | break 9 | results.extend(new_quotes) 10 | page += 1 11 | 12 | return results[0:amount] 13 | 14 | return scrape_pages 15 | -------------------------------------------------------------------------------- /tests/test_pexels.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | 3 | from talkgenerator.sources import pexels 4 | 5 | 6 | class PexelsTest(unittest.TestCase): 7 | def test_pexels_access(self): 8 | images = pexels.search_photos("office") 9 | self.assertTrue(len(images) > 0) 10 | sources = [ 11 | image.get_source() for image in images if image.get_source() is not None 12 | ] 13 | self.assertTrue(len(sources) > 0) 14 | 15 | 16 | if __name__ == "__main__": 17 | unittest.main() 18 | -------------------------------------------------------------------------------- /talkgenerator/util/cache_util.py: -------------------------------------------------------------------------------- 1 | # from https://stackoverflow.com/questions/1151658/python-hashable-dicts 2 | class HashableDict(dict): 3 | """ A hashable version of a dictionary, useful for when a function needs to be cached but uses a dict as an 4 | argument """ 5 | 6 | def __key(self): 7 | return tuple((k, self[k]) for k in sorted(self)) 8 | 9 | def __hash__(self): 10 | return hash(self.__key()) 11 | 12 | def __eq__(self, other): 13 | return self.__key() == other.__key() 14 | -------------------------------------------------------------------------------- /tests/test_pixabay.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | from talkgenerator.sources import unsplash, pixabay 3 | 4 | 5 | class PixabayTest(unittest.TestCase): 6 | def test_pixabay_access(self): 7 | images = pixabay.search_photos("office chair") 8 | self.assertTrue(len(images) > 0) 9 | sources = [ 10 | image.get_source() for image in images if image.get_source() is not None 11 | ] 12 | self.assertTrue(len(sources) > 0) 13 | 14 | 15 | if __name__ == "__main__": 16 | unittest.main() 17 | -------------------------------------------------------------------------------- /talkgenerator/data/text-templates/default_slide_title.txt: -------------------------------------------------------------------------------- 1 | {seed.plural.title} 2 | {seed.plural.title} 3 | About {seed.is_noun.plural.title} 4 | About {seed.is_noun.plural.title} 5 | About {seed.is_noun.plural.title} 6 | What about {seed.is_noun.plural.title}? 7 | What about {seed.is_verb.ing.title}? 
8 | Let's look at {seed.is_noun.plural.title} 9 | Let's Discuss {seed.is_noun.plural.title} 10 | Let's look at {seed.is_verb.ing.title} 11 | Let's Discuss {seed.is_verb.ing.title} 12 | Getting {seed.is_noun.plural.title} involved 13 | Getting {seed.is_verb.ing.title} involved 14 | Discussion -------------------------------------------------------------------------------- /talkgenerator/runtime_checker.py: -------------------------------------------------------------------------------- 1 | import logging 2 | 3 | import talkgenerator.settings 4 | import talkgenerator.util.language_util 5 | 6 | logger = logging.getLogger("talkgenerator") 7 | 8 | 9 | def check_runtime_environment(): 10 | check_env = talkgenerator.settings.check_environment_variables() 11 | if check_env: 12 | logger.info("Successful check: Environment variables") 13 | 14 | check_ntlk = talkgenerator.util.language_util.check_and_download() 15 | if check_ntlk: 16 | logger.info("Successful check: NLTK Dictionaries available") 17 | 18 | return check_ntlk 19 | -------------------------------------------------------------------------------- /talkgenerator/util/random_util.py: -------------------------------------------------------------------------------- 1 | import random 2 | 3 | 4 | # From https://stackoverflow.com/questions/14992521/python-weighted-random 5 | def weighted_random(pairs): 6 | if len(pairs) == 0: 7 | return None 8 | total = sum(pair[0] for pair in pairs) 9 | r = random.uniform(0, total) 10 | for (weight, value) in pairs: 11 | r -= weight 12 | if r <= 0: 13 | return value 14 | 15 | 16 | def choice_optional(lst): 17 | """" Returns random.choice if there are elements, None otherwise """ 18 | if len(lst) > 0: 19 | return random.choice(lst) 20 | return None 21 | -------------------------------------------------------------------------------- /tests/test_shitpostbot.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | from talkgenerator.sources import shitpostbot 3 | 4 | 5 | class ShitPostBot(unittest.TestCase): 6 | def test_shitpostbot_search(self): 7 | image_urls = shitpostbot.search_images("cat") 8 | self.assertTrue(len(image_urls) > 0) 9 | 10 | def test_shitpostbot_search_rated(self): 11 | image_urls = shitpostbot.search_images_rated("cat") 12 | self.assertTrue(len(image_urls) > 0) 13 | # Check if the rating of the first one is large 14 | self.assertTrue(int(image_urls[0][0]) > 20) 15 | 16 | 17 | if __name__ == "__main__": 18 | unittest.main() 19 | -------------------------------------------------------------------------------- /talkgenerator/datastructures/image_data.py: -------------------------------------------------------------------------------- 1 | class ImageData: 2 | def __init__(self, image_url: str, source: str = None): 3 | self._image_url = image_url 4 | self._source = source 5 | 6 | def get_image_url(self) -> str: 7 | return self._image_url 8 | 9 | def get_source(self) -> str: 10 | return self._source 11 | 12 | def __str__(self): 13 | return ( 14 | "ImageData(" 15 | + self._image_url 16 | + ((", " + self._source) if self._source is not None else "") 17 | + ")" 18 | ) 19 | 20 | def __repr__(self): 21 | return str(self) 22 | -------------------------------------------------------------------------------- /run_nltk_download.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """NLTK downloading with SSL handling 3 | """ 4 | 5 | import ssl 6 | import nltk 7 | 8 | 9 | try: 10 | 
_create_unverified_https_context = ssl._create_unverified_context # pylint: disable=protected-access 11 | except AttributeError: 12 | pass 13 | else: 14 | ssl._create_default_https_context = _create_unverified_https_context # pylint: disable=protected-access 15 | 16 | 17 | if __name__ == "__main__": 18 | nltk.download('punkt') 19 | nltk.download('averaged_perceptron_tagger') 20 | nltk.download('wordnet') 21 | nltk.download('pros_cons') 22 | nltk.download('reuters') 23 | -------------------------------------------------------------------------------- /tests/test_unsplash.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | from talkgenerator.sources import unsplash 3 | 4 | 5 | class UnsplashTest(unittest.TestCase): 6 | def test_unsplash_access(self): 7 | images = unsplash.search_photos("office") 8 | self.assertTrue(len(images) > 0) 9 | sources = [ 10 | image.get_source() for image in images if image.get_source() is not None 11 | ] 12 | self.assertTrue(len(sources) > 0) 13 | 14 | def test_unsplash_random(self): 15 | image = unsplash.random() 16 | print(image) 17 | self.assertTrue(image) 18 | 19 | 20 | if __name__ == "__main__": 21 | unittest.main() 22 | -------------------------------------------------------------------------------- /talkgenerator/sources/inspirobot.py: -------------------------------------------------------------------------------- 1 | import random 2 | 3 | from talkgenerator.util import os_util 4 | from talkgenerator.datastructures.image_data import ImageData 5 | 6 | 7 | def get_random_inspirobot_image(_=None): 8 | # Generate a random url to access inspirobot 9 | dd = str(random.randint(1, 73)).zfill(2) 10 | nnnn = random.randint(0, 9998) 11 | inspirobot_url = ("http://generated.inspirobot.me/" "0{}/aXm{}xjU.jpg").format( 12 | dd, nnnn 13 | ) 14 | 15 | # Download the image 16 | # image_url = os_util.to_actual_file( 17 | # "downloads/inspirobot/{}-{}.jpg".format(dd, nnnn) 18 | # ) 19 | # os_util.download_image(inspirobot_url, image_url) 20 | 21 | return ImageData(image_url=inspirobot_url, source="Inspirobot") 22 | -------------------------------------------------------------------------------- /talkgenerator/data/text-templates/bold_statements.txt: -------------------------------------------------------------------------------- 1 | Don't We All {action}? 2 | What's the Best Way to {action}? 3 | I hate {action.ing.title} 4 | Is There a Way to Avoid Having to {action}? 5 | The Hidden Link Between {topic.plural.title} and {action.ing.title} 6 | But What's the Effect of {action.ing} on {topic.plural.title}? 7 | Don't {action}. Just {step}! 8 | Don't {action}. Just {topic.wikihow_action}! 9 | It's Hard to {action} in {location} 10 | Everything You Know About {action.ing} is Wrong! 11 | Life hack: Always {action}! 12 | I Will Teach You How To {action}! 13 | Life Advice: {action}! 14 | Life Advice: Never {action}! 15 | WARNING: Never {action}! 16 | Friendly Reminder to {action} 17 | When in Doubt: {action} 18 | {seed.conceptnet_location} is the best place to do {action}! 
-------------------------------------------------------------------------------- /tests/test_wikihow.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | from talkgenerator.sources import wikihow 3 | 4 | 5 | class WikiHowTest(unittest.TestCase): 6 | def test_wrong_wikihow_links_regression_test(self): 7 | actions = wikihow.get_related_wikihow_actions_basic_search("cat") 8 | print(actions) 9 | self.assertFalse("articles from wikiHow" in actions) 10 | 11 | def test_no_views_in_wikihow_action(self): 12 | actions = wikihow.get_related_wikihow_actions("grass") 13 | for action in actions: 14 | # No line breaks allowed 15 | self.assertFalse("\n" in action) 16 | # No number of views 17 | self.assertFalse(" views" in action and "Updated" in action) 18 | 19 | 20 | if __name__ == "__main__": 21 | unittest.main() 22 | -------------------------------------------------------------------------------- /tests/test_phrasefinder.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | 3 | from talkgenerator.sources import phrasefinder 4 | 5 | 6 | class PhraseFinderTest(unittest.TestCase): 7 | def test_phrasefinder_absolute_frequency(self): 8 | self.assertEqual(7506109, phrasefinder.get_absolute_frequency("cat")) 9 | 10 | def test_phrasefinder_absolute_frequency_any_casing(self): 11 | self.assertEqual( 12 | 10307263, phrasefinder.get_absolute_frequency_any_casing("cat") 13 | ) 14 | 15 | def test_phrasefinder_rarest_word(self): 16 | self.assertEqual("cat", phrasefinder.get_rarest_word("Why I love my cat")) 17 | self.assertEqual( 18 | "Peace", phrasefinder.get_rarest_word("Peace is what I want most") 19 | ) 20 | 21 | 22 | if __name__ == "__main__": 23 | unittest.main() 24 | -------------------------------------------------------------------------------- /tests/test_goodreads.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | 3 | from talkgenerator.sources import goodreads 4 | 5 | 6 | class GoodReadsTest(unittest.TestCase): 7 | def test_cat_search(self): 8 | cat_quotes = goodreads.search_quotes("cat", 5) 9 | # Check if starts with quote marks 10 | self.assertEqual('"', cat_quotes[0][0]) 11 | 12 | def test_too_many_quotes_amount(self): 13 | quotes = goodreads.search_quotes("cat nine tails", 25) 14 | # Check if starts with quote marks 15 | self.assertTrue(bool(quotes) and len(quotes) > 0) 16 | 17 | def test_no_quotes(self): 18 | quotes = goodreads.search_quotes("qsdfqsdfq", 100) 19 | # Check if starts with quote marks 20 | self.assertTrue(len(quotes) == 0) 21 | 22 | 23 | if __name__ == "__main__": 24 | unittest.main() 25 | -------------------------------------------------------------------------------- /talkgenerator/data/text-templates/anecdote_prompt.txt: -------------------------------------------------------------------------------- 1 | The time I got to {seed.wikihow_action.2_to_1_pronouns.lower} 2 | My first time {seed.wikihow_action.ing.2_to_1_pronouns.lower} 3 | What happened during my first time {seed.wikihow_action.ing.2_to_1_pronouns.lower} 4 | When did I learn how to {seed.wikihow_action.2_to_1_pronouns.lower}? 5 | My {seed.title.dashes} Accident 6 | My story about {seed.plural} 7 | Funny thing that once happened with my {seed} 8 | My {seed.plural.title} Experience: how I learned to not {seed.wikihow_action.lower}... 9 | My experience with {seed.plural} 10 | How did I get into {topic.plural}? Because of {seed.plural}! 
11 | Funny story about why I first learned how to {seed.wikihow_action.2_to_1_pronouns.lower} 12 | What I always say to people who {seed.wikihow_action} 13 | "But what about {seed.wikihow_action.ing.lower}?" Well... 14 | Why would people like me ever {seed.wikihow_action.2_to_1_pronouns}? -------------------------------------------------------------------------------- /tests/test_reddit.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | 3 | from talkgenerator.sources import reddit 4 | from talkgenerator.schema.content_generator_structures import RedditImageSearcher 5 | 6 | 7 | class RedditTest(unittest.TestCase): 8 | def test_reddit_search_image(self): 9 | result = reddit.search_subreddit( 10 | "memes", str("cat") + " nsfw:no (url:.jpg OR url:.png OR url:.gif)" 11 | ) 12 | print("Result from reddit is", result) 13 | self.assertIsNotNone(result) 14 | self.assertTrue(len(result) > 0) 15 | 16 | def test_reddit_simple(self): 17 | images = RedditImageSearcher("memes")("cat") 18 | self.assertTrue(len(images) > 0) 19 | sources = [ 20 | image.get_source() for image in images if image.get_source() is not None 21 | ] 22 | self.assertTrue(len(sources) > 0) 23 | 24 | 25 | if __name__ == "__main__": 26 | unittest.main() 27 | -------------------------------------------------------------------------------- /tests/test_conceptnet.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | import unittest 3 | 4 | from talkgenerator.sources import conceptnet 5 | 6 | 7 | class ConceptNetTest(unittest.TestCase): 8 | def test_conceptnet_standard(self): 9 | related_words = conceptnet.get_weighted_related_words("cat", 10) 10 | self.assertTrue(len(related_words) < 10) 11 | self.assertTrue(0 < len(related_words)) 12 | 13 | def test_conceptnet_only_english(self): 14 | related_words = conceptnet.get_weighted_related_words("crane", 50) 15 | print(related_words) 16 | self.assertFalse("" in related_words) 17 | self.assertFalse("erav" in related_words) 18 | self.assertFalse("ždral" in related_words) 19 | self.assertFalse("dral" in related_words) 20 | 21 | def test_conceptnet_multiword(self): 22 | related_words = conceptnet.get_weighted_related_words("my lap", 10) 23 | self.assertTrue(len(related_words) > 0) 24 | 25 | 26 | if __name__ == "__main__": 27 | unittest.main() 28 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | appdirs==1.4.4 2 | attrs==20.1.0 3 | beautifulsoup4==4.9.1 4 | black==21.7b0 5 | certifi==2023.7.22 6 | chardet==3.0.4 7 | charset-normalizer==2.0.4 8 | click==7.1.2 9 | codecov==2.1.9 10 | coverage==5.2.1 11 | environs==8.0.0 12 | idna==2.10 13 | importlib-metadata==1.7.0 14 | inflect==4.1.0 15 | iniconfig==1.0.1 16 | joblib==1.2.0 17 | lxml==4.9.1 18 | marshmallow==3.7.1 19 | more-itertools==8.5.0 20 | mypy-extensions==0.4.3 21 | nltk>=3.6.4 22 | packaging==20.4 23 | pathspec==0.9.0 24 | pathtools==0.1.2 25 | pexels-api==1.0.1 26 | Pillow==9.3.0 27 | pluggy==0.13.1 28 | portalocker==2.0.0 29 | praw==7.5.0 30 | py==1.10.0 31 | pyparsing==2.4.7 32 | pytest==6.0.1 33 | pytest-cov==2.10.1 34 | python-dotenv==0.14.0 35 | python-pptx==0.6.18 36 | pyunsplash==1.0.0b9 37 | regex 38 | requests==2.31.0 39 | six==1.15.0 40 | soupsieve==2.0.1 41 | toml==0.10.1 42 | tomli==1.2.1 43 | tqdm==4.48.2 44 | tracery==0.1.1 45 | update-checker==0.18.0 46 | urllib3==1.25.10 47 | 
watchdog==0.10.3 48 | websocket-client==0.57.0 49 | XlsxWriter==1.3.3 50 | zipp==3.1.0 51 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2018-2019 Kory Mathewson and Thomas Winters and Shaun Farrugia 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /talkgenerator/sources/goodreads.py: -------------------------------------------------------------------------------- 1 | from functools import lru_cache 2 | from pathlib import Path 3 | 4 | import requests 5 | from bs4 import BeautifulSoup 6 | # from cachier import cachier 7 | 8 | from talkgenerator.util import scraper_util 9 | 10 | quote_search_url = ( 11 | "https://www.goodreads.com/search?page={}&q={" 12 | "}&search%5Bsource%5D=goodreads&search_type=quotes&tab=quotes " 13 | ) 14 | 15 | 16 | @lru_cache(maxsize=20) 17 | # @cachier(cache_dir=Path("..", "tmp").absolute()) 18 | def _search_quotes_page(search_term, page): 19 | url = quote_search_url.format(page, search_term.replace(" ", "+")) 20 | try: 21 | page = requests.get(url, timeout=5) 22 | except (requests.exceptions.ConnectionError, requests.exceptions.ReadTimeout) as e: 23 | return None 24 | if page: 25 | soup = BeautifulSoup(page.content, "html.parser") 26 | # Replace breaks with new lines 27 | for br in soup.find_all("br"): 28 | br.replace_with("\n") 29 | 30 | # Extract the right text parts 31 | quote_elements = soup.find_all("div", class_="quoteText") 32 | quotes = [ 33 | " ".join([part.strip() for part in quote.get_text().split("—")][0:-1]) 34 | for quote in quote_elements 35 | ] 36 | 37 | return quotes 38 | 39 | 40 | search_quotes = scraper_util.create_page_scraper(_search_quotes_page) 41 | -------------------------------------------------------------------------------- /tests/time_test.py: -------------------------------------------------------------------------------- 1 | import time 2 | 3 | from talkgenerator import generator 4 | from talkgenerator.util import os_util 5 | 6 | 7 | def run_time_test(start_idx, end_idx): 8 | words = os_util.read_lines("data/eval/common_words.txt")[start_idx:end_idx] 9 | result_file = open("data/eval/timings.txt", "a+") 10 | 11 | for topic in words: 12 | args = generator.get_argument_parser().parse_args( 13 | [ 14 | "--topic", 15 | topic, 16 | "--num_slides", 17 | "7", 
18 |                 "--save_ppt", 19 |                 "True", 20 |                 "--open_ppt", 21 |                 "False", 22 |                 "--parallel", 23 |                 "True", 24 |             ] 25 |         ) 26 | 27 |         start = time.process_time() 28 |         clock_start = time.perf_counter() 29 | 30 |         generator.generate_presentation_using_cli_arguments(args) 31 | 32 |         end = time.process_time() 33 |         clock_end = time.perf_counter() 34 |         timing = end - start 35 |         clock_timing = clock_end - clock_start 36 |         print( 37 |             "It took {} seconds to generate the presentation," 38 |             " and {} seconds system-wide".format(str(timing), str(clock_timing)) 39 |         ) 40 |         result_file.write(topic + ", " + str(timing) + ", " + str(clock_timing) + "\n") 41 |         result_file.flush() 42 | 43 |     result_file.close() 44 | 45 | 46 | # run_time_test(0, 200) 47 | -------------------------------------------------------------------------------- /tests/test_talkgenerator_multipletopics.py: -------------------------------------------------------------------------------- 1 | import random 2 | import unittest 3 | from unittest import mock 4 | 5 | from talkgenerator.schema import slide_schemas 6 | from talkgenerator import generator 7 | from talkgenerator.slide import powerpoint_slide_creator 8 | from talkgenerator.util import os_util 9 | 10 | 11 | class TestTalkGenerator(unittest.TestCase): 12 |     def setUp(self): 13 |         random.seed(1) 14 |         self.default_args = mock.Mock() 15 |         self.default_args.configure_mock(topic="cat") 16 |         self.default_args.configure_mock(num_slides=3) 17 |         self.default_args.configure_mock(schema="default") 18 |         self.default_args.configure_mock(title=None) 19 |         self.default_args.configure_mock(parallel=True) 20 |         self.default_args.configure_mock( 21 |             output_folder=os_util.to_actual_file("../output/test/") 22 |         ) 23 |         self.default_args.configure_mock(open_ppt=False) 24 |         self.default_args.configure_mock(save_ppt=True) 25 |         self.default_args.configure_mock(int_seed=123) 26 | 27 |     def test_multiple_topics(self): 28 |         self.default_args.configure_mock(topic="cat, dog, bread, house") 29 |         self.default_args.configure_mock(num_slides=6) 30 |         ppt, _, _ = generator.generate_presentation_using_cli_arguments( 31 |             self.default_args 32 |         ) 33 |         self.assertEqual(6, len(ppt.slides)) 34 | 35 | 36 | if __name__ == "__main__": 37 |     unittest.main() 38 | 39 | 40 | -------------------------------------------------------------------------------- /talkgenerator/data/text-templates/double_captions.txt: -------------------------------------------------------------------------------- 1 | Bad {seed.plural.title}|Good {seed.plural.title} 2 | Bad {seed.plural.title}|Worse {seed.plural.title} 3 | Bad|Good 4 | Bad|Worse 5 | Before|After 6 | Do {seed.unique.wikihow_action.lower}|Don't {seed.wikihow_action.lower} 7 | Don't {seed.unique.wikihow_action.lower}|Do {seed.wikihow_action.lower} 8 | Don't|Do 9 | Do|Don't 10 | Expectation|Reality 11 | Good|Bad 12 | Good|Better 13 | How I initially approached {seed.wikihow_action.ing.lower}|What I should have done 14 | How I see it|What it actually is 15 | How I see {seed.plural}:|What {seed.plural} actually are: 16 | How I used to {seed.unique.wikihow_action.lower}|After I learned how to {seed.wikihow_action.lower} 17 | How people used to {seed.wikihow_action.lower}|My proposal 18 | How people used to {seed.wikihow_action.lower}|Now 19 | How they see it|What it actually is 20 | How they see {seed.wikihow_action.ing.lower}|What it actually is 21 | How they see {seed.wikihow_action.ing.lower}|What {seed.singular} actually means 22 | Let's turn this:|Into this: 23 | Me|Them 24 | My {seed.singular.title}
Plan|Their {seed.singular.title} Plan 25 | Past|Future 26 | Past|Present 27 | Plan|Execution 28 | Present|Future 29 | Previously|Currently 30 | Stereotype|Reality 31 | The Plan|How it worked out 32 | The {seed.title} Plan|How {seed.singular.lower} worked out 33 | What I initially did|The actual solution 34 | Why {seed.plural} are great|Why {seed.plural} are awful 35 | {seed.title}'s Stereotype|{seed.title}'s Reality 36 | {seed.unique.wikihow_action.title}|{seed.wikihow_action.title} -------------------------------------------------------------------------------- /.circleci/config.yml: -------------------------------------------------------------------------------- 1 | # Python CircleCI 2.0 configuration file 2 | # 3 | # Check https://circleci.com/docs/2.0/language-python/ for more details 4 | # 5 | version: 2 6 | jobs: 7 | build: 8 | docker: 9 | - image: circleci/python:latest 10 | 11 | working_directory: ~/talkgenerator 12 | 13 | steps: 14 | - checkout 15 | 16 | # Download and cache dependencies 17 | - restore_cache: 18 | keys: 19 | - v1-dependencies-{{ checksum "requirements.txt" }} 20 | # fallback to using the latest cache if no exact match is found 21 | - v1-dependencies- 22 | 23 | - run: 24 | name: install dependencies 25 | command: | 26 | python3 -m venv venv 27 | . venv/bin/activate 28 | pip install --upgrade pip setuptools 29 | python3 -m pip install -r requirements.txt --use-feature=2020-resolver 30 | python run_nltk_download.py 31 | 32 | - save_cache: 33 | paths: 34 | - ./venv 35 | key: v1-dependencies-{{ checksum "requirements.txt" }} 36 | 37 | # run tests! 38 | - run: 39 | name: run tests 40 | command: | 41 | python3 -m venv venv 42 | . venv/bin/activate 43 | pip install --upgrade pip setuptools 44 | pip install pytest 45 | pip install pytest-cov 46 | pip install codecov 47 | pytest --cov=talkgenerator tests/ 48 | codecov --token=e25ce249-b3da-4d80-b8d3-074f7d288969 49 | 50 | - store_artifacts: 51 | path: test-reports 52 | destination: test-reports -------------------------------------------------------------------------------- /talkgenerator/data/text-templates/conclusion_tuple.json: -------------------------------------------------------------------------------- 1 | { 2 | "two_conclusions": [ 3 | "Conclusion 1|Conclusion 2", 4 | "|", 5 | "Conclusion 1|{title}", 6 | "#first_description#|#punchline_description#", 7 | "#first_description#|#punchline_description#", 8 | "#first_description#|#punchline_description#" 9 | ], 10 | "three_conclusions": [ 11 | "Conclusion 1|Conclusion 2|Conclusion 3", 12 | "||", 13 | "Conclusion 1|Conclusion 2|{title}", 14 | "#first_description#|#first_description#|#punchline_description#", 15 | "#first_description#|#first_description#|#punchline_description#", 16 | "#first_description#|#first_description#|#punchline_description#" 17 | ], 18 | "first_description": [ 19 | "#random_nice_conclusion_word#", 20 | "#random_nice_conclusion_word#", 21 | "{seed.title}", 22 | "#positive_word.capitalize#", 23 | "{seed.plural.title} are #positive_word#", 24 | "The importance of {seed.wikihow_action.ing.lower}", 25 | "" 26 | ], 27 | "random_nice_conclusion_word": [ 28 | "Passion", 29 | "Love", 30 | "Remember", 31 | "Take care", 32 | "Be free", 33 | "Don't forget" 34 | ], 35 | "punchline_description": [ 36 | "Key Idea", 37 | "Life Lesson", 38 | "{title}", 39 | "Watch out", 40 | "Remember", 41 | "Not #positive_word#", 42 | "Do not {seed.wikihow_action}", 43 | "Avoid {seed.wikihow_action.ing.lower}", 44 | "" 45 | ], 46 | "positive_word": [ 47 | "great", 48 | "amazing", 
49 | "wonderful", 50 | "our best friends", 51 | "necessary", 52 | "fabulous", 53 | "genius", 54 | "champions", 55 | "admirable", 56 | "a blessing", 57 | "original", 58 | "skilled" 59 | ] 60 | } -------------------------------------------------------------------------------- /talkgenerator/slide/slide_deck.py: -------------------------------------------------------------------------------- 1 | import logging 2 | from typing import List 3 | 4 | from talkgenerator.slide.slides import Slide 5 | 6 | logger = logging.getLogger("talkgenerator") 7 | 8 | 9 | class SlideDeck: 10 | """ Represents a deck of Slide objects """ 11 | 12 | def __init__(self, size): 13 | self._size = size 14 | self._slides : List[Slide] = [None] * size 15 | 16 | def add_slide(self, slide_index: int, slide): 17 | self._slides[slide_index] = slide 18 | 19 | def is_complete(self): 20 | return len(self._slides) >= self._size and (None not in self._slides) 21 | 22 | def save_to_powerpoint(self, prs_template): 23 | """ Should generate a slide in the powerpoint """ 24 | if not self.is_complete(): 25 | logger.error( 26 | "ERROR: SOME SLIDES WERE NOT GENERATED: {}".format(self._slides) 27 | ) 28 | self._slides = [slide for slide in self._slides if slide is not None] 29 | return [slide.create_powerpoint_slide(prs_template) for slide in self._slides] 30 | 31 | def to_slide_deck_dictionary(self) -> List[dict]: 32 | return [slide.to_slide_dictionary() for slide in self._slides] 33 | 34 | def get_structured_data(self): 35 | """ Return slide deck as structured data for alternative presentation """ 36 | if not self.is_complete(): 37 | logger.error( 38 | "ERROR: SOME SLIDES WERE NOT GENERATED: {}".format(self._slides) 39 | ) 40 | self._slides = [slide for slide in self._slides if slide is not None] 41 | return [slide for slide in self._slides] 42 | 43 | def has_slide_nr(self, index): 44 | return 0 <= index < self._size and self._slides[index] is not None 45 | -------------------------------------------------------------------------------- /tests/test_random_util.py: -------------------------------------------------------------------------------- 1 | import random 2 | import unittest 3 | 4 | from talkgenerator.util import random_util 5 | 6 | 7 | class RandomUtilTest(unittest.TestCase): 8 | def setUp(self): 9 | random.seed(1) 10 | 11 | def test_weighted_random_all_appear(self): 12 | possibilities = (1, "one"), (4, "four"), (6, "six"), (7, "seven") 13 | results = set() 14 | for i in range(10000): 15 | if len(results) == len(possibilities): 16 | break 17 | results.add(random_util.weighted_random(possibilities)) 18 | self.assertEqual({"one", "four", "six", "seven"}, results) 19 | 20 | def test_weighted_random_all_appear_double_values(self): 21 | possibilities = (0.1, "one"), (0.4, "four"), (0.6, "six"), (0.7, "seven") 22 | results = set() 23 | for _ in range(1000): 24 | if len(results) == len(possibilities): 25 | break 26 | results.add(random_util.weighted_random(possibilities)) 27 | self.assertEqual({"one", "four", "six", "seven"}, results) 28 | 29 | def test_weighted_random_all_appear_double_values_appearances(self): 30 | possibilities = (0.1, "one"), (0.4, "four") 31 | ones = 0 32 | fours = 0 33 | for _ in range(1000): 34 | generated = random_util.weighted_random(possibilities) 35 | if generated == "one": 36 | ones += 1 37 | elif generated == "four": 38 | fours += 1 39 | 40 | # Ones should appear 1/5 * 1000 ~ 200 times 41 | self.assertTrue(150 < ones < 250) 42 | # Ones should appear 4/5 * 1000 ~ 800 times 43 | self.assertTrue(750 < fours < 
850) 44 | 45 | 46 | if __name__ == "__main__": 47 | unittest.main() 48 | -------------------------------------------------------------------------------- /talkgenerator/sources/reddit.py: -------------------------------------------------------------------------------- 1 | import datetime 2 | import logging 3 | from functools import lru_cache 4 | from pathlib import Path 5 | 6 | import praw 7 | # from cachier import cachier 8 | from prawcore import ResponseException 9 | from prawcore import RequestException 10 | 11 | from talkgenerator import settings 12 | 13 | singleton_reddit = None 14 | 15 | logger = logging.getLogger("talkgenerator") 16 | 17 | 18 | def get_reddit(): 19 | reddit = singleton_reddit 20 | if not bool(reddit): 21 | reddit = praw.Reddit(**settings.reddit_auth()) 22 | return reddit 23 | 24 | 25 | def has_reddit_access(): 26 | return bool(get_reddit()) 27 | 28 | 29 | def get_subreddit(name): 30 | if has_reddit_access(): 31 | subreddit = get_reddit().subreddit(name) 32 | if subreddit: 33 | return subreddit 34 | 35 | 36 | @lru_cache(maxsize=20) 37 | # @cachier(cache_dir=Path("..", "tmp").absolute(), stale_after=datetime.timedelta(weeks=2)) 38 | def search_subreddit(name, query, sort="relevance", limit=500, filter_nsfw=True): 39 | if has_reddit_access(): 40 | try: 41 | submissions = list( 42 | get_subreddit(name).search(query, sort=sort, limit=limit) 43 | ) 44 | 45 | if filter_nsfw: 46 | submissions = [ 47 | submission for submission in submissions if not submission.over_18 48 | ] 49 | return submissions 50 | 51 | except ResponseException as err: 52 | logger.error("Exception with accessing Reddit: {}".format(err)) 53 | except RequestException as err: 54 | logger.error("Exception with accessing Reddit: {}".format(err)) 55 | else: 56 | logger.warning("WARNING: No reddit access!") 57 | -------------------------------------------------------------------------------- /talkgenerator/sources/pexels.py: -------------------------------------------------------------------------------- 1 | import logging 2 | from pathlib import Path 3 | from typing import List 4 | 5 | # from cachier import cachier 6 | from pexels_api import API 7 | from talkgenerator import settings 8 | from talkgenerator.datastructures.image_data import ImageData 9 | 10 | logging.getLogger("pexels").setLevel(logging.DEBUG) 11 | logger = logging.getLogger("talkgenerator") 12 | 13 | 14 | def get_pexels_session(): 15 | creds = settings.pexels_auth() 16 | api = API(creds["pexels_key"]) 17 | return api 18 | 19 | 20 | pexels_session = get_pexels_session() 21 | 22 | 23 | # @cachier(cache_dir=Path("..", "tmp").absolute()) 24 | def _search_pexels(query): 25 | return pexels_session.search(query) 26 | 27 | 28 | def search_photos(query) -> List[ImageData]: 29 | if pexels_session and query: 30 | results = _search_pexels(query) 31 | if results and results["photos"]: 32 | images = [] 33 | for photo in results["photos"]: 34 | source = photo["src"] 35 | # link_download = ( 36 | # source["large"] 37 | # if "large" in source 38 | # else (source["original"] if "original" in source else photo["url"]) 39 | # ) 40 | link_download = source["original"] 41 | creator = ( 42 | (photo["photographer"] + " (via Pexels)") 43 | if "photographer" in photo 44 | else None 45 | ) 46 | images.append(ImageData(image_url=link_download, source=creator)) 47 | return images 48 | else: 49 | logger.warning( 50 | 'pexels could not find results for "{}", which might be due to missing/erroneous access keys'.format( 51 | query 52 | ) 53 | ) 54 | else: 55 | 
logger.warning("No active pexels session due to missing/wrong credentials.") 56 | -------------------------------------------------------------------------------- /tests/test_slide_topic_generators.py: -------------------------------------------------------------------------------- 1 | import random 2 | import unittest 3 | 4 | from talkgenerator.schema.slide_topic_generators import SideTrackingTopicGenerator 5 | 6 | 7 | class SlideTopicGeneratorsTest(unittest.TestCase): 8 | def setUp(self) -> None: 9 | random.seed(123) 10 | 11 | def test_conceptnet_sidetracking_standard(self): 12 | generator = SideTrackingTopicGenerator(["cat"], 5) 13 | self.assertTrue( 14 | len([seed for seed in generator.all_seeds() if seed is None]) == 0 15 | ) 16 | 17 | def test_conceptnet_sidetracking_non_existing_topic(self): 18 | non_existing_word = "nonexistingword-bla-bla" 19 | generator = SideTrackingTopicGenerator([non_existing_word], 10) 20 | self.assertTrue( 21 | len([seed for seed in generator.all_seeds() if seed == non_existing_word]) 22 | == 10 23 | ) 24 | 25 | def test_conceptnet_sidetracking_hard_topic(self): 26 | generator = SideTrackingTopicGenerator(["scratch furniture"], 10) 27 | self.assertTrue( 28 | len([seed for seed in generator.all_seeds() if seed is None]) == 0 29 | ) 30 | 31 | def test_conceptnet_sidetracking_multi_topic(self): 32 | generator = SideTrackingTopicGenerator(["cat", "house", "dog"], 6) 33 | seeds = generator.all_seeds() 34 | print("multi_topic", seeds) 35 | self.assertTrue(seeds[0] == "cat") 36 | self.assertTrue(seeds[2] == "house") 37 | self.assertTrue(seeds[4] == "dog") 38 | # Nothing is none 39 | self.assertTrue( 40 | len([seed for seed in generator.all_seeds() if seed is None]) == 0 41 | ) 42 | 43 | def test_conceptnet_sidetracking_multi_topic_one_each(self): 44 | topics = ["cat", "house", "chicken", "horse", "dog"] 45 | generator = SideTrackingTopicGenerator(topics, len(topics)) 46 | self.assertEqual(topics, generator.all_seeds()) 47 | 48 | 49 | if __name__ == "__main__": 50 | unittest.main() 51 | -------------------------------------------------------------------------------- /talkgenerator/sources/pixabay.py: -------------------------------------------------------------------------------- 1 | import logging 2 | import requests 3 | from typing import List 4 | 5 | from talkgenerator import settings 6 | from talkgenerator.datastructures.image_data import ImageData 7 | 8 | logging.getLogger("pixabay").setLevel(logging.DEBUG) 9 | logger = logging.getLogger("talkgenerator") 10 | 11 | 12 | def get_pixabay_session(): 13 | creds = settings.pixabay_auth() 14 | api_key = creds["pixabay_key"] 15 | return api_key 16 | 17 | 18 | def search_horizontal(query): 19 | return search_photos(query, orientation="horizontal") 20 | 21 | 22 | def search_vertical(query): 23 | return search_photos(query, orientation="vertical") 24 | 25 | 26 | def search_photos(query, orientation="all") -> List[ImageData]: 27 | api_key = get_pixabay_session() 28 | logger.debug("pixabay_api_key: {}".format(api_key)) 29 | query = query.replace(' ', '+') 30 | logger.debug("pixabay.search_photos called with query: {}".format(query)) 31 | url_query = f"https://pixabay.com/api/?key={api_key}&q={query}&image_type=photo&orientation={orientation}" 32 | logger.debug("pixabay url_query: {}".format(url_query)) 33 | if api_key and url_query: 34 | results = requests.get(url=url_query) 35 | logger.debug("request response results: {}".format(results)) 36 | response_data = results.json() 37 | if results.status_code == 200 and 
response_data["hits"]: 38 | images = [] 39 | for photo in response_data["hits"]: 40 | link_download = photo["largeImageURL"] 41 | creator = photo["user"] + " (via Pixabay)" if "user" in photo else None 42 | images.append(ImageData(image_url=link_download, source=creator)) 43 | return images 44 | else: 45 | logger.warning( 46 | 'Pixabay could not find results for "{}", which might be due to missing/erroneous access keys'.format( 47 | query 48 | ) 49 | ) 50 | else: 51 | logger.warning("No active Pixabay session due to missing/wrong credentials.") 52 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | from os import listdir 2 | from os.path import isfile, join 3 | 4 | from setuptools import setup 5 | from setuptools import find_packages 6 | 7 | # Build a list of text-templates to install 8 | DATA_PATH = "talkgenerator/data/" 9 | text_templates_path = DATA_PATH + "text-templates/" 10 | text_template_files = [ 11 | f for f in listdir(text_templates_path) if isfile(join(text_templates_path, f)) 12 | ] 13 | all_text_templates = [] 14 | for f in text_template_files: 15 | all_text_templates.append(text_templates_path + f) 16 | 17 | 18 | prohibited_images_path = DATA_PATH + "prohibited_images/" 19 | prohibited_images_files = [ 20 | f 21 | for f in listdir(prohibited_images_path) 22 | if isfile(join(prohibited_images_path, f)) 23 | ] 24 | prohibited_images = [] 25 | for f in prohibited_images_files: 26 | prohibited_images.append(prohibited_images_path + f) 27 | 28 | with open('requirements.txt') as f: 29 | required = f.read().splitlines() 30 | 31 | setup( 32 | name="talkgenerator", 33 | version="3.0", 34 | description="Automatically generating presentation slide decks based on a given topic for improvised presentations", 35 | long_description="Check our GitHub repository on https://github.com/korymath/talk-generator for more information!", 36 | author="Thomas Winters, Kory Mathewson", 37 | author_email="info@thomaswinters.be", 38 | url="https://github.com/korymath/talk-generator", 39 | license="MIT License", 40 | platforms=["Mac", "Linux"], 41 | packages=find_packages(), # auto-discovery submodules ["talkgenerator"], 42 | package_dir={"talkgenerator": "talkgenerator"}, 43 | data_files=[ 44 | ("images", [DATA_PATH + "images/black-transparent.png"]), 45 | ("images", [DATA_PATH + "images/error_placeholder.png"]), 46 | ("powerpoint", [DATA_PATH + "powerpoint/template.pptx"]), 47 | ("prohibited_images", prohibited_images), 48 | ("text-templates", all_text_templates), 49 | ], 50 | include_package_data=True, 51 | install_requires=required, 52 | entry_points={"console_scripts": ["talkgenerator = talkgenerator.run:main_cli"]}, 53 | ) 54 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | 2 | # VSCode Files (IDE) 3 | .vscode 4 | *.code-workspace 5 | 6 | # Mac hygiene 7 | .DS_Store 8 | 9 | # AWS Keys 10 | .aws 11 | 12 | # Runtime generated files 13 | downloads/* 14 | output/*.pptx 15 | data/eval/timings.txt 16 | 17 | # Byte-compiled / optimized / DLL files 18 | __pycache__/ 19 | *.py[cod] 20 | *$py.class 21 | 22 | # C extensions 23 | *.so 24 | 25 | # Distribution / packaging 26 | .Python 27 | build/ 28 | develop-eggs/ 29 | dist/ 30 | downloads/ 31 | eggs/ 32 | .eggs/ 33 | lib/ 34 | lib64/ 35 | parts/ 36 | sdist/ 37 | var/ 38 | wheels/ 39 | *.egg-info/ 40 | 
.installed.cfg 41 | *.egg 42 | MANIFEST 43 | 44 | # PyInstaller 45 | # Usually these files are written by a python script from a template 46 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 47 | *.manifest 48 | *.spec 49 | 50 | # Installer logs 51 | pip-log.txt 52 | pip-delete-this-directory.txt 53 | 54 | # Unit test / coverage reports 55 | htmlcov/ 56 | .tox/ 57 | .coverage 58 | .coverage.* 59 | .cache 60 | nosetests.xml 61 | coverage.xml 62 | *.cover 63 | .hypothesis/ 64 | .pytest_cache/ 65 | 66 | # Translations 67 | *.mo 68 | *.pot 69 | 70 | # Django stuff: 71 | *.log 72 | local_settings.py 73 | db.sqlite3 74 | 75 | # Flask stuff: 76 | instance/ 77 | .webassets-cache 78 | 79 | # Scrapy stuff: 80 | .scrapy 81 | 82 | # Sphinx documentation 83 | docs/_build/ 84 | 85 | # PyBuilder 86 | target/ 87 | 88 | # Jupyter Notebook 89 | .ipynb_checkpoints 90 | 91 | # pyenv 92 | .python-version 93 | 94 | # celery beat schedule file 95 | celerybeat-schedule 96 | 97 | # SageMath parsed files 98 | *.sage.py 99 | 100 | # Environments 101 | .env 102 | .venv 103 | env/ 104 | venv/ 105 | ENV/ 106 | env.bak/ 107 | venv.bak/ 108 | 109 | # Spyder project settings 110 | .spyderproject 111 | .spyproject 112 | 113 | # Rope project settings 114 | .ropeproject 115 | 116 | # mkdocs documentation 117 | /site 118 | 119 | # mypy 120 | .mypy_cache/ 121 | .idea/ 122 | output/ 123 | data/auth/reddit.json 124 | test_output/ 125 | data/auth/wikihow.json 126 | data/auth/pexels.json 127 | venv2/ 128 | venv3/ 129 | 130 | tmp/ 131 | -------------------------------------------------------------------------------- /talkgenerator/sources/phrasefinder.py: -------------------------------------------------------------------------------- 1 | from json import JSONDecodeError 2 | from pathlib import Path 3 | 4 | import requests 5 | # from cachier import cachier 6 | 7 | from talkgenerator.util import language_util 8 | 9 | URL = "https://api.phrasefinder.io/search?corpus=eng-us&query={}&nmax=1" 10 | 11 | 12 | # @cachier(cache_dir=Path("..", "tmp").absolute()) 13 | def _search(word): 14 |     # Percent-encode spaces so multi-word queries form a valid URL 15 |     word = word.replace(" ", "%20") 16 |     url = URL.format(word) 17 |     try: 18 |         result = requests.get(url) 19 |         result = result.json() 20 |         if result: 21 |             return result["phrases"] 22 |     except JSONDecodeError: 23 |         return None 24 | 25 | 26 | def _get_absolute_frequencies(word): 27 |     pf_results = _search(word) 28 |     if pf_results: 29 |         absolute_frequencies = [] 30 |         for word_count in pf_results: 31 |             word = word_count["tks"][0]["tt"] 32 |             count = word_count["mc"] 33 |             absolute_frequencies.append((word, count)) 34 |         return absolute_frequencies 35 | 36 | 37 | def get_absolute_frequency(word): 38 |     absolute_frequencies = _get_absolute_frequencies(word) 39 |     absolute_frequency = list( 40 |         filter(lambda word_count: word_count[0] == word, absolute_frequencies) 41 |     ) 42 |     if len(absolute_frequency) == 1: 43 |         return absolute_frequency[0][1] 44 | 45 | 46 | def get_absolute_frequency_any_casing(word): 47 |     absolute_frequencies = _get_absolute_frequencies(word) 48 |     if absolute_frequencies: 49 |         return sum(map(lambda word_count: word_count[1], absolute_frequencies)) 50 | 51 | 52 | def get_rarest_word(sentence): 53 |     words = [ 54 |         language_util.replace_non_alphabetical_characters(word) 55 |         for word in sentence.split(" ") 56 |     ] 57 |     words = filter(lambda word: word is not None and len(word.strip()) > 0, words) 58 | 59 |     min_word = None 60 |     min_freq = -1 61 |     for word in words: 62 |         freq = get_absolute_frequency_any_casing(word) 63 |         if freq is
not None and (min_freq == -1 or freq < min_freq): 63 | min_word = word 64 | min_freq = freq 65 | return min_word 66 | # return min(words, key=lambda word: get_absolute_frequency_any_casing(word)) 67 | -------------------------------------------------------------------------------- /talkgenerator/util/os_util.py: -------------------------------------------------------------------------------- 1 | import ntpath 2 | import os 3 | import logging 4 | import pathlib 5 | import sys 6 | from functools import lru_cache 7 | from typing import Union 8 | 9 | import requests 10 | from PIL import Image 11 | from PIL.Image import DecompressionBombError 12 | 13 | # import tempfile 14 | from talkgenerator.datastructures.image_data import ImageData 15 | 16 | logger = logging.getLogger("talkgenerator") 17 | 18 | 19 | def to_actual_file(filename=""): 20 | """Return the path to the filename specified. 21 | This is used most often to get the path of data files.""" 22 | 23 | util_folder = os.path.dirname((os.path.dirname(os.path.abspath(__file__)))) 24 | return os.path.join(util_folder, filename) 25 | 26 | 27 | @lru_cache(maxsize=20) 28 | def read_lines(filename): 29 | actual_file = to_actual_file(filename) 30 | return [line.rstrip("\n") for line in open(actual_file)] 31 | 32 | 33 | @lru_cache(maxsize=20) 34 | def open_image(filename): 35 | try: 36 | return Image.open(filename) 37 | except DecompressionBombError: 38 | return None 39 | 40 | 41 | _PROHIBITED_IMAGES_DIR = "data/prohibited_images/" 42 | 43 | 44 | @lru_cache(maxsize=1) 45 | def get_prohibited_images(): 46 | actual_dir = to_actual_file(_PROHIBITED_IMAGES_DIR) 47 | return list( 48 | [open_image(os.path.join(actual_dir, url)) for url in os.listdir(actual_dir)] 49 | ) 50 | 51 | 52 | @lru_cache(maxsize=20) 53 | def is_image(content: Union[str, ImageData]): 54 | if isinstance(content, ImageData): 55 | return True 56 | else: 57 | return _is_image_path(content) 58 | 59 | 60 | def _is_image_path(content: str): 61 | if not bool(content) or bool(content) is content or not content.lower: 62 | return False 63 | lower_url = content.lower() 64 | return ( 65 | ".jpg" in lower_url 66 | or ".gif" in lower_url 67 | or ".png" in lower_url 68 | or ".jpeg" in lower_url 69 | ) 70 | 71 | 72 | def show_logs(given_logger: logging.Logger): 73 | given_logger.setLevel(logging.DEBUG) 74 | handler = logging.StreamHandler(sys.stdout) 75 | handler.setLevel(logging.DEBUG) 76 | formatter = logging.Formatter( 77 | "[%(levelname)s %(asctime)s %(filename)s:%(lineno)s] %(message)s" 78 | ) 79 | handler.setFormatter(formatter) 80 | given_logger.addHandler(handler) 81 | -------------------------------------------------------------------------------- /talkgenerator/data/text-templates/chart_texts.json: -------------------------------------------------------------------------------- 1 | { 2 | "origin": [ 3 | "#yes_no_question#" 4 | ], 5 | "yes_no_question": [ 6 | "Do you know what {seed.a} is?", 7 | "Do you like {seed.wikihow_action.ing.lower}?", 8 | "Would you like to {seed.wikihow_action.lower}?", 9 | "Are you afraid of {seed.wikihow_action.ing.lower}?", 10 | "Would you accept it if your #family_member# started to {seed.wikihow_action.lower}?", 11 | "Do you want to learn how to {seed.wikihow_action.lower}?", 12 | "Do you know how to {seed.wikihow_action.lower}?", 13 | "Do you agree that {seed.a} belongs in {seed.conceptnet_location}" 14 | ], 15 | "location_question": [ 16 | "Places where you can find {seed.singular.a.lower}", 17 | "Places you will find {seed.plural.lower}", 18 | 
"{seed.singular.a.title}'s Favourite Location" 19 | ], 20 | "property_question": [ 21 | "My favourite properties of {seed.a}", 22 | "What I admire most about {seed.plural}", 23 | "What {seed.plural} mostly are" 24 | ], 25 | "correlation_title": [ 26 | "Correlation between {x_label.get_last_noun_and_article.plural} and {y_label.plural}", 27 | "Relation between {x_label.get_last_noun_and_article.plural} and {y_label.plural}", 28 | "Influence of {x_label.get_last_noun_and_article.plural} on {y_label.plural}", 29 | "Correlation between {x_label.is_verb.ing} and {y_label.plural}", 30 | "Relation between {x_label.is_verb.ing} and {y_label.plural}", 31 | "Influence of {x_label.is_verb.ing} on {y_label.plural}", 32 | "{x_label} VS {y_label.plural}" 33 | ], 34 | "funny_yes_no_answer": [ 35 | "Absolutely", 36 | "Absolutely not", 37 | "I'd rather die", 38 | "Can you repeat the question?", 39 | "Can I instead just {seed.wikihow_action.2_to_1_pronouns.lower}?", 40 | "I'd rather {seed.wikihow_action.2_to_1_pronouns.lower}", 41 | "But sir, I *AM* {seed.a}", 42 | "{chart_title.get_last_noun_and_article.2_to_1_pronouns.title}#?#", 43 | "HOW DARE YOU SPEAK ABOUT {chart_title.get_last_noun_and_article.2_to_1_pronouns.upper.plural}#?#", 44 | "I'd rather not talk about {chart_title.get_last_noun_and_article.2_to_1_pronouns.lower}", 45 | "My beliefs don't allow that", 46 | "My #family_member# doesn't allow me that", 47 | "Who wouldn't?" 48 | ], 49 | "?": [ 50 | "?", 51 | "??", 52 | "???" 53 | ], 54 | "family_member": [ 55 | "kid", 56 | "daughter", 57 | "son", 58 | "loved one" 59 | ] 60 | } -------------------------------------------------------------------------------- /tests/test_specific_text_generators.py: -------------------------------------------------------------------------------- 1 | import random 2 | import unittest 3 | 4 | from talkgenerator.schema.content_generator_structures import ( 5 | create_tracery_generator, 6 | create_templated_text_generator, 7 | ) 8 | 9 | default_number_of_generations = 100 10 | default_arguments = {"seed": "house", "presenter": "A. 
Nonymous", "topic": "house"} 11 | 12 | 13 | class SpecificTextGeneratorTest(unittest.TestCase): 14 | def setUp(self): 15 | random.seed(1) 16 | 17 | def _tracery_tester( 18 | self, file_location, grammar_element="origin", print_generations=False 19 | ): 20 | tracery_generator = create_tracery_generator(file_location, grammar_element) 21 | generations = [ 22 | tracery_generator(default_arguments) 23 | for _ in range(0, default_number_of_generations) 24 | ] 25 | if print_generations: 26 | print("\n".join(generations)) 27 | self.assertEqual(len(generations), default_number_of_generations) 28 | 29 | def _templated_text_generator_tester( 30 | self, 31 | file_location, 32 | print_generations=False, 33 | number_of_generations=default_number_of_generations, 34 | seed=default_arguments["seed"], 35 | ): 36 | templated_generator = create_templated_text_generator(file_location) 37 | arguments = dict(default_arguments) 38 | arguments["seed"] = seed 39 | generations = [ 40 | templated_generator(arguments) for _ in range(0, number_of_generations) 41 | ] 42 | if print_generations: 43 | print("\n".join(generations)) 44 | self.assertEqual(len(generations), number_of_generations) 45 | return generations 46 | 47 | def test_talk_title_generator(self): 48 | self._tracery_tester("data/text-templates/talk_title.json") 49 | 50 | def test_talk_subtitle_generator(self): 51 | self._tracery_tester("data/text-templates/talk_subtitle.json", "job") 52 | 53 | def test_anecdote_prompt_generator(self): 54 | self._templated_text_generator_tester( 55 | "data/text-templates/anecdote_prompt.txt", True 56 | ) 57 | 58 | def test_captions_generator(self): 59 | generations = self._templated_text_generator_tester( 60 | "data/text-templates/double_captions.txt", 61 | True, 62 | number_of_generations=100, 63 | seed="cat", 64 | ) 65 | 66 | for generation in generations: 67 | self.assertTrue(" List[ImageData]: 60 | if unsplash_session and query: 61 | results = unsplash_session.search(type_="photos", query=query) 62 | if results and results.body: 63 | images = [] 64 | for photo in results.entries: 65 | images.append(_map_to_image_data(photo)) 66 | return images 67 | else: 68 | logger.warning( 69 | 'Unsplash could not find results for "{}", which might be due to missing/erroneous access keys'.format( 70 | query 71 | ) 72 | ) 73 | elif unsplash_session and not query: 74 | return random_as_list() 75 | else: 76 | logger.warning("No active Unsplash session due to missing/wrong credentials.") 77 | -------------------------------------------------------------------------------- /tests/test_talkgenerator.py: -------------------------------------------------------------------------------- 1 | import random 2 | import logging 3 | import unittest 4 | from unittest import mock 5 | 6 | from talkgenerator.schema import slide_schemas 7 | from talkgenerator import generator 8 | from talkgenerator.slide import powerpoint_slide_creator 9 | from talkgenerator.util import os_util 10 | 11 | 12 | class TestTalkGenerator(unittest.TestCase): 13 | def setUp(self): 14 | random.seed(1) 15 | self.default_args = mock.Mock() 16 | self.default_args.configure_mock(topic="cat") 17 | self.default_args.configure_mock(num_slides=3) 18 | self.default_args.configure_mock(schema="default") 19 | self.default_args.configure_mock(title=None) 20 | self.default_args.configure_mock(parallel=True) 21 | self.default_args.configure_mock( 22 | output_folder=os_util.to_actual_file("../output/test/") 23 | ) 24 | self.default_args.configure_mock(open_ppt=False) 25 | 
self.default_args.configure_mock(save_ppt=True) 26 | self.default_args.configure_mock(int_seed=123) 27 | 28 | def test_serial(self): 29 | self.default_args.configure_mock(parallel=False) 30 | ppt, _, _ = generator.generate_presentation_using_cli_arguments( 31 | self.default_args 32 | ) 33 | 34 | self.assertEqual(3, len(ppt.slides)) 35 | 36 | def test_to_dictionary(self): 37 | _, slide_deck, _ = generator.generate_presentation( 38 | schema="default", 39 | slides=3, 40 | topic="cat", 41 | title=None, 42 | presenter=None, 43 | parallel=True, 44 | int_seed=123, 45 | save_ppt=False, 46 | open_ppt=False, 47 | print_logs=False, 48 | ) 49 | slides_dict = slide_deck.to_slide_deck_dictionary() 50 | logging.info(slides_dict) 51 | self.assertIsNotNone(slides_dict) 52 | 53 | def test_all_slide_generators(self): 54 | basic_presentation_context = { 55 | "topic": "dog", 56 | "seed": "cat", 57 | "presenter": "An O. Nymous", 58 | "title": "Mock title", 59 | } 60 | 61 | presentation = powerpoint_slide_creator.create_new_powerpoint() 62 | 63 | for slide_generator in slide_schemas.all_slide_generators: 64 | logging.info("Testing Slide Generator: {}".format(slide_generator)) 65 | random.seed(123) 66 | slide, _ = slide_generator.generate( 67 | basic_presentation_context, [] 68 | ) 69 | slide.create_powerpoint_slide(presentation) 70 | 71 | 72 | if __name__ == "__main__": 73 | unittest.main() 74 | -------------------------------------------------------------------------------- /talkgenerator/data/text-templates/talk_subtitle.json: -------------------------------------------------------------------------------- 1 | { 2 | "origin": [ 3 | "#subtitle#" 4 | ], 5 | "subtitle": [ 6 | "A presentation by #name_and_job#", 7 | "By #name_and_job#", 8 | "Presented by #name_and_job#" 9 | ], 10 | "name_and_job": [ 11 | "{presenter}, #job#", 12 | "{presenter},\n#job#" 13 | ], 14 | "title": [ 15 | "professor ", 16 | "dr ", 17 | "ir ", 18 | "", 19 | "" 20 | ], 21 | "knowledge_person": [ 22 | "Researcher", 23 | "Expert", 24 | "Professor", 25 | "Doctor", 26 | "Master", 27 | "Guru", 28 | "PhD", 29 | "Fanatic" 30 | ], 31 | "job": [ 32 | "#knowledge_person# in #science#", 33 | "#knowledge_person# of #science#", 34 | "#science# #knowledge_person#", 35 | "#science# #knowledge_person#" 36 | ], 37 | "science": [ 38 | "{seed.title} #science_type#", 39 | "{seed.title} #science_type#", 40 | "#science_specifier# {seed.title} #science_type#", 41 | "#science_specifier# {seed.title} #science_type#", 42 | "#science_specifier# {seed.title} #science_type#", 43 | "#science_prefix#{seed.title.last_letter_is_vowel}#science_suffix#", 44 | "#science_prefix#{seed.title.last_letter_is_consonant}o#science_suffix#", 45 | "#science_prefix#{seed.title} #science_type#", 46 | "#science_specifier# #science_prefix#{seed.title} #science_type#" 47 | ], 48 | "science_prefix": [ 49 | "Meta-", 50 | "Aero", 51 | "Bio", 52 | "Anti-", 53 | "Eco", 54 | "Electro", 55 | "Micro", 56 | "Macro", 57 | "Nano", 58 | "Poly" 59 | ], 60 | "science_suffix": [ 61 | "graphy", 62 | "logy", 63 | "nomy", 64 | "nomics", 65 | "mony", 66 | "nymy" 67 | ], 68 | "science_specifier": [ 69 | "Advanced", 70 | "Applied", 71 | "Comparative", 72 | "Descriptive", 73 | "Digital", 74 | "Dynamical", 75 | "Ecological", 76 | "Electronic", 77 | "Empirical", 78 | "Ethical", 79 | "Executive", 80 | "Exploratory", 81 | "Fundamental", 82 | "Instrumental", 83 | "Logical", 84 | "Molecular", 85 | "Moral", 86 | "Neural", 87 | "Nuclear", 88 | "Observational", 89 | "Philosophy of", 90 | "Physical", 91 | "Pure", 92 | 
"Quantum", 93 | "Renewable", 94 | "Social", 95 | "Statistical", 96 | "Systematic", 97 | "Theoretical" 98 | ], 99 | "science_type": [ 100 | "Biology", 101 | "Chemistry", 102 | "Design", 103 | "Dynamics", 104 | "Engineering", 105 | "Fusion", 106 | "Literature", 107 | "Logic", 108 | "Mechanics", 109 | "Medicine", 110 | "Methodology", 111 | "Philosophy", 112 | "Physics", 113 | "Science", 114 | "Statistics", 115 | "Studies", 116 | "Systems", 117 | "Technology", 118 | "Theory" 119 | ] 120 | } -------------------------------------------------------------------------------- /talkgenerator/sources/shitpostbot.py: -------------------------------------------------------------------------------- 1 | import random 2 | from functools import lru_cache 3 | from pathlib import Path 4 | 5 | import requests 6 | from bs4 import BeautifulSoup 7 | # from cachier import cachier 8 | 9 | from talkgenerator.util import scraper_util 10 | 11 | _MAX_RANDOM_PAGE = 150 12 | _SEARCH_URL = ( 13 | "https://www.shitpostbot.com/gallery/sourceimages?query={" 14 | "}&review_state=accepted&order=total_rating&direction=DESC&page={} " 15 | ) 16 | 17 | 18 | def _search_shitpostbot_page(search_term, page): 19 | return [element[1] for element in _search_shitpostbot_page_rated(search_term, page)] 20 | 21 | 22 | @lru_cache(maxsize=20) 23 | # @cachier(cache_dir=Path("..", "tmp").absolute()) 24 | def _search_shitpostbot_page_rated(search_term, page): 25 | url = _SEARCH_URL.format(search_term, page, search_term.replace(" ", "+")) 26 | page = requests.get(url) 27 | if page: 28 | soup = BeautifulSoup(page.content, "html.parser") 29 | 30 | post_entries = soup.find_all("div", class_="col-md-4") 31 | image_urls = [] 32 | for entry in post_entries: 33 | # Check if author doesn't have the search term (False positive) 34 | user = ( 35 | entry.find("div", class_="caption") 36 | .find_all("p")[1] 37 | .find("a") 38 | .get_text() 39 | ) 40 | if bool(search_term) and search_term in user: 41 | continue 42 | 43 | # Get real image url 44 | image_url = entry.find("img").get("src") 45 | image_url = _get_source_image(image_url) 46 | rating_div = entry.find("span", class_="rating") 47 | rating = int(rating_div.text if rating_div else 1) 48 | if rating > 0: 49 | image_urls.append((rating, image_url)) 50 | 51 | return image_urls 52 | 53 | 54 | source_image_prefix = "https://www.shitpostbot.com/img/sourceimages/" 55 | 56 | 57 | def _get_source_image(image_url): 58 | image_url = image_url.replace("%2F", "/") 59 | last_slash_idx = image_url.rfind("/") 60 | image_file_name = image_url[last_slash_idx + 1 :] 61 | return source_image_prefix + image_file_name 62 | 63 | 64 | def get_random_images(_): 65 | images = _search_shitpostbot_page("", random.choice(range(_MAX_RANDOM_PAGE))) 66 | return images 67 | 68 | 69 | def get_random_images_rated(_): 70 | images = _search_shitpostbot_page_rated("", random.choice(range(_MAX_RANDOM_PAGE))) 71 | return images 72 | 73 | 74 | _search_image_function = scraper_util.create_page_scraper(_search_shitpostbot_page) 75 | _search_image_function_rated = scraper_util.create_page_scraper( 76 | _search_shitpostbot_page_rated 77 | ) 78 | 79 | 80 | def search_images(search_term, number=50): 81 | return _search_image_function(search_term, number) 82 | 83 | 84 | def search_images_rated(search_term, number=50): 85 | return _search_image_function_rated(search_term, number) 86 | -------------------------------------------------------------------------------- /talkgenerator/settings.py: 
-------------------------------------------------------------------------------- 1 | import logging 2 | from environs import Env 3 | 4 | 5 | logger = logging.getLogger("talkgenerator") 6 | env = Env() 7 | env.read_env() 8 | 9 | reddit_keys = ["REDDIT_CLIENT_ID", "REDDIT_CLIENT_SECRET", "REDDIT_USER_AGENT"] 10 | wikihow_keys = ["WIKIHOW_USERNAME", "WIKIHOW_PASSWORD"] 11 | unsplash_keys = [ 12 | "UNSPLASH_ACCESS_KEY", 13 | "UNSPLASH_SECRET_KEY", 14 | "UNSPLASH_REDIRECT_URI", 15 | "UNSPLASH_CODE", 16 | ] 17 | 18 | all_keys_to_check = { 19 | "Reddit": reddit_keys, 20 | "WikiHow": wikihow_keys, 21 | "Unsplash": unsplash_keys, 22 | } 23 | 24 | 25 | def reddit_auth(): 26 | return { 27 | "client_id": env.str("REDDIT_CLIENT_ID", ""), 28 | "client_secret": env.str("REDDIT_CLIENT_SECRET", ""), 29 | "user_agent": env.str("REDDIT_USER_AGENT", ""), 30 | } 31 | 32 | 33 | def wikihow_auth(): 34 | return { 35 | "username": env.str("WIKIHOW_USERNAME", ""), 36 | "password": env.str("WIKIHOW_PASSWORD", ""), 37 | } 38 | 39 | 40 | def unsplash_auth(): 41 | return { 42 | "unsplash_access_key": env.str("UNSPLASH_ACCESS_KEY", ""), 43 | "unsplash_secret_key": env.str("UNSPLASH_SECRET_KEY", ""), 44 | "unsplash_redirect_uri": env.str("UNSPLASH_REDIRECT_URI", ""), 45 | "unsplash_code": env.str("UNSPLASH_CODE", ""), 46 | } 47 | 48 | 49 | def pixabay_auth(): 50 | return {"pixabay_key": env.str("PIXABAY_KEY", "")} 51 | 52 | 53 | def pexels_auth(): 54 | return {"pexels_key": env.str("PEXELS_KEY", "")} 55 | 56 | 57 | def _get_missing_keys(key_variables): 58 | missing = [] 59 | for key_name in key_variables: 60 | if len(env.str(key_name, "").strip()) == 0: 61 | missing.append(key_name) 62 | return missing 63 | 64 | 65 | def check_keys(key_variables, name): 66 | missing = _get_missing_keys(key_variables) 67 | if len(missing) > 0: 68 | logger.warning("Missing keys for {}: {}".format(name, missing)) 69 | return False 70 | return True 71 | 72 | 73 | def check_environment_variables(): 74 | print("CHECKING ENVIRONMENT VARIABLES") 75 | valid_env_file = all( 76 | check_keys(all_keys_to_check[element], element) for element in all_keys_to_check 77 | ) 78 | 79 | if not valid_env_file: 80 | print_env_file_warning() 81 | 82 | return valid_env_file 83 | 84 | 85 | def print_env_file_warning(): 86 | env_message = """ 87 | Hi! Before you can run talkgenerator you need to set some secret keys in an .env file. 88 | 89 | Which keys? 90 | ------------- 91 | Take a look at https://github.com/korymath/talk-generator#setting-up-required-authentication 92 | 93 | Creating an .env file 94 | ------------- 95 | $ touch .env 96 | $ echo VARIABLE_NEEDED=VALUE >> .env 97 | $ echo OTHER_VARIABLE_NEEDED=VALUE >> .env 98 | 99 | or you can use your favorite text editor (vi, nano, etc) to create it. 
100 | """ 101 | 102 | logger.error(env_message) 103 | -------------------------------------------------------------------------------- /talkgenerator/schema/presentation_schema_types.py: -------------------------------------------------------------------------------- 1 | from talkgenerator.schema.slide_schemas import * 2 | from talkgenerator.schema import slide_topic_generators 3 | from talkgenerator.schema.presentation_schema import PresentationSchema 4 | from talkgenerator.datastructures.slide_generator_data import ConstantWeightFunction 5 | from talkgenerator.datastructures.slide_generator_data import SlideGeneratorData 6 | from talkgenerator.slide import powerpoint_slide_creator 7 | from talkgenerator.slide import slide_generator_types 8 | 9 | # ================================== 10 | # ===== PRESENTATION SCHEMAS ===== 11 | # ================================== 12 | 13 | 14 | # This object holds all the information about how to generate the presentation 15 | presentation_schema = PresentationSchema( 16 | # Basic powerpoint generator 17 | powerpoint_creator=powerpoint_slide_creator.create_new_powerpoint, 18 | # Topic per slide generator 19 | seed_generator=slide_topic_generators.SideTrackingTopicGenerator, 20 | # Title of the presentation 21 | title_generator=talk_title_generator, 22 | # Slide generators 23 | slide_generators=all_slide_generators, 24 | # Max tags 25 | max_allowed_tags=default_max_allowed_tags, 26 | ) 27 | 28 | # Interview schema: Disallow about_me slides 29 | interview_max_allowed_tags = default_max_allowed_tags.copy() 30 | interview_max_allowed_tags["about_me"] = 0 31 | 32 | interview_schema = PresentationSchema( 33 | # Basic powerpoint generator 34 | powerpoint_creator=powerpoint_slide_creator.create_new_powerpoint, 35 | # Topic per slide generator 36 | seed_generator=slide_topic_generators.SideTrackingTopicGenerator, 37 | # Title of the presentation 38 | title_generator=talk_title_generator, 39 | # Slide generators 40 | slide_generators=all_slide_generators, 41 | # Max tags 42 | max_allowed_tags=interview_max_allowed_tags, 43 | ) 44 | 45 | # Test schema: for testing purposes 46 | 47 | test_schema = PresentationSchema( 48 | # Basic powerpoint generator 49 | powerpoint_slide_creator.create_new_powerpoint, 50 | # Title of the presentation 51 | title_generator=talk_title_generator, 52 | # Topic per slide generator 53 | # seed_generator=slide_topic_generators.SideTrackingTopicGenerator, 54 | seed_generator=slide_topic_generators.IdentityTopicGenerator, 55 | # Slide generators 56 | slide_generators=title_slide_generators 57 | + [ 58 | SlideGeneratorData( 59 | # slide_templates.generate_image_slide( 60 | slide_generator_types.ImageSlideGenerator.of( 61 | inspiration_title_generator, generate_unsplash_image 62 | ), 63 | weight_function=ConstantWeightFunction(8), 64 | allowed_repeated_elements=10, 65 | name="Test sourcing", 66 | ) 67 | ], 68 | # ignore_weights=True, 69 | ) 70 | 71 | 72 | # TED schema: using only images from approved sources 73 | ted_schema = PresentationSchema( 74 | # Basic powerpoint generator 75 | powerpoint_creator=powerpoint_slide_creator.create_new_powerpoint, 76 | # Topic per slide generator 77 | seed_generator=slide_topic_generators.SideTrackingTopicGenerator, 78 | # Title of the presentation 79 | title_generator=talk_ted_title_generator, 80 | # Slide generators 81 | slide_generators=title_slide_generators 82 | + history_slide_generators_copyright_free 83 | + single_image_slide_generators_copyright_free 84 | + statement_slide_generators_copyright_free 
85 | + captioned_images_slide_generators_copyright_free 86 | + own_chart_generators 87 | + conclusion_slide_generators_copyright_free, 88 | # Max tags 89 | max_allowed_tags={ 90 | # Absolute maxima 91 | "title": 1, 92 | "history": 1, 93 | "anecdote": 1, 94 | "location_chart": 1, 95 | "chart": 1, 96 | "deep": 2, 97 | # Relative (procentual) maxima 98 | "two_captions": 0.3, 99 | "three_captions": 0.2, 100 | "multi_captions": 0.3, 101 | "gif": 0.5, 102 | "quote": 0.2, 103 | "statement": 0.2, 104 | }, 105 | ) 106 | 107 | schemas = { 108 | "default": presentation_schema, 109 | "interview": interview_schema, 110 | "test": test_schema, 111 | "ted": ted_schema, 112 | } 113 | 114 | 115 | def get_schema(name): 116 | return schemas[name] 117 | -------------------------------------------------------------------------------- /tests/test_language_util.py: -------------------------------------------------------------------------------- 1 | import random 2 | import unittest 3 | 4 | from talkgenerator.util import language_util 5 | 6 | 7 | class LanguageUtilTest(unittest.TestCase): 8 | def setUp(self) -> None: 9 | random.seed(123) 10 | 11 | def test_check_and_download_no_exception(self): 12 | language_util.check_and_download() 13 | 14 | def test_to_plural(self): 15 | self.assertEqual("cats", language_util.to_plural("a cat")) 16 | self.assertEqual("cats", language_util.to_plural("cat")) 17 | self.assertEqual("cats", language_util.to_plural("cats")) 18 | 19 | def test_is_noun(self): 20 | self.assertTrue(language_util.is_noun("cat")) 21 | self.assertFalse(language_util.is_noun("see")) 22 | self.assertFalse(language_util.is_noun("because")) 23 | 24 | def test_is_verb(self): 25 | self.assertTrue(language_util.is_verb("see")) 26 | self.assertFalse(language_util.is_verb("cat")) 27 | self.assertFalse(language_util.is_verb("because")) 28 | 29 | def test_to_singular(self): 30 | self.assertEqual("cat", language_util.to_singular("cat")) 31 | self.assertEqual("cat", language_util.to_singular("cats")) 32 | 33 | def test_ing(self): 34 | self.assertEqual("toying", language_util.to_ing_form("toy")) 35 | self.assertEqual("playing", language_util.to_ing_form("play")) 36 | self.assertEqual("lying", language_util.to_ing_form("lie")) 37 | self.assertEqual("flying", language_util.to_ing_form("fly")) 38 | self.assertEqual("fleeing", language_util.to_ing_form("flee")) 39 | self.assertEqual("making", language_util.to_ing_form("make")) 40 | 41 | def test_verb_detection(self): 42 | self.assertEqual( 43 | "ACT like a cat", 44 | language_util.apply_function_to_verb("act like a cat", str.upper), 45 | ) 46 | # self.assertEqual("kitten PROOF your house", 47 | # language_util.apply_function_to_verb("kitten proof your house", str.upper)) 48 | 49 | def test_to_present_participle(self): 50 | self.assertEqual( 51 | "acting like a cat", language_util.to_present_participle("act like a cat") 52 | ) 53 | self.assertEqual( 54 | "quitly acting like a cat", 55 | language_util.to_present_participle("quitly act like a cat"), 56 | ) 57 | 58 | def test_replace(self): 59 | self.assertEqual( 60 | "this is your test", 61 | language_util.replace_word("this is my test", "my", "your"), 62 | ) 63 | self.assertEqual( 64 | "test if morphed, before comma", 65 | language_util.replace_word( 66 | "test if changed, before comma", "changed", "morphed" 67 | ), 68 | ) 69 | self.assertEqual( 70 | "Success capital", 71 | language_util.replace_word("Test capital", "test", "success"), 72 | ) 73 | self.assertEqual( 74 | "Your test is testing if your, is changed", 75 | 
language_util.replace_word( 76 | "My test is testing if my, is changed", "my", "your" 77 | ), 78 | ) 79 | self.assertEqual( 80 | "Last word is morphed", 81 | language_util.replace_word("Last word is changed", "changed", "morphed"), 82 | ) 83 | 84 | def test_get_last_noun_and_article(self): 85 | self.assertEqual( 86 | "a cat", 87 | language_util.get_last_noun_and_article("introduce your family to a cat"), 88 | ) 89 | self.assertEqual( 90 | "the family", 91 | language_util.get_last_noun_and_article("show your cat to the family"), 92 | ) 93 | self.assertEqual( 94 | "my cat", language_util.get_last_noun_and_article("What to do with my cat") 95 | ) 96 | self.assertEqual( 97 | "your cat", language_util.get_last_noun_and_article("do you like your cat") 98 | ) 99 | 100 | def test_replace_pronouns(self): 101 | self.assertEqual( 102 | "I care about me and my family", 103 | language_util.second_to_first_pronouns("I care about you and your family"), 104 | ) 105 | 106 | # def test_is_noun(self): 107 | # self.assertTrue(language_util.is_noun("cat")) 108 | # self.assertTrue(language_util.is_noun("dog")) 109 | # self.assertTrue(language_util.is_noun("food")) 110 | # self.assertTrue(language_util.is_noun("pet")) 111 | 112 | # def test_is_verb(self): 113 | # self.assertTrue(language_util.is_verb("act")) 114 | # self.assertTrue(language_util.is_verb("pet")) 115 | # self.assertTrue(language_util.is_verb("kiss")) 116 | 117 | # def test_is_verb_action(self): 118 | # self.assertTrue(language_util.is_verb("kill a mockingbird")) 119 | # self.assertTrue(language_util.is_verb("act like a cat")) 120 | # self.assertTrue(language_util.is_verb("speak English")) 121 | 122 | if __name__ == "__main__": 123 | unittest.main() 124 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Talk Powerpoint Generator 2 | 3 | [![CircleCI](https://circleci.com/gh/korymath/talk-generator.svg?style=svg&circle-token=dcba7d5a9ff7953cff0526e201990c0b811b3aae)](https://circleci.com/gh/korymath/talk-generator) 4 | [![codecov](https://codecov.io/gh/korymath/talk-generator/branch/master/graph/badge.svg?token=gqkCyuXop0)](https://codecov.io/gh/korymath/talk-generator) 5 | [![License](https://img.shields.io/github/license/mashape/apistatus.svg)](https://github.com/korymath/britbot/blob/master/LICENSE.md) 6 | 7 | This program automatically generates PowerPoints about any topic. 8 | These presentation slide decks can be used by improvisers for the improvisational comedy format *"Improvised TED talk"* or *"Powerpoint Karaoke"*. 9 | In such games, the actors have to present an unseen presentation slide deck, but pretend to be an expert and explain *"their"* slide show choices. 10 | 11 | ## Demo 12 | 13 | Try out this generator on our online platform: [talkgenerator.com](http://talkgenerator.com/). 14 | 15 | ### Example 16 | 17 | ![Automatically Generated](https://media.giphy.com/media/MXXe522nIAA9JZjExI/giphy.gif) 18 | 19 | ## Easy Install and Run 20 | 21 | Our program relies on certain APIs that require authentication in order to use them. 22 | Create a file named `.env` (don't forget the period) in your project directory, and fill it with the correct API keys as described on our [wiki page about this](https://github.com/korymath/talk-generator/wiki/Setting-Up-API-Keys). 
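For example, a minimal `.env` could look like the sketch below. The variable names are the ones read by `talkgenerator/settings.py`; the values are placeholders, and any keys you leave out are only reported as missing when the generator starts.

```sh
# .env (placeholder values only; never commit this file, it is already listed in .gitignore)
REDDIT_CLIENT_ID=your-reddit-client-id
REDDIT_CLIENT_SECRET=your-reddit-client-secret
REDDIT_USER_AGENT=your-user-agent
WIKIHOW_USERNAME=your-wikihow-username
WIKIHOW_PASSWORD=your-wikihow-password
UNSPLASH_ACCESS_KEY=your-unsplash-access-key
UNSPLASH_SECRET_KEY=your-unsplash-secret-key
UNSPLASH_REDIRECT_URI=your-unsplash-redirect-uri
UNSPLASH_CODE=your-unsplash-code
PIXABAY_KEY=your-pixabay-key
PEXELS_KEY=your-pexels-key
```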
23 | 24 | ```sh 25 | # Make a new Python 3 virtual environment 26 | python3 -m venv venv; 27 | 28 | # Activate the virtual environment 29 | source venv/bin/activate; 30 | 31 | # Upgrade pip and install requirements 32 | pip install --upgrade pip setuptools; 33 | python3 -m pip install -r requirements.txt; 34 | 35 | # Download NLTK dependencies 36 | python run_nltk_download.py; 37 | 38 | # Install the Talk Generator 39 | pip install -e .; 40 | 41 | # Generate a 10 slide talk with topic peanuts 42 | talkgenerator --topic "peanuts" --num_slides 10 43 | ``` 44 | 45 | ### Run arguments 46 | 47 | | Argument | Description | 48 | | ---------------------- | ------------------------- | 49 | | `topic` | The topic of the generated talk. This works best if it is a common, well-known noun. Use comma-separated words to generate a slide deck about multiple topics | 50 | | `slides` | The number of slides in the generated presentation (*default: 10*) | 51 | | `schema` | The presentation schema to use when generating the presentation. Currently, four schemas are implemented: `default`, `interview`, `test` (for testing during development) and `ted` | 52 | | `title` | Title of the presentation. Either `topic` or this one should be set in order to generate a slide deck (just setting `topic` is usually more fun though) | 53 | | `presenter` | The name that will be present on the first slide. Leave blank for an automatically generated name | 54 | | `output_folder` | The folder to output the generated presentations (*default: `./output/`*) | 55 | | `save_ppt` | If this flag is true (*default*), the generated powerpoint will be saved in the `output_folder` | 56 | | `open_ppt` | If this flag is true (*default*), the generated powerpoint will automatically open after it is generated | 57 | | `parallel` | If this flag is true (*default*), the generator will generate all slides in parallel | 58 | 59 | ## Program structure 60 | 61 | See the [wiki](https://github.com/korymath/talk-generator/wiki/Program-structure) to learn more about the inner implementation. 62 | 63 | ## Tests 64 | 65 | Test files are `tests/*.py`, prefixed with `test_`. Test files use the `unittest` module. 66 | They can all be run together in PyCharm by right-clicking `talk-generator` and selecting *Run 'Unittests in talk-generator'*, or from the command line with coverage: 67 | 68 | ```sh 69 | coverage run -m pytest; coverage html 70 | ``` 71 | 72 | Test coverage is automatically handled by `codecov`. Tests are automatically run with CircleCI based on the `.yml` file in the `.circleci` directory. 73 | 74 | ## Credits 75 | 76 | This generator is made by 77 | [Thomas Winters](https://github.com/TWinters) 78 | and [Kory Mathewson](https://github.com/korymath), 79 | with contributions from 80 | [Shaun Farrugia](https://github.com/h0h0h0) 81 | and [Julian Faid](https://github.com/jfaid). 82 | 83 | If you would like to refer to this project in academic work, please cite the following paper: 84 | 85 | Winters T., Mathewson K.W. (2019) **Automatically Generating Engaging Presentation Slide Decks**. In: Ekárt A., Liapis A., Castro Pena M. (eds) Computational Intelligence in Music, Sound, Art and Design. EvoMUSART 2019. Lecture Notes in Computer Science, vol 11453. 
Springer, Cham 86 | 87 | ```sh 88 | @InProceedings{winters2019tedric, 89 | author="Winters, Thomas 90 | and Mathewson, Kory W.", 91 | editor="Ek{\'a}rt, Anik{\'o} 92 | and Liapis, Antonios 93 | and Castro Pena, Mar{\'i}a Luz", 94 | title="Automatically Generating Engaging Presentation Slide Decks", 95 | booktitle="Computational Intelligence in Music, Sound, Art and Design", 96 | year="2019", 97 | publisher="Springer International Publishing", 98 | address="Cham", 99 | pages="127--141", 100 | isbn="978-3-030-16667-0" 101 | } 102 | ``` 103 | 104 | ## License 105 | 106 | MIT License. Copyright (c) 2018-2020 [Kory Mathewson](https://github.com/korymath) and [Thomas Winters](https://github.com/TWinters) 107 | -------------------------------------------------------------------------------- /tests/test_text_generator.py: -------------------------------------------------------------------------------- 1 | import random 2 | import unittest 3 | 4 | from talkgenerator.sources import text_generator 5 | from talkgenerator.util import os_util 6 | 7 | 8 | class TextGeneratorTest(unittest.TestCase): 9 | def setUp(self) -> None: 10 | random.seed(123) 11 | 12 | def test_variable_extraction(self): 13 | self.assertEqual( 14 | {"test", "adjective"}, 15 | text_generator.get_format_variables("this {test} is going {adjective}"), 16 | ) 17 | self.assertEqual( 18 | {"test"}, 19 | text_generator.get_format_variables("this {test} is testing for {} empty"), 20 | ) 21 | self.assertEqual( 22 | {"test"}, 23 | text_generator.get_format_variables( 24 | "this {test} is testing if {test} only appears once" 25 | ), 26 | ) 27 | self.assertEqual( 28 | set(), 29 | text_generator.get_format_variables( 30 | "this test only has {} some {} empty names" 31 | ), 32 | ) 33 | 34 | def test_variable_extraction_with_commands(self): 35 | self.assertEqual( 36 | {"test", "adjective"}, 37 | text_generator.get_format_variables( 38 | "this {test.title.s} is going {adjective.lower}" 39 | ), 40 | ) 41 | self.assertEqual( 42 | {"test", "one"}, 43 | text_generator.get_format_variables( 44 | "this {test.title} is testing for {one}" 45 | ), 46 | ) 47 | self.assertEqual( 48 | {"test"}, 49 | text_generator.get_format_variables( 50 | "this {test.title} is testing for {} empty" 51 | ), 52 | ) 53 | 54 | def test_not_using_unusable_template(self): 55 | """ Tests if the generator is not raising an error when variables are missing to generate, and only uses other 56 | generator """ 57 | possible_templates = ["This is {adjective}", "This is {noun}"] 58 | templated_text_generator = text_generator.TemplatedTextGenerator( 59 | templates_list=possible_templates 60 | ) 61 | for _ in range(100): 62 | self.assertEqual( 63 | "This is possible", 64 | templated_text_generator.generate({"adjective": "possible"}), 65 | ) 66 | for _ in range(100): 67 | self.assertEqual( 68 | "This is a test", templated_text_generator.generate({"noun": "a test"}) 69 | ) 70 | 71 | def test_all_possible_outcomes(self): 72 | possible_templates = ["This is {adjective}", "This is {noun}"] 73 | templated_text_generator = text_generator.TemplatedTextGenerator( 74 | templates_list=possible_templates 75 | ) 76 | expected = {"This is possible", "This is a test"} 77 | all_generations = set() 78 | for _ in range(10000): 79 | if all_generations == expected: 80 | break 81 | all_generations.add( 82 | templated_text_generator.generate( 83 | {"adjective": "possible", "noun": "a test"} 84 | ) 85 | ) 86 | 87 | self.assertEqual(expected, all_generations) 88 | 89 | def 
test_variable_and_function_extraction(self): 90 | 91 | self.assertEqual( 92 | {("nice", ".title.lower.upper"), ("is", ".lower.ing"), ("test", ".title")}, 93 | text_generator.get_format_variables_and_functions( 94 | "this {is.lower.ing} a {test.title}, {nice.title.lower.upper} right?" 95 | ), 96 | ) 97 | 98 | def test_functions_on_variables(self): 99 | template_text_generator = text_generator.TemplatedTextGenerator( 100 | templates_list=["this is a {test.title}"] 101 | ) 102 | result = template_text_generator.generate({"test": "something"}) 103 | self.assertEqual("this is a Something", result) 104 | 105 | def test_functions_on_multiple_variables(self): 106 | template_text_generator = text_generator.TemplatedTextGenerator( 107 | templates_list=[ 108 | "this is a {test.title} using multiple {variable.plural.title}" 109 | ] 110 | ) 111 | result = template_text_generator.generate( 112 | {"test": "something", "variable": "instance"} 113 | ) 114 | self.assertEqual("this is a Something using multiple Instances", result) 115 | 116 | def test_tracery_grammar(self): 117 | tracery = text_generator.TraceryTextGenerator("data/text-templates/name.json") 118 | for i in range(5): 119 | self.assertTrue(tracery.generate()) 120 | 121 | def test_ted_title(self): 122 | tracery = text_generator.TraceryTextGenerator( 123 | "data/text-templates/talk_title.json", "ted_title" 124 | ) 125 | words = list(os_util.read_lines("data/eval/common_words.txt")) 126 | random.shuffle(words) 127 | words = words[0:10] 128 | generations = set() 129 | for i in range(100): 130 | topic = random.choice(words) 131 | generated = tracery.generate({"seed": topic}) 132 | generations.add(generated) 133 | self.assertTrue(generated) 134 | 135 | generations = list(generations) 136 | generations.sort() 137 | print("\n".join(generations)) 138 | 139 | 140 | if __name__ == "__main__": 141 | unittest.main() 142 | -------------------------------------------------------------------------------- /talkgenerator/data/text-templates/talk_title.json: -------------------------------------------------------------------------------- 1 | { 2 | "origin": [ 3 | "#ted_title#", 4 | "#ted_title#", 5 | "#in_between#", 6 | "#title#", 7 | "#title#", 8 | "#title#" 9 | ], 10 | "ted_title": [ 11 | "I Quit {seed.wikihow_action.ing.title} (and so should you)", 12 | "From {seed.title} to {seed.conceptnet_related_single_word.title}: {seed.wikihow_action.ing.title}", 13 | "Help, I Keep Having To {seed.wikihow_action.title}", 14 | "How many {seed.plural.lower} are \"too many {seed.plural.lower}\"?", 15 | "Never, Ever {seed.wikihow_action.title}", 16 | "Putting {seed.plural.title} On The Blockchain", 17 | "The #two_to_five# {seed.first_letter.title}'s of {seed.plural.title}", 18 | "The True Meaning of {seed.plural.title}", 19 | "The Unexpected Benefits of {seed.plural.title}", 20 | "The Way We Think about {seed.plural.title} is Dead Wrong", 21 | "The Worst Thing about {seed.plural.title} You Never Noticed", 22 | "Underwater {seed.plural.title}", 23 | "Want to be Happier? {seed.wikihow_action.title}!", 24 | "Want to Be Happy? 
{seed.wikihow_action.title}!", 25 | "Why People Call Me \"The {seed.singular.title}\"", 26 | "More {seed.plural.title}, More {seed.conceptnet_related_single_word.plural.title}", 27 | "More {seed.plural.title}, More Happiness", 28 | "Why We All Need to {seed.wikihow_action.title}" 29 | ], 30 | "in_between": [ 31 | "Less {seed.plural.title}, More Happiness", 32 | "Despite what you've heard, stupid {seed.plural} exists", 33 | "How {seed.plural.title} Can Change the World", 34 | "How {seed.plural.title} Caused My Bankruptcy", 35 | "How to {seed.wikihow_action.lower} (without getting fired)", 36 | "How {seed.plural} can save humanity and why we should legalize them everywhere now", 37 | "Why I Disallow My Children to {seed.wikihow_action.title.2_to_1_pronouns} (and so should you)", 38 | "You are {seed.wikihow_action.ing} (and Don't Even Know It)", 39 | "How I {seed.wikihow_action.title}: Confessions of {seed.title.a}" 40 | ], 41 | "title": [ 42 | "#two_or_larger# Things You Didn't Know About {seed.plural.title}", 43 | "#one_or_larger##number# Things You Didn't Know About {seed.plural.title}", 44 | "99 problems but {seed.a} ain't one", 45 | "Help, My Boyfriend Is Obsessed With {seed.plural.title}", 46 | "How {seed.singular.title.a} Made Me Feel like a Millionaire", 47 | "How I Held My {seed.plural.title} for #one_or_larger##number# Minutes", 48 | "How to Fail Miserably at {seed.wikihow_action.ing.title}", 49 | "How to Get Rid of {seed.plural.title}", 50 | "How to Let {seed.plural.title} Be Your Guide", 51 | "How to look like you're working while {seed.wikihow_action.ing.lower}", 52 | "How to {seed.wikihow_action} Most Effectively", 53 | "How to Make {seed.plural.title} Your Friend", 54 | "How to Spot {seed.singular.a.title}", 55 | "How to subtly put \"{seed.conceptnet_related_single_word}\" in every sentence", 56 | "How {seed.plural.title} Inspires Action", 57 | "How {seed.lower} might save your life", 58 | "I HATE {seed.plural.upper}", 59 | "I'm {seed.title.a} AND a U.S. Marine: ASK ME ANYTHING!", 60 | "Inside the Mind of a Master {seed.singular.title}", 61 | "Inside the Mind of {seed.singular.a.title}", 62 | "My Favourite {seed.singular.title} Lifehacks", 63 | "My Hobby: {seed.wikihow_action.ing.title}", 64 | "My Irrational Fear of {seed.plural.title}", 65 | "The Art of {seed.plural.title}", 66 | "An overview of sci-fi stories about {seed.wikihow_action.ing.lower}", 67 | "The Art of {seed.wikihow_action.ing.title}", 68 | "The Biggest Concerns About {seed.plural.title}", 69 | "The Danger of a Single {seed.singular.title}", 70 | "The Happy Secret to Better {seed.plural.title}", 71 | "The Power of {seed.plural.title}", 72 | "The Puzzle of {seed.plural.title}", 73 | "The Surprising Science of {seed.plural.title}", 74 | "The Surprising Science of {seed.wikihow_action.ing.title}", 75 | "The Thrilling Potential of {seed.plural.title}", 76 | "The {seed.title} Conundrum", 77 | "This Is What Happens When You {seed.wikihow_action.title}", 78 | "We need to talk about {seed.plural.lower}", 79 | "We. Need. More. {seed.plural.title}.", 80 | "What makes a good {seed.singular.lower}? 
Lessons from the longest study.", 81 | "What purpose do {seed.plural.lower} *really* have?", 82 | "What Your Choice in {seed.plural.title} Says About You", 83 | "What Your {seed.singular.title} Says About You", 84 | "What Your {seed.singular.title} Truly Says About You", 85 | "Why I Joined {seed.title.a} Cult and Why You Should Too", 86 | "Why We All Love {seed.wikihow_action.ing.title}", 87 | "Why We Do What We Do To {seed.wikihow_action.title}", 88 | "Why {seed.plural.title} Will Ruin Your Life", 89 | "Your {seed.plural.title} May Shape Who You Are", 90 | "{seed.plural.title} Aren't Everything. Believe Me.", 91 | "{seed.plural.title} Kill Creativity", 92 | "{seed.title.first_letter}... {seed.first_letter}... {seed.plural}?", 93 | "{seed.title}, No Matter What", 94 | "{seed.title}: The Power of {seed.wikihow_action.ing.title}", 95 | "{seed.wikihow_action.ing.title} in #one_or_larger##number##number# easy steps", 96 | "{seed.wikihow_action.ing.title}: Amazing Delight or Sign of Apocalypse?" 97 | ], 98 | "number": ["0","1","2","3","4","5","6","7","8","9"], 99 | "one_or_larger": ["1","2","3","4","5","6","7","8","9"], 100 | "two_or_larger": ["2","3","4","5","6","7","8","9"], 101 | "two_to_five": ["2","3","4","5"] 102 | } -------------------------------------------------------------------------------- /talkgenerator/schema/content_generator_structures.py: -------------------------------------------------------------------------------- 1 | """ 2 | This file contains structures that are helpful for certain content generators, but not general enough for generator_util 3 | """ 4 | import os 5 | import random 6 | from typing import Tuple 7 | 8 | from talkgenerator.sources import conceptnet 9 | from talkgenerator.sources import goodreads, text_generator, reddit, wikihow 10 | from talkgenerator.util.generator_util import ( 11 | ExternalImageListGenerator, 12 | RelatedMappingGenerator, 13 | ) 14 | from talkgenerator.util.generator_util import FromListGenerator 15 | from talkgenerator.util.generator_util import ( 16 | SeededGenerator, 17 | BackupGenerator, 18 | ) 19 | 20 | 21 | # = TEXT GENERATORS= 22 | from talkgenerator.datastructures.image_data import ImageData 23 | 24 | 25 | def create_templated_text_generator(filename): 26 | actual_file = os_util.to_actual_file(filename) 27 | return text_generator.TemplatedTextGenerator(actual_file).generate 28 | 29 | 30 | def create_tracery_generator(filename, main="origin"): 31 | actual_file = os_util.to_actual_file(filename) 32 | return text_generator.TraceryTextGenerator(actual_file, main).generate 33 | 34 | 35 | # GOODREAD QUOTES 36 | class GoodReadsQuoteGenerator(object): 37 | def __init__(self, max_quote_length): 38 | self._max_quote_length = max_quote_length 39 | 40 | def __call__(self, presentation_context): 41 | def generator(seed): 42 | return [ 43 | quote 44 | for quote in goodreads.search_quotes(seed, 50) 45 | if len(quote) <= self._max_quote_length 46 | ] 47 | 48 | return FromListGenerator(SeededGenerator(generator))(presentation_context) 49 | 50 | 51 | # REDDIT 52 | from talkgenerator.util import os_util 53 | 54 | 55 | def create_reddit_image_generator(*name): 56 | reddit_generator = RedditImageGenerator("+".join(name)) 57 | return BackupGenerator(reddit_generator.generate, reddit_generator.generate_random) 58 | 59 | 60 | class RedditImageSearcher(object): 61 | def __init__(self, subreddit: str): 62 | self._subreddit = subreddit 63 | 64 | def __call__(self, seed: str): 65 | results = reddit.search_subreddit( 66 | self._subreddit, str(seed) + " 
nsfw:no (url:.jpg OR url:.png OR url:.gif)" 67 | ) 68 | if bool(results): 69 | return [ 70 | ImageData( 71 | image_url=post.url, 72 | source="u/" 73 | + post.author.name 74 | + " (on " 75 | + post.subreddit_name_prefixed 76 | + ")", 77 | ) 78 | for post in results 79 | ] 80 | 81 | 82 | class RedditImageGenerator: 83 | def __init__(self, subreddit: str): 84 | self._subreddit = subreddit 85 | 86 | self._generate = ExternalImageListGenerator( 87 | SeededGenerator(RedditImageSearcher(self._subreddit)), 88 | ) 89 | 90 | def generate(self, presentation_context): 91 | return self._generate(presentation_context) 92 | 93 | def generate_random(self, _): 94 | return self.generate({"seed": ""}) 95 | 96 | 97 | # ABOUT ME 98 | 99 | _about_me_facts_grammar = "data/text-templates/about_me_facts.json" 100 | job_description_generator = create_tracery_generator( 101 | _about_me_facts_grammar, "job_description" 102 | ) 103 | country_description_generator = create_tracery_generator( 104 | _about_me_facts_grammar, "country_description" 105 | ) 106 | 107 | 108 | def _apply_country_prefix(country_name): 109 | if random.uniform(0, 1) < 0.55: 110 | return country_name 111 | return country_description_generator() + country_name 112 | 113 | 114 | class CountryPrefixApplier(object): 115 | def __init__(self): 116 | pass 117 | 118 | def __call__(self, x: Tuple[str, str]): 119 | return _apply_country_prefix(x[0]), x[1] 120 | 121 | 122 | def _apply_job_prefix(job_name): 123 | if random.uniform(0, 1) < 0.55: 124 | return job_name 125 | return job_description_generator() + ": " + job_name 126 | 127 | 128 | class JobPrefixApplier(object): 129 | def __init__(self): 130 | pass 131 | 132 | def __call__(self, x: Tuple[str, str]): 133 | return _apply_job_prefix(x[0]), x[1] 134 | 135 | 136 | # SPLITTER 137 | 138 | 139 | class SplitCaptionsGenerator(object): 140 | def __init__(self, generator): 141 | self._generator = generator 142 | 143 | def __call__(self, presentation_context): 144 | line = self._generator(presentation_context) 145 | parts = line.split("|") 146 | return parts 147 | 148 | 149 | # BOLD STATEMENT 150 | 151 | bold_statement_templated_file = os_util.to_actual_file( 152 | "data/text-templates/bold_statements.txt" 153 | ) 154 | bold_statement_templated_generator = create_templated_text_generator( 155 | bold_statement_templated_file 156 | ) 157 | 158 | 159 | def generate_wikihow_bold_statement(presentation_context): 160 | seed = presentation_context["seed"] 161 | template_values = presentation_context 162 | related_actions = wikihow.get_related_wikihow_actions(seed) 163 | if related_actions: 164 | action = random.choice(related_actions) 165 | template_values.update({"action": action.title(), "seed": seed}) 166 | 167 | return bold_statement_templated_generator(template_values) 168 | 169 | 170 | class ConceptNetMapper(RelatedMappingGenerator): 171 | def __init__(self, generator): 172 | super().__init__(conceptnet.weighted_related_word_generator, generator) 173 | -------------------------------------------------------------------------------- /talkgenerator/datastructures/slide_generator_data.py: -------------------------------------------------------------------------------- 1 | import logging 2 | from functools import lru_cache 3 | from typing import Collection, Union, Set, Callable, Tuple 4 | 5 | from talkgenerator.datastructures.image_data import ImageData 6 | 7 | 8 | logger = logging.getLogger("talkgenerator") 9 | 10 | 11 | class PeakedWeight(object): 12 | def __init__( 13 | self, peak_values: Tuple[int, ...], weight: 
float, other_weight: float 14 | ): 15 | self._peak_values = peak_values 16 | self._weight = weight 17 | self._other_weight = other_weight 18 | 19 | def __call__(self, slide_nr: int, num_slides: int): 20 | actual_peak_values = fix_indices(self._peak_values, num_slides) 21 | if slide_nr in actual_peak_values: 22 | return self._weight 23 | return self._other_weight 24 | 25 | 26 | @lru_cache(maxsize=30) 27 | def fix_indices(values: Collection[int], num_slides: int): 28 | return [value % num_slides if value < 0 else value for value in values] 29 | 30 | 31 | class ConstantWeightFunction(object): 32 | def __init__(self, weight): 33 | self._weight = weight 34 | 35 | def __call__(self, slide_nr, total_slides): 36 | return self._weight 37 | 38 | 39 | # Classes that are abstractly responsible for generating powerpoints 40 | 41 | 42 | class SlideGeneratorData: 43 | """ Responsible for providing the slide generator and other attributes, such as its name and weight""" 44 | 45 | def __init__( 46 | self, 47 | generator, 48 | weight_function: Callable[[int, int], float] = ConstantWeightFunction(1), 49 | retries: int = 5, 50 | allowed_repeated_elements: int = 0, 51 | tags=None, 52 | name=None, 53 | ): 54 | self._generator = generator 55 | self._weight_function = weight_function 56 | self._retries = retries 57 | self._name = name 58 | self._allowed_repeated_elements = allowed_repeated_elements 59 | if not tags: 60 | tags = set() 61 | self._tags = tags 62 | 63 | def generate(self, presentation_context, used_elements): 64 | """Generate a slide for a given presentation using the given seed.""" 65 | logger.debug('slide_generator_data.generate()') 66 | logger.debug('presentation_context: {}'.format(presentation_context)) 67 | logger.debug('used_elements: {}'.format(used_elements)) 68 | logger.debug('self._allowed_repeated_elements: {}'.format(self._allowed_repeated_elements)) 69 | 70 | # Try a certain amount of times 71 | for i in range(self._retries): 72 | logger.debug('retry: {}'.format(i)) 73 | logger.debug('self._generator: {}'.format(self._generator)) 74 | slide_results = self._generator.generate_slide( 75 | presentation_context, (used_elements, self._allowed_repeated_elements) 76 | ) 77 | logger.debug('slide_results: {}'.format(slide_results)) 78 | 79 | if slide_results: 80 | (slide, generated_elements) = slide_results 81 | logger.debug('slide: {}'.format(slide)) 82 | logger.debug('generated_elements: {}'.format(generated_elements)) 83 | 84 | # If the generated content is nothing, don't try again 85 | if _has_not_generated_something(generated_elements): 86 | return None 87 | 88 | if slide: 89 | # Add notes about the generation 90 | slide.set_note( 91 | "Seed: " 92 | + presentation_context["seed"] 93 | + "\nGenerator: " 94 | + str(self) 95 | + " \n Context: " 96 | + str(presentation_context) 97 | + " \n Generated Elements: " 98 | + str(generated_elements) 99 | ) 100 | 101 | # Add all sources of generated elements 102 | for generated_element in generated_elements: 103 | if isinstance(generated_element, ImageData): 104 | slide.add_source(generated_element.get_source()) 105 | 106 | return slide, generated_elements 107 | 108 | def get_weight_for(self, slide_nr: int, total_slides: int) -> float: 109 | """The weight of the generator for a particular slide. 
110 | Determines how much chance it has being picked for a particular slide number""" 111 | return self._weight_function(slide_nr, total_slides) 112 | 113 | def get_allowed_repeated_elements(self) -> int: 114 | return self._allowed_repeated_elements 115 | 116 | def get_tags(self) -> Set[str]: 117 | return self._tags 118 | 119 | def __str__(self): 120 | if bool(self._name): 121 | return str(self._name) 122 | name = str(self._generator.__name__) 123 | if name == "": 124 | name = "Unnamed Generator" 125 | return "SlideGenerator[" + name + "]" 126 | 127 | 128 | def _has_not_generated_something(generated_elements) -> bool: 129 | generated_elements = set(generated_elements) 130 | _filter_generated_elements(generated_elements) 131 | return len(generated_elements) == 0 132 | 133 | 134 | def _filter_generated_elements(generated_elements: Set[Union[str, bool, None]]): 135 | if "" in generated_elements: 136 | generated_elements.remove("") 137 | if None in generated_elements: 138 | generated_elements.remove(None) 139 | if True in generated_elements: 140 | generated_elements.remove(True) 141 | if False in generated_elements: 142 | generated_elements.remove(False) 143 | -------------------------------------------------------------------------------- /talkgenerator/slide/slides.py: -------------------------------------------------------------------------------- 1 | import logging 2 | from abc import ABCMeta 3 | from typing import Dict 4 | 5 | from talkgenerator.slide import powerpoint_slide_creator 6 | 7 | logger = logging.getLogger("talkgenerator") 8 | 9 | 10 | class Slide(metaclass=ABCMeta): 11 | """ Class representing a slide object that could be used to export to Powerpoint pptx or other representations later 12 | """ 13 | 14 | def __init__(self, type_name: str, ppt_slide_creator, arguments: Dict): 15 | self._type_name = type_name 16 | self._ppt_slide_creator = ppt_slide_creator 17 | self._arguments = arguments 18 | self._note = "" 19 | self._sources = [] 20 | 21 | def add_source(self, source: str): 22 | if source is not None: 23 | self._sources.append(source) 24 | 25 | def set_note(self, note: str): 26 | self._note = note 27 | 28 | def create_powerpoint_slide(self, prs): 29 | """ Should generate a slide in the powerpoint """ 30 | ppt_slide = self._ppt_slide_creator(prs, **self._arguments) 31 | try: 32 | if ppt_slide: 33 | ppt_slide.notes_slide.notes_text_frame.text = self._note 34 | 35 | if len(self._sources): 36 | powerpoint_slide_creator.add_sources_note(ppt_slide, self._sources) 37 | 38 | except AttributeError as e: 39 | logger.error("attribute error on create slide {}".format(e)) 40 | return ppt_slide 41 | 42 | def to_slide_dictionary(self) -> dict: 43 | slide_dict = dict(self._arguments) 44 | slide_dict["type"] = self._type_name 45 | slide_dict["sources"] = self._sources 46 | return slide_dict 47 | 48 | 49 | class TitleSlide(Slide): 50 | def __init__(self, title:str, subtitle:str): 51 | super().__init__( 52 | type_name="title", 53 | ppt_slide_creator=powerpoint_slide_creator.create_title_slide, 54 | arguments={"title": title, "subtitle": subtitle}, 55 | ) 56 | 57 | 58 | class LarqeQuoteSlide(Slide): 59 | def __init__(self, title:str, text:str, background_image=None): 60 | super().__init__( 61 | type_name="large_quote", 62 | ppt_slide_creator=powerpoint_slide_creator.create_large_quote_slide, 63 | arguments={ 64 | "title": title, 65 | "text": text, 66 | "background_image": background_image, 67 | }, 68 | ) 69 | 70 | 71 | class ImageSlide(Slide): 72 | def __init__(self, title=None, image_url=None, 
original_image_size=True): 73 | super().__init__( 74 | type_name="image", 75 | ppt_slide_creator=powerpoint_slide_creator.create_image_slide, 76 | arguments={ 77 | "title": title, 78 | "image_url": image_url, 79 | "original_image_size": original_image_size, 80 | }, 81 | ) 82 | 83 | 84 | class FullImageSlide(Slide): 85 | def __init__(self, title=None, image_url=None, original_image_size=True): 86 | super().__init__( 87 | type_name="full_image", 88 | ppt_slide_creator=powerpoint_slide_creator.create_full_image_slide, 89 | arguments={ 90 | "title": title, 91 | "image_url": image_url, 92 | "original_image_size": original_image_size, 93 | }, 94 | ) 95 | 96 | 97 | class TwoColumnImageSlide(Slide): 98 | def __init__( 99 | self, 100 | title=None, 101 | caption_1=None, 102 | image_or_text_1=None, 103 | caption_2=None, 104 | image_or_text_2=None, 105 | original_image_size=True, 106 | ): 107 | super().__init__( 108 | type_name="two_column_image", 109 | ppt_slide_creator=powerpoint_slide_creator.create_two_column_images_slide, 110 | arguments={ 111 | "title": title, 112 | "caption_1": caption_1, 113 | "image_or_text_1": image_or_text_1, 114 | "caption_2": caption_2, 115 | "image_or_text_2": image_or_text_2, 116 | "original_image_size": original_image_size, 117 | }, 118 | ) 119 | 120 | 121 | class ThreeColumnImageSlide(Slide): 122 | def __init__( 123 | self, 124 | title=None, 125 | caption_1=None, 126 | image_or_text_1=None, 127 | caption_2=None, 128 | image_or_text_2=None, 129 | caption_3=None, 130 | image_or_text_3=None, 131 | original_image_size=True, 132 | ): 133 | super().__init__( 134 | type_name="three_column_image", 135 | ppt_slide_creator=powerpoint_slide_creator.create_three_column_images_slide, 136 | arguments={ 137 | "title": title, 138 | "caption_1": caption_1, 139 | "image_or_text_1": image_or_text_1, 140 | "caption_2": caption_2, 141 | "image_or_text_2": image_or_text_2, 142 | "caption_3": caption_3, 143 | "image_or_text_3": image_or_text_3, 144 | "original_image_size": original_image_size, 145 | }, 146 | ) 147 | 148 | 149 | class ChartSlide(Slide): 150 | def __init__(self, title, chart_type, chart_data, chart_modifier=None): 151 | super().__init__( 152 | type_name="chart", 153 | ppt_slide_creator=powerpoint_slide_creator.create_chart_slide, 154 | arguments={ 155 | "title": title, 156 | "chart_type": chart_type, 157 | "chart_data": chart_data, 158 | "chart_modifier": chart_modifier, 159 | }, 160 | ) 161 | -------------------------------------------------------------------------------- /talkgenerator/sources/conceptnet.py: -------------------------------------------------------------------------------- 1 | import time 2 | import logging 3 | from functools import lru_cache 4 | from pathlib import Path 5 | from urllib.parse import urlencode 6 | 7 | import requests 8 | # from cachier import cachier 9 | 10 | from talkgenerator.util import generator_util, cache_util 11 | 12 | URL = "http://api.conceptnet.io/c/en/{}?" 
13 | 14 | _LOCATION_ARGUMENTS = cache_util.HashableDict(rel="/r/AtLocation", limit=100) 15 | _HASA_ARGUMENTS = cache_util.HashableDict(rel="/r/HasA", limit=200) 16 | _DEFAULT_ARGUMENTS = cache_util.HashableDict(limit=200) 17 | 18 | # HELPERS 19 | _PROHIBITED_SEARCH_TERMS = ( 20 | "a", 21 | "your", 22 | "my", 23 | "her", 24 | "his", 25 | "its", 26 | "their", 27 | "be", 28 | "an", 29 | "the", 30 | "you", 31 | "are", 32 | ) 33 | 34 | logger = logging.getLogger("talkgenerator.conceptnet") 35 | 36 | 37 | # Helpers 38 | def _remove_prohibited_words(word): 39 | return [part for part in word.split(" ") if part not in _PROHIBITED_SEARCH_TERMS] 40 | 41 | 42 | def normalise(word): 43 | return " ".join(_remove_prohibited_words(word)).lower() 44 | 45 | 46 | def remove_duplicates(entries): 47 | if entries: 48 | checked = set() 49 | result = [] 50 | for entry in entries: 51 | if entry: 52 | key = entry[1] 53 | if key in checked: 54 | continue 55 | checked.add(key) 56 | result.append(entry) 57 | return result 58 | 59 | 60 | def remove_containing(entries, prohibited_word): 61 | if entries: 62 | result = [] 63 | for entry in entries: 64 | if entry: 65 | key = entry[1] 66 | if prohibited_word in key: 67 | continue 68 | result.append(entry) 69 | return result 70 | 71 | 72 | def remove_nones(entries): 73 | if entries: 74 | result = [] 75 | for entry in entries: 76 | if entry: 77 | result.append(entry) 78 | return result 79 | return [] 80 | 81 | 82 | # RETRIEVING DATA 83 | 84 | 85 | @lru_cache(maxsize=20) 86 | # @cachier(cache_dir=Path("..", "tmp").absolute()) 87 | def _get_data(word, arguments=None): 88 | if not arguments: 89 | arguments = _DEFAULT_ARGUMENTS 90 | splitted_word = _remove_prohibited_words(word) 91 | search_term = "_".join(splitted_word) 92 | url = URL.format(search_term) + urlencode(arguments, False, "/") 93 | start = time.perf_counter() 94 | try: 95 | result = requests.get(url).json() 96 | except Exception as e: 97 | logger.warning("conceptnet _get_data timeout: {}".format(e)) 98 | result = None 99 | end = time.perf_counter() 100 | logger.info( 101 | "Took {} seconds to poll Conceptnet for '{}'".format(str(end - start), word) 102 | ) 103 | return result 104 | 105 | 106 | def _get_edges(word, arguments=None): 107 | data = _get_data(word, arguments) 108 | if data: 109 | return data["edges"] 110 | 111 | 112 | def _get_weight_and_word(edge, word): 113 | end_label = edge["end"]["label"] 114 | if not end_label == word: 115 | return edge["weight"], end_label 116 | 117 | 118 | def _get_relation_label(edge): 119 | return edge["rel"]["label"] 120 | 121 | 122 | def _get_from_relation(word, edges, relation_name): 123 | return remove_nones( 124 | [ 125 | _get_weight_and_word(edge, word) 126 | for edge in edges 127 | if _get_relation_label(edge) == relation_name 128 | ] 129 | ) 130 | 131 | 132 | # EXTRACTING INFO 133 | 134 | 135 | def is_english(node): 136 | return node and (not "language" in node or node["language"] == "en") 137 | 138 | 139 | def is_different_enough_label(edge, word): 140 | label = edge["label"].lower() 141 | word_lower = word.lower() 142 | return not label in word_lower and not word_lower in label 143 | 144 | 145 | def get_weighted_related_words(word, limit=50): 146 | edges = _get_edges(word, cache_util.HashableDict(limit=limit)) 147 | starts = [ 148 | (edge["weight"], edge["start"]["label"]) 149 | for edge in edges 150 | if is_different_enough_label(edge["start"], word) and is_english(edge["start"]) 151 | ] 152 | ends = [ 153 | (edge["weight"], edge["end"]["label"]) 154 | for edge in 
edges 155 | if is_different_enough_label(edge["end"], word) and is_english(edge["end"]) 156 | ] 157 | result = starts + ends 158 | return result 159 | 160 | 161 | def get_weighted_related_locations(word): 162 | edges = _get_edges(word, _LOCATION_ARGUMENTS) 163 | return _get_from_relation(word, edges, "AtLocation") 164 | 165 | 166 | def get_weighted_has(word): 167 | edges = _get_edges(word, _HASA_ARGUMENTS) 168 | return _get_from_relation(word, edges, "HasA") 169 | 170 | 171 | def get_weighted_properties(word): 172 | edges = _get_edges(word) 173 | return _get_from_relation(word, edges, "HasProperty") 174 | 175 | 176 | def get_weighted_antonyms(word): 177 | edges = _get_edges(word) 178 | return _get_from_relation(word, edges, "Antonym") 179 | 180 | 181 | # Weighted 182 | weighted_location_generator = generator_util.WeightedGenerator( 183 | get_weighted_related_locations 184 | ) 185 | weighted_antonym_generator = generator_util.WeightedGenerator(get_weighted_antonyms) 186 | weighted_related_word_generator = generator_util.WeightedGenerator( 187 | get_weighted_related_words 188 | ) 189 | 190 | # Unweighted 191 | unweighted_location_generator = generator_util.UnweightedGenerator( 192 | get_weighted_related_locations 193 | ) 194 | unweighted_antonym_generator = generator_util.UnweightedGenerator(get_weighted_antonyms) 195 | unweighted_related_word_generator = generator_util.UnweightedGenerator( 196 | get_weighted_related_words 197 | ) 198 | -------------------------------------------------------------------------------- /talkgenerator/sources/wikihow.py: -------------------------------------------------------------------------------- 1 | """ Module for interacting with Wikihow """ 2 | import re 3 | import time 4 | import logging 5 | from functools import lru_cache 6 | from itertools import chain 7 | from pathlib import Path 8 | 9 | import inflect 10 | import requests 11 | from bs4 import BeautifulSoup 12 | # from cachier import cachier 13 | 14 | from talkgenerator import settings 15 | 16 | logger = logging.getLogger("talkgenerator") 17 | 18 | _LOG_IN_URL = "https://www.wikihow.com/index.php?title=Special:UserLogin&action=submitlogin&type=login" 19 | _ADVANCED_SEARCH_URL = ( 20 | "https://www.wikihow.com/index.php?title=Special%3ASearch&profile=default&search={}" 21 | "&fulltext=Search&ss=relevance&so=desc&ffriy=1&ffrin=1&fft=ffta&fftsi=&profile=default" 22 | ) 23 | 24 | 25 | def _create_log_in_session(username, password): 26 | log_in_credentials = {"wpName": username, "wpPassword": password} 27 | session = requests.session() 28 | max_session_attempts = 16 29 | trial = 1 30 | success = False 31 | 32 | while not success and trial < max_session_attempts: 33 | try: 34 | resp = session.post(_LOG_IN_URL, log_in_credentials, log_in_credentials) 35 | if "Unable to continue login." in resp.text: 36 | logger.warning("Requests login failed. Unable to continue login.") 37 | return False 38 | else: 39 | success = True 40 | except requests.exceptions.ConnectionError: 41 | wait_time = 0.25 * 2 ** trial 42 | 43 | # increment the trial counter 44 | trial += 1 45 | logger.error( 46 | "Connection error with Wikihow! Retrying in " 47 | + str(wait_time) 48 | + " seconds." 
49 | ) 50 | time.sleep(wait_time) 51 | return _create_log_in_session(username, password) 52 | 53 | if trial < max_session_attempts: 54 | logger.info("Logged into Wikihow") 55 | else: 56 | logger.warning("Failed logging into Wikihow") 57 | return session 58 | 59 | 60 | def get_wikihow_session(): 61 | wikihow_credentials = settings.wikihow_auth() 62 | # if session: 63 | # logger.warning( 64 | # "Found Wikihow Session object in credentials, skipping loggin in" 65 | # ) 66 | # return wikihow_credentials["session"] 67 | # else: 68 | # logger.warning( 69 | # "No Wikihow Session object in credentials, attempting log in..." 70 | # ) 71 | session = _create_log_in_session(**wikihow_credentials) 72 | wikihow_credentials["session"] = session 73 | return session 74 | 75 | 76 | def remove_how_to(wikihow_title): 77 | index_of_to = wikihow_title.find("to") 78 | return wikihow_title[index_of_to + 3 :] 79 | 80 | 81 | def clean_wikihow_action(action): 82 | action = _remove_between_brackets(action) 83 | action = _remove_trademarks(action) 84 | action = action.strip() 85 | return action 86 | 87 | 88 | def _remove_between_brackets(sentence): 89 | while True: 90 | s_new = re.sub(r"\([^(]*?\)", r"", sentence) 91 | if s_new == sentence: 92 | break 93 | sentence = s_new 94 | return sentence 95 | 96 | 97 | def _remove_trademarks(action): 98 | if " - wikihow.com" in action: 99 | return re.sub(" - wikihow.com", "", action) 100 | return action 101 | 102 | 103 | @lru_cache(maxsize=20) 104 | # @cachier(cache_dir=Path("..", "tmp").absolute()) 105 | def basic_search_wikihow(search_words): 106 | return requests.get( 107 | "https://en.wikihow.com/wikiHowTo?search=" + search_words.replace(" ", "+") 108 | ) 109 | 110 | 111 | # wikihow_session = get_wikihow_session() 112 | wikihow_session = None 113 | 114 | 115 | @lru_cache(maxsize=20) 116 | # @cachier(cache_dir=Path("..", "tmp").absolute()) 117 | def _advanced_search_wikihow(search_words): 118 | # session = get_wikihow_session() 119 | if wikihow_session: 120 | url = _ADVANCED_SEARCH_URL.format(search_words.replace(" ", "+")) 121 | resp = wikihow_session.get(url, allow_redirects=True) 122 | if "Login Required - wikiHow" in str(resp.content): 123 | logger.warning( 124 | "WARNING: Problem logging in on Wikihow: Advanced Search disabled" 125 | ) 126 | return resp 127 | return None 128 | 129 | 130 | def get_related_wikihow_actions_basic_search(seed_word): 131 | page = basic_search_wikihow(seed_word) 132 | # Try again but with plural if nothing is found 133 | if not page: 134 | page = basic_search_wikihow(inflect.engine().plural(seed_word)) 135 | 136 | soup = BeautifulSoup(page.content, "html.parser") 137 | actions_elements = soup.find_all("a", class_="result_link") 138 | action_titles = list( 139 | chain.from_iterable( 140 | [a.find_all("div", "result_title") for a in actions_elements] 141 | ) 142 | ) 143 | actions = [ 144 | clean_wikihow_action(remove_how_to(x.get_text())) 145 | for x in action_titles 146 | if x is not None and not x.get_text().startswith("Category") 147 | ] 148 | return actions 149 | 150 | 151 | def get_related_wikihow_actions_advanced_search(seed_word): 152 | page = _advanced_search_wikihow(seed_word) 153 | # Try again but with plural if nothing is found 154 | if not page: 155 | page = _advanced_search_wikihow(inflect.engine().plural(seed_word)) 156 | if page: 157 | soup = BeautifulSoup(page.content, "html.parser") 158 | actions_elements = soup.find_all("div", class_="mw-search-result-heading") 159 | actions = [clean_wikihow_action(x.find("a")["title"]) for x in 
actions_elements] 160 | return actions 161 | return [] 162 | 163 | 164 | def get_related_wikihow_actions(seed_word): 165 | """ Uses the advanced search unless it doesn't return anything """ 166 | # actions = get_related_wikihow_actions_advanced_search(seed_word) 167 | # if actions: 168 | # return actions 169 | return get_related_wikihow_actions_basic_search(seed_word) 170 | -------------------------------------------------------------------------------- /talkgenerator/schema/slide_topic_generators.py: -------------------------------------------------------------------------------- 1 | import multiprocessing 2 | import random 3 | import logging 4 | from functools import lru_cache 5 | from typing import List, Collection 6 | 7 | from talkgenerator.sources import conceptnet, phrasefinder 8 | from talkgenerator.util import language_util, random_util 9 | 10 | # == TOPIC GENERATORS == 11 | 12 | logger = logging.getLogger("talkgenerator") 13 | 14 | 15 | class SlideSeedGenerator: 16 | def get_seed(self, slide_nr: int) -> str: 17 | raise NotImplementedError("") 18 | 19 | 20 | class SideTrackingTopicGenerator(SlideSeedGenerator): 21 | """ This generator will make small side tracks around topics, but keeps returning every X slides""" 22 | 23 | def __init__( 24 | self, topics: List[str], num_slides: int, topic_return_period_range=range(3, 6) 25 | ): 26 | self._topics = topics 27 | self._num_slides = num_slides 28 | 29 | seeds: List[str] = [None] * num_slides 30 | 31 | # Make it begin and end with the topic 32 | if num_slides > 0: 33 | # End with main topic 34 | seeds[-1] = topics[0] 35 | 36 | if len(topics) == 1: 37 | # Add the returning topic if only one topic given 38 | idx = 0 39 | while idx < num_slides: 40 | seeds[idx] = topics[0] 41 | idx += random.choice(topic_return_period_range) 42 | else: 43 | # Disperse all topics over the slides if multiple topics given 44 | _disperse(seeds, topics, 0, num_slides - 1) 45 | 46 | # Fill in the blanks with related topics 47 | previous = seeds.copy() 48 | while None in seeds: 49 | fill_in_blank_topics_with_related(seeds) 50 | logger.info("SideTrackingTopicGenerator concept seeds: {}".format(seeds)) 51 | if seeds == previous: 52 | fill_in_blanks_with(seeds, topics[0]) 53 | break 54 | previous = seeds.copy() 55 | 56 | # Convert None's to literal none's for debugging purposes 57 | seeds = [seed if seed else "None" for seed in seeds] 58 | 59 | self._seeds = seeds 60 | 61 | def get_seed(self, slide_nr: int) -> str: 62 | return self._seeds[slide_nr] 63 | 64 | def all_seeds(self): 65 | return self._seeds 66 | 67 | 68 | def _disperse(seeds, topics, min_idx, max_idx): 69 | range_size = max_idx - min_idx + 1 70 | step_size = range_size / len(topics) 71 | for i in range(len(topics)): 72 | seeds_index = int(min_idx + step_size * i) 73 | seeds[seeds_index] = topics[i] 74 | 75 | 76 | def fill_in_blank_topics_with_related(seeds, distance=1): 77 | for i in range(len(seeds)): 78 | _fill_in(seeds, i) 79 | 80 | 81 | def fill_in_blanks_with(seeds, topic): 82 | for i in range(len(seeds)): 83 | if not seeds[i]: 84 | seeds[i] = topic 85 | 86 | 87 | def normalise_weighted_word(weighted_word): 88 | return weighted_word[0], normalise_seed(weighted_word[1]) 89 | 90 | 91 | def _fill_in(seeds, i, distance=1): 92 | if seeds[i] is None: 93 | 94 | # Check for neighbours 95 | if i - distance >= 0 and seeds[i - distance]: 96 | neighbour = seeds[i - distance] 97 | 98 | try: 99 | related = conceptnet.get_weighted_related_words(neighbour, 25) 100 | if len(related) == 0: 101 | related = 
conceptnet.get_weighted_related_words( 102 | normalise_seed(neighbour), 25 103 | ) 104 | 105 | except Exception as e: 106 | logger.info("Conceptnet related words failing: {}".format(e)) 107 | related = [] 108 | 109 | normalised_related = map(normalise_weighted_word, related) 110 | # pool = multiprocessing.Pool() 111 | # normalised_related = pool.map(normalise_weighted_word, related) 112 | # pool.close() 113 | 114 | filtered_related = [ 115 | weighted_word 116 | for weighted_word in normalised_related 117 | if not weighted_word[1] in seeds and len(weighted_word[1]) > 2 118 | ] 119 | 120 | if len(filtered_related) > 0: 121 | seeds[i] = normalise_seed(random_util.weighted_random(filtered_related)) 122 | 123 | # Check if still unassigned 124 | if seeds[i] is None: 125 | _fill_in(seeds, i, distance + 1) 126 | 127 | 128 | @lru_cache(maxsize=300) 129 | def normalise_seed(seed): 130 | normalised = conceptnet.normalise(seed).lower() 131 | normalised = language_util.replace_non_alphabetical_characters(normalised) 132 | # if " " in normalised: 133 | # rarest_word = phrasefinder.get_rarest_word(normalised) 134 | # if rarest_word is not None: 135 | # normalised = rarest_word 136 | # else: 137 | # last_word = normalised.split(" ")[-1] 138 | # normalised = last_word 139 | 140 | logger.info("Mapping seed '" + seed + "' => " + normalised) 141 | return normalised 142 | 143 | 144 | class IdentityTopicGenerator(SlideSeedGenerator): 145 | """ Generates always the given topic as the seed for each slide """ 146 | 147 | def __init__(self, topics: Collection[str], _): 148 | self._topics = topics 149 | 150 | def get_seed(self, _) -> str: 151 | return random.choice(self._topics) 152 | 153 | 154 | # class SynonymTopicGenerator: 155 | # """ Generates a bunch of related words (e.g. synonyms) of a word to generate topics for a presentation""" 156 | # 157 | # def __init__(self, topic, number_of_slides): 158 | # self._topic = topic 159 | # self._slides_nr = number_of_slides 160 | # synonyms = language_util.get_synonyms(topic) 161 | # # seeds.extend(get_relations(topic)) 162 | # 163 | # # Check if enough generated 164 | # if len(synonyms) < number_of_slides: 165 | # # If nothing: big problem! 
166 | # if len(synonyms) == 0: 167 | # synonyms = [topic] 168 | # 169 | # # Now fill the seeds up with repeating topics 170 | # number_of_repeats = int(math.ceil(number_of_slides / len(synonyms))) 171 | # synonyms = numpy.tile(synonyms, number_of_repeats) 172 | # 173 | # # Take random `number_of_slides` elements 174 | # random.shuffle(synonyms) 175 | # self._seeds = synonyms[0: number_of_slides] 176 | # 177 | # def generate_seed(self, slide_nr): 178 | # return self._seeds[slide_nr] 179 | -------------------------------------------------------------------------------- /talkgenerator/util/language_util.py: -------------------------------------------------------------------------------- 1 | """ Module providing language-related operations to manipulate strings""" 2 | import logging 3 | import re 4 | import string 5 | 6 | import inflect 7 | import nltk 8 | 9 | logger = logging.getLogger("talkgenerator") 10 | 11 | 12 | def check_and_download(): 13 | required_corpus_list = ["tokenizers/punkt", "taggers/averaged_perceptron_tagger"] 14 | try: 15 | for corpus in required_corpus_list: 16 | _check_and_download_corpus(corpus, corpus.split("/")[1]) 17 | except Exception as e: 18 | logging.error(e) 19 | print_corpus_download_warning() 20 | return False 21 | 22 | return True 23 | 24 | 25 | def _check_and_download_corpus(corpus_fullname, corpus_shortname): 26 | try: 27 | nltk.data.find(corpus_fullname) 28 | except LookupError as le: 29 | logging.error(le) 30 | nltk.download(corpus_shortname) 31 | 32 | 33 | def print_corpus_download_warning(): 34 | corpus_warning = """ 35 | Hmm... 36 | --------------------- 37 | 38 | We had some trouble downloading the NLTK corpuses.. 39 | Try running the following from a command line. This should 40 | download the needed packages.. but it might also tell you if 41 | there is another issue. 42 | 43 | $ python3 -m nltk.downloader punkt averaged_perceptron_tagger 44 | """ 45 | logger.warning(corpus_warning) 46 | 47 | 48 | # Helpers 49 | 50 | 51 | def _replace_word_one_case(sentence, word, replacement, flags=0): 52 | return re.sub( 53 | r"(^|\W)" + word + r"(\W|$)", r"\1" + replacement + r"\2", sentence, flags=flags 54 | ) 55 | 56 | 57 | def replace_word(sentence, word, replacement): 58 | lowered = _replace_word_one_case(sentence, word.lower(), replacement.lower()) 59 | upper = _replace_word_one_case(lowered, word.upper(), replacement.upper()) 60 | titled = _replace_word_one_case(upper, word.title(), replacement.title()) 61 | result = _replace_word_one_case(titled, word, replacement, re.I) 62 | return result 63 | 64 | 65 | def get_pos_tags(word): 66 | """ Returns all possible POS tags for a given word according to nltk """ 67 | tags = nltk.pos_tag(nltk.word_tokenize(word)) 68 | tags_strings = [tag[1] for tag in tags] 69 | # print(word, ":", tags_strings) 70 | return tags_strings 71 | 72 | 73 | # Verbs 74 | 75 | 76 | def get_verb_index(words): 77 | seen_adverb = False 78 | for i in range(len(words)): 79 | tags = get_pos_tags(words[i]) 80 | # Is verb: return 81 | if "VB" in tags: 82 | return i 83 | # Is adverb: return next non adverb 84 | if "RB" in tags: 85 | seen_adverb = True 86 | continue 87 | # Something following an adverb thats not an adverb? 
See as verb 88 | if seen_adverb: 89 | return i 90 | return 0 91 | 92 | 93 | def apply_function_to_verb(action, func): 94 | words = action.split(" ") 95 | verb_index = get_verb_index(words) 96 | first_word = func(words[verb_index]) 97 | if len(words) == 1: 98 | return first_word 99 | return ( 100 | " ".join(words[:verb_index]) 101 | + " " 102 | + first_word 103 | + " " 104 | + " ".join(words[verb_index + 1 :]) 105 | ).strip() 106 | 107 | 108 | def to_present_participle(action): 109 | return apply_function_to_verb(action, to_ing_form) 110 | 111 | 112 | # From https://github.com/arsho/46-Simple-Python-Exercises-Solutions/blob/master/problem_25.py 113 | def _make_ing_form(passed_string): 114 | passed_string = passed_string.lower() 115 | letter = list(string.ascii_lowercase) 116 | vowel = ["a", "e", "i", "o", "u"] 117 | consonant = [c for c in letter if c not in vowel] 118 | exception = ["be", "see", "flee", "knee", "lie"] 119 | 120 | if passed_string.endswith("ie"): 121 | passed_string = passed_string[:-2] 122 | return passed_string + "ying" 123 | 124 | elif passed_string.endswith("e"): 125 | if passed_string in exception: 126 | return passed_string + "ing" 127 | else: 128 | passed_string = passed_string[:-1] 129 | return passed_string + "ing" 130 | 131 | elif passed_string.endswith("y") or passed_string.endswith("w"): 132 | return passed_string + "ing" 133 | 134 | elif ( 135 | len(passed_string) >= 3 136 | and passed_string[-1] in consonant 137 | and passed_string[-2] in vowel 138 | and passed_string[-3] in consonant 139 | ): 140 | passed_string += passed_string[-1] 141 | return passed_string + "ing" 142 | else: 143 | return passed_string + "ing" 144 | 145 | 146 | def to_ing_form(passed_string): 147 | result = _make_ing_form(passed_string) 148 | if passed_string.islower(): 149 | return result.lower() 150 | if passed_string.isupper(): 151 | return result.upper() 152 | if passed_string.istitle(): 153 | return result.title() 154 | return result 155 | 156 | 157 | inflect_engine = inflect.engine() 158 | 159 | 160 | def is_singular(word): 161 | return inflect_engine.singular_noun(word) is False 162 | 163 | 164 | def is_plural(word): 165 | return bool(inflect_engine.singular_noun(word)) 166 | 167 | 168 | def to_plural(word): 169 | if is_singular(word): 170 | if word.startswith("a "): 171 | word = word[2:] 172 | return inflect_engine.plural(word) 173 | return word 174 | 175 | 176 | def to_singular(word): 177 | if is_plural(word): 178 | return inflect_engine.singular_noun(word) 179 | return word 180 | 181 | 182 | def add_article(word): 183 | # TODO: Maybe more checks, some u's cause "an", or some big letters in case it's an abbreviation 184 | word_lower = word.lower() 185 | article = "a" 186 | if ( 187 | word_lower.startswith("a") 188 | or word_lower.startswith("e") 189 | or word_lower.startswith("i") 190 | or word_lower.startswith("o") 191 | ): 192 | article = "an" 193 | return article + " " + word 194 | 195 | 196 | # Pronouns 197 | 198 | 199 | def second_to_first_pronouns(sentence): 200 | sentence = replace_word(sentence, "yours", "mine") 201 | sentence = replace_word(sentence, "your", "my") 202 | sentence = replace_word(sentence, "you", "me") 203 | return sentence 204 | 205 | 206 | # POS tag checkers 207 | 208 | # TODO: These don't work well, but might be useful features in our text generation language 209 | def is_noun(word): 210 | return "NN" in get_pos_tags(word) 211 | 212 | 213 | def is_verb(word): 214 | return "VB" in get_pos_tags(word) 215 | 216 | 217 | # Special operators 218 | 219 | 220 | 
def get_last_noun_and_article(sentence): 221 | tokens = nltk.word_tokenize(sentence) 222 | tags = nltk.pos_tag(tokens) 223 | 224 | noun = None 225 | for tag in reversed(tags): 226 | if "NN" in tag[1]: 227 | if noun: 228 | noun = (tag[0] + " " + noun).strip() 229 | else: 230 | noun = tag[0] 231 | 232 | # If encountering an article while there is a noun found 233 | elif bool(noun): 234 | if "DT" in tag[1] or "PRP$" in tag[1]: 235 | return tag[0] + " " + noun 236 | return noun 237 | 238 | return None 239 | 240 | 241 | def replace_non_alphabetical_characters(text): 242 | return re.sub(r"[^A-Za-z\s\b -]+", "", text) 243 | 244 | 245 | def is_vowel(character): 246 | return character in ["a", "e", "i", "o", "u"] 247 | 248 | 249 | def is_consonant(character): 250 | return not is_vowel(character) 251 | -------------------------------------------------------------------------------- /talkgenerator/generator.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import os 3 | import pathlib 4 | import random 5 | import subprocess 6 | import sys 7 | import logging 8 | from typing import List, Union, Tuple, Optional 9 | 10 | from pptx import Presentation 11 | 12 | from talkgenerator.slide.slide_deck import SlideDeck 13 | from talkgenerator.schema.content_generators import full_name_generator 14 | from talkgenerator.schema.presentation_schema_types import get_schema 15 | from talkgenerator import runtime_checker 16 | from talkgenerator.sources import phrasefinder 17 | from talkgenerator.util import os_util 18 | 19 | DEFAULT_PRESENTATION_TOPIC = "cat" 20 | MAX_PRESENTATION_SAVE_TRIES = 100 21 | 22 | logger = logging.getLogger("talkgenerator") 23 | 24 | 25 | def generate_presentation_using_cli_arguments(args) -> Tuple[Presentation, SlideDeck, str]: 26 | """Make a talk with the given topic.""" 27 | 28 | runtime_checker.check_runtime_environment() 29 | 30 | # Print status details 31 | logger.info("******************************************") 32 | logger.info("Making {} slide talk on: {}".format(args.num_slides, args.topic)) 33 | 34 | return generate_presentation( 35 | schema=args.schema, 36 | slides=args.num_slides, 37 | topic=args.topic, 38 | title=args.title, 39 | presenter=args.presenter, 40 | parallel=args.parallel, 41 | int_seed=args.int_seed, 42 | print_logs=args.print_logs, 43 | save_ppt=args.save_ppt, 44 | open_ppt=args.open_ppt, 45 | ) 46 | 47 | 48 | def generate_presentation( 49 | schema: str, 50 | slides: int, 51 | topic: Union[str, List[str]] = None, 52 | title: str = None, 53 | presenter: str = None, 54 | parallel: bool = True, 55 | int_seed: int = None, 56 | save_ppt: bool = True, 57 | output_folder: str = "../output/", 58 | open_ppt: bool = False, 59 | print_logs=False, 60 | ) -> Tuple[Presentation, SlideDeck, str]: 61 | 62 | logger.info('**************************') 63 | logger.info('Generating presentation...') 64 | if print_logs: 65 | os_util.show_logs(logger) 66 | 67 | if int_seed is not None: 68 | random.seed(int_seed) 69 | 70 | # Retrieve the schema to generate the presentation with 71 | presentation_schema = get_schema(schema) 72 | logger.info('Presentation schema: {}'.format(presentation_schema)) 73 | 74 | # Generate random presenter name if no presenter name given 75 | if not presenter: 76 | presenter = full_name_generator() 77 | 78 | if not topic: 79 | if title: 80 | topic = phrasefinder.get_rarest_word(title) 81 | else: 82 | topic = DEFAULT_PRESENTATION_TOPIC 83 | 84 | # Extract topics from given (possibly comma separated) topic
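# (e.g. the single string "bike, dog" becomes the topic list ["bike", "dog"],
#  while a list or tuple passed in programmatically is used as-is)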
85 | if type(topic) in [list, tuple]: 86 | topics = topic 87 | else: 88 | topics = [topic.strip() for topic in topic.split(",")] 89 | 90 | logger.info('Presentation topics: {}'.format(topics)) 91 | logger.info('Presentation num_slides: {}'.format(slides)) 92 | logger.info('Presentation presenter: {}'.format(presenter)) 93 | logger.info('Presentation title: {}'.format(title)) 94 | logger.info('Presentation parallel: {}'.format(parallel)) 95 | logger.info('Presentation int_seed: {}'.format(int_seed)) 96 | logger.info('Presentation save_ppt: {}'.format(save_ppt)) 97 | 98 | # Generate the presentation object 99 | presentation, slide_deck = presentation_schema.generate_presentation( 100 | topics=topics, 101 | num_slides=slides, 102 | presenter=presenter, 103 | title=title, 104 | parallel=parallel, 105 | int_seed=int_seed, 106 | save_ppt=save_ppt, 107 | ) 108 | 109 | logger.info('**************************') 110 | logger.info('Presentation generated: {}'.format(presentation)) 111 | logger.info('Slide deck generated: {}'.format(slide_deck)) 112 | 113 | cleaned_topics = ",".join(topics).replace(" ", "").replace(",", "_") 114 | file_name = "".join(e for e in cleaned_topics if e.isalnum() or e == "_") 115 | 116 | logger.info( 117 | "Slide deck structured data: {}".format(slide_deck.get_structured_data()) 118 | ) 119 | 120 | # Save presentation 121 | presentation_file = None 122 | if save_ppt: 123 | presentation_file = save_presentation_to_pptx( 124 | output_folder, file_name, presentation 125 | ) 126 | 127 | # Open the presentation 128 | if open_ppt and presentation_file is not None: 129 | path = os.path.realpath(presentation_file) 130 | _open_file(path) 131 | 132 | return presentation, slide_deck, presentation_file 133 | 134 | 135 | def save_presentation_to_pptx(output_folder: str, file_name: str, prs, index=0) -> Optional[str]: 136 | """Save the talk.""" 137 | if index > MAX_PRESENTATION_SAVE_TRIES: 138 | return None 139 | 140 | suffix = "_" + str(index) if index > 0 else "" 141 | fp: str = os.path.join(output_folder, str(file_name) + str(suffix) + ".pptx") 142 | 143 | # If file already exists, don't overwrite it: 144 | if pathlib.Path(fp).is_file(): 145 | return save_presentation_to_pptx(output_folder, file_name, prs, index + 1) 146 | 147 | # Create the parent folder if it doesn't exist 148 | pathlib.Path(os.path.dirname(fp)).mkdir(parents=True, exist_ok=True) 149 | 150 | try: 151 | prs.save(fp) 152 | logger.info("Saved talk to {}".format(fp)) 153 | return fp 154 | except PermissionError: 155 | return save_presentation_to_pptx(output_folder, file_name, prs, index + 1) 156 | 157 | 158 | def _open_file(filename: str): 159 | """Platform independent open method to cover different OS.""" 160 | if sys.platform == "win32": 161 | os.startfile(filename) 162 | else: 163 | opener = "open" if sys.platform == "darwin" else "xdg-open" 164 | subprocess.call([opener, filename]) 165 | 166 | 167 | def str2bool(v): 168 | # stackoverflow.com/questions/15008758/parsing-boolean-values-with-argparse 169 | if v.lower() in ("yes", "true", "t", "y", "1"): 170 | return True 171 | elif v.lower() in ("no", "false", "f", "n", "0"): 172 | return False 173 | else: 174 | raise argparse.ArgumentTypeError("Boolean value expected.") 175 | 176 | 177 | def get_argument_parser(): 178 | parser = argparse.ArgumentParser(description="Quickly build a slide deck.") 179 | parser.add_argument("--topic", default="", type=str, help="Topic of presentation.") 180 | parser.add_argument( 181 | "--num_slides", 182 | "--slides", 183 | default=10, 
184 | type=int, 185 | help="Number of slides to create.", 186 | ) 187 | parser.add_argument( 188 | "--int_seed", 189 | default=None, 190 | type=int, 191 | help="Seed used for random.seed(int_seed). Fill in any number to add more consistency between runs.", 192 | ) 193 | parser.add_argument( 194 | "--schema", 195 | default="default", 196 | type=str, 197 | help="The presentation schema to generate the presentation with", 198 | ) 199 | parser.add_argument( 200 | "--presenter", 201 | default=None, 202 | type=str, 203 | help="The full name of the presenter, leave blank to randomise", 204 | ) 205 | parser.add_argument( 206 | "--title", 207 | default=None, 208 | type=str, 209 | help="The title of the talk, leave blank to randomise", 210 | ) 211 | parser.add_argument( 212 | "--parallel", 213 | default=True, 214 | type=str2bool, 215 | help=( 216 | "Generate the powerpoint in parallel " 217 | + "(faster, but drops some conditions)" 218 | ), 219 | ) 220 | parser.add_argument( 221 | "--print_logs", 222 | default=True, 223 | type=str2bool, 224 | help="Print logs about the generation process.", 225 | ) 226 | parser.add_argument( 227 | "--output_folder", 228 | default="../output/", 229 | type=str, 230 | help="The folder to output the generated presentations", 231 | ) 232 | parser.add_argument( 233 | "--save_ppt", 234 | default=True, 235 | type=str2bool, 236 | help="If this flag is true, the generated powerpoint will be saved", 237 | ) 238 | parser.add_argument( 239 | "--open_ppt", 240 | default=True, 241 | type=str2bool, 242 | help="Generated powerpoint will automatically open", 243 | ) 244 | return parser 245 | -------------------------------------------------------------------------------- /talkgenerator/sources/text_generator.py: -------------------------------------------------------------------------------- 1 | """ This module helps out with generating text using templates """ 2 | import json 3 | import random 4 | import re 5 | from functools import lru_cache 6 | 7 | import tracery 8 | from tracery.modifiers import base_english 9 | 10 | from talkgenerator.sources import conceptnet 11 | from talkgenerator.sources import phrasefinder 12 | from talkgenerator.sources import wikihow 13 | from talkgenerator.util import language_util 14 | from talkgenerator.util import os_util 15 | from talkgenerator.util import random_util 16 | 17 | known_functions = { 18 | "title": str.title, 19 | "lower": str.lower, 20 | "upper": str.upper, 21 | "dashes": lambda words: words.replace(" ", "-"), 22 | "first_letter": lambda words: words[0], 23 | "last_letter_is_vowel": lambda word: word 24 | if language_util.is_vowel(word[-1]) 25 | else None, 26 | "last_letter_is_consonant": lambda word: word 27 | if language_util.is_consonant(word[-1]) 28 | else None, 29 | "a": lambda word: language_util.add_article(word), 30 | "ing": language_util.to_present_participle, 31 | "plural": language_util.to_plural, 32 | "singular": language_util.to_singular, 33 | # "synonym": generator_util.FromListGenerator(language_util.get_synonyms), 34 | "2_to_1_pronouns": language_util.second_to_first_pronouns, 35 | "wikihow_action": lambda seed: random_util.choice_optional( 36 | wikihow.get_related_wikihow_actions(seed) 37 | ), 38 | "get_last_noun_and_article": language_util.get_last_noun_and_article, 39 | # Conceptnet 40 | "conceptnet_location": conceptnet.weighted_location_generator, 41 | "conceptnet_related": conceptnet.weighted_related_word_generator, 42 | "conceptnet_related_single_word": lambda word: phrasefinder.get_rarest_word( 43 |
conceptnet.weighted_related_word_generator(word) 44 | ), 45 | # Checkers 46 | "is_noun": lambda word: word if language_util.is_noun(word) else None, 47 | "is_verb": lambda word: word if language_util.is_verb(word) else None, 48 | # Unique: To make a variable not be the same as something else with the same parameters 49 | "unique": lambda x: x, 50 | } 51 | 52 | 53 | class AbstractTextGenerator(object): 54 | def generate(self, variables_dictionary): 55 | raise NotImplementedError() 56 | 57 | def generate_with_seed(self, seed): 58 | return self.generate({"seed": seed}) 59 | 60 | 61 | class TemplatedTextGenerator(AbstractTextGenerator): 62 | def __init__(self, template_file=None, templates_list=None): 63 | templates = [] 64 | if template_file: 65 | templates.extend(read_lines(template_file)) 66 | if templates_list: 67 | templates.extend(templates_list) 68 | # Create a tuple so no templates can accidentally be deleted from the generator 69 | self._templates = tuple(templates) 70 | 71 | def generate(self, variables_dictionary=None): 72 | """ Generates a text from the templates using the given variables dictionary""" 73 | # Set empty dictionary if none is given 74 | if not bool(variables_dictionary): 75 | variables_dictionary = {} 76 | # Create a mutable copy of the templates list 77 | possible_templates = list(self._templates) 78 | for i in range(len(possible_templates)): 79 | template = random.choice(possible_templates) 80 | if can_format_with(template, variables_dictionary): 81 | result = apply_variables_to_template(template, variables_dictionary) 82 | if result: 83 | return result 84 | # Remove the template from the possible templates list, such that it won't be picked again 85 | possible_templates.remove(template) 86 | 87 | 88 | class TraceryTextGenerator(AbstractTextGenerator): 89 | def __init__(self, tracery_json, variable="origin"): 90 | with open(os_util.to_actual_file(tracery_json)) as grammar_file: 91 | grammar = get_tracery_grammar(grammar_file) 92 | grammar.add_modifiers(base_english) 93 | self._grammar = grammar 94 | self._variable = variable 95 | 96 | def generate(self, variables_dictionary=None): 97 | """ Generates a text from internal tracery grammar using the given variables dictionary""" 98 | # Set empty dictionary if none is given 99 | if not bool(variables_dictionary): 100 | variables_dictionary = {} 101 | 102 | # Generate 103 | for i in range(100): # TODO prune the grammar instead of retrying 104 | template = self._grammar.flatten("#" + self._variable + "#") 105 | if can_format_with(template, variables_dictionary): 106 | result = apply_variables_to_template(template, variables_dictionary) 107 | if result: 108 | return result 109 | 110 | 111 | @lru_cache(maxsize=20) 112 | def get_tracery_grammar(grammar_file): 113 | return tracery.Grammar(json.load(grammar_file)) 114 | 115 | 116 | def can_format_with(template, variables_dictionary): 117 | """ Checks if the template can be fully formatted by the given variable dictionary without errors""" 118 | format_variables = get_format_variables(template) 119 | return (len(format_variables) == 0 and len(variables_dictionary) == 0) or set( 120 | format_variables 121 | ) <= set(variables_dictionary.keys()) 122 | 123 | 124 | def get_format_variables(template): 125 | """ Finds all the names of the variables used in the template """ 126 | return {x[0] for x in get_format_variables_and_functions(template)} 127 | 128 | 129 | def get_format_variables_and_functions(template): 130 | """ Finds all the names of the variables used in the template with their functions
in a large tuple""" 131 | matches = re.findall(r"{(\w+)((?:[.]\w+)*)}", template) 132 | return set(matches) 133 | 134 | 135 | def apply_variables_to_template(template, variables_dictionary): 136 | variables_and_functions = get_format_variables_and_functions(template) 137 | applied = apply_functions_to_variables( 138 | template, variables_dictionary, variables_and_functions 139 | ) 140 | if applied: 141 | (template, variables_dictionary) = applied 142 | return template.format(**variables_dictionary) 143 | 144 | 145 | def apply_functions(variable, functions): 146 | """ Applies a list of functions to a variable """ 147 | result = variable 148 | for func in functions: 149 | # Check if it transformed the result into None 150 | if result is None: 151 | return None 152 | 153 | if func in known_functions: 154 | result = known_functions[func](result) 155 | # Check if it is a dictionary, as is allowed in real str.format 156 | elif isinstance(result, dict) and func in result: 157 | result = result[func] 158 | # Unique identifier to make similar functions on a variable have different effects 159 | elif func.isdigit(): 160 | result = result 161 | else: 162 | raise ValueError("Unknown function:", func) 163 | 164 | return result 165 | 166 | 167 | def apply_functions_to_variables( 168 | template, variables_dictionary, variables_and_functions 169 | ): 170 | """ Applies the functions of the variables_and_functions tuple and stores them in the variable dictionary and 171 | updates the template """ 172 | variables_and_functions = list(variables_and_functions) 173 | variables_and_functions.sort(key=lambda a: len(a), reverse=True) 174 | 175 | for var_func in variables_and_functions: 176 | # Check if it has functions to apply 177 | if len(var_func) > 1 and len(var_func[1]) > 0: 178 | old_var_name = var_func[0] + var_func[1] 179 | functions = var_func[1][1:].split(".") 180 | variable_name = var_func[0] 181 | variable = variables_dictionary[variable_name] 182 | applied_functions = apply_functions(variable, functions) 183 | if applied_functions is not None: 184 | applied_var_name = old_var_name.replace(".", "_") 185 | # Replace all occurrences with the dot to the underscore notation 186 | template = template.replace(old_var_name, applied_var_name) 187 | # Store in dictionary 188 | variables_dictionary[applied_var_name] = applied_functions 189 | else: 190 | return None 191 | 192 | return template, variables_dictionary 193 | 194 | 195 | def read_lines(filename): 196 | """ Reads all the string lines from a file """ 197 | return os_util.read_lines(filename) 198 | -------------------------------------------------------------------------------- /talkgenerator/slide/powerpoint_slide_creator.py: -------------------------------------------------------------------------------- 1 | import os 2 | import sys 3 | import logging 4 | from functools import lru_cache 5 | from io import BytesIO 6 | from pathlib import Path 7 | from typing import List 8 | 9 | import requests 10 | import PIL 11 | from PIL import Image 12 | from PIL import UnidentifiedImageError 13 | from lxml.etree import XMLSyntaxError 14 | from pptx import Presentation 15 | 16 | from talkgenerator.datastructures.image_data import ImageData 17 | from talkgenerator.util import os_util 18 | 19 | # Location of powerpoint template 20 | _POWERPOINT_TEMPLATE_FILE = "data/powerpoint/template.pptx" 21 | 22 | logger = logging.getLogger("talkgenerator") 23 | 24 | 25 | @lru_cache(maxsize=1) 26 | def get_powerpoint_template_file(): 27 | return 
os_util.to_actual_file(_POWERPOINT_TEMPLATE_FILE) 28 | 29 | 30 | SOURCES_PLACEHOLDER = 10 31 | 32 | # Layouts index in template 33 | LAYOUT_TITLE_SLIDE = 0 34 | LAYOUT_TITLE_AND_CONTENT = 1 35 | LAYOUT_SECTION_HEADER = 2 36 | LAYOUT_TWO_CONTENT = 3 37 | LAYOUT_TWO_TITLE_AND_CONTENT = 4 38 | LAYOUT_TITLE_ONLY = 5 39 | LAYOUT_BLANK = 6 40 | LAYOUT_CONTENT_CAPTION = 7 41 | LAYOUT_PICTURE_CAPTION = 8 42 | LAYOUT_FULL_PICTURE = 11 43 | LAYOUT_TITLE_AND_PICTURE = 12 44 | LAYOUT_LARGE_QUOTE = 13 45 | LAYOUT_TWO_TITLE_AND_IMAGE = 14 46 | LAYOUT_THREE_TITLE_AND_IMAGE = 15 47 | LAYOUT_TITLE_AND_CHART = 16 48 | 49 | 50 | # = HELPERS = 51 | class FileLikeImage: 52 | def get_file_like(self): 53 | raise NotImplementedError() 54 | 55 | def image(self): 56 | raise NotImplementedError() 57 | 58 | 59 | class ExternalImage(FileLikeImage): 60 | def __init__(self, url): 61 | self._url = url 62 | 63 | @lru_cache() 64 | def get_bytes_io(self): 65 | response = requests.get(self._url) 66 | tmp_img = BytesIO(response.content) 67 | return tmp_img 68 | 69 | def get_file_like(self): 70 | return self.get_bytes_io() 71 | 72 | def image(self): 73 | open_image = None 74 | try: 75 | open_image = Image.open(self.get_bytes_io()) 76 | except PIL.UnidentifiedImageError as e: 77 | logger.error(e) 78 | logger.error('PIL.UnidentifiedImageError') 79 | return open_image 80 | 81 | 82 | class InternalImage(FileLikeImage): 83 | def __init__(self, file_location): 84 | self._file_location = file_location 85 | 86 | def get_file_like(self): 87 | return self._file_location 88 | 89 | def image(self): 90 | return Image.open(self._file_location) 91 | 92 | # CREATION 93 | def _create_slide(prs, slide_type): 94 | """ Creates a new slide in the given presentation using the slide_type template """ 95 | return prs.slides.add_slide(prs.slide_layouts[slide_type]) 96 | 97 | 98 | def _add_title(slide, title): 99 | """ Adds the given title to the slide if the title is present""" 100 | if title: 101 | title_object = slide.shapes.title 102 | title_object.text = title 103 | return True 104 | 105 | 106 | def _add_text(slide, placeholder_id, text): 107 | if text: 108 | placeholder = slide.placeholders[placeholder_id] 109 | placeholder.text = str(text) 110 | return True 111 | 112 | 113 | def is_external_url(url: str): 114 | return url.startswith("http") 115 | 116 | 117 | def _add_image( 118 | slide, placeholder_id: int, image: ImageData, original_image_size: bool = True 119 | ): 120 | if isinstance(image, ImageData): 121 | image_url = image.get_image_url() 122 | else: 123 | image_url = image 124 | 125 | if is_external_url(image_url): 126 | image_ref = ExternalImage(image_url) 127 | else: 128 | path = Path(image_url).absolute() 129 | image_ref = InternalImage(str(path)) 130 | 131 | placeholder = slide.placeholders[placeholder_id] 132 | if original_image_size: 133 | # Calculate the image size of the image 134 | try: 135 | # Insert the picture 136 | try: 137 | width, height = image_ref.image().size 138 | # Make sure the placeholder doesn't zoom in 139 | placeholder.height = height 140 | placeholder.width = width 141 | placeholder = placeholder.insert_picture(image_ref.get_file_like()) 142 | # Calculate ratios and compare 143 | image_ratio = width / height 144 | placeholder_ratio = placeholder.width / placeholder.height 145 | ratio_difference = placeholder_ratio - image_ratio 146 | # Placeholder width too wide: 147 | if ratio_difference > 0: 148 | difference_on_each_side = ratio_difference / 2 149 | placeholder.crop_left = -difference_on_each_side
placeholder.crop_right = -difference_on_each_side 151 | # Placeholder height too high 152 | else: 153 | difference_on_each_side = -ratio_difference / 2 154 | placeholder.crop_bottom = -difference_on_each_side 155 | placeholder.crop_top = -difference_on_each_side 156 | return placeholder 157 | except (ValueError, XMLSyntaxError, AttributeError) as e: 158 | logger.error("_add_image error: {}".format(e)) 159 | return None 160 | 161 | except FileNotFoundError as fnfe: 162 | logger.error("_add_image file not found: {}".format(fnfe)) 163 | return None 164 | else: 165 | try: 166 | return placeholder.insert_picture(image_ref.get_file_like()) 167 | except (OSError, ValueError) as e: 168 | logger.error(e) 169 | logger.error("Unexpected error inserting image: {}:{}".format(image, sys.exc_info()[0])) 170 | return None 171 | 172 | 173 | def _add_chart(slide, placeholder_id, chart_type, chart_data): 174 | placeholder = slide.placeholders[placeholder_id] 175 | return placeholder.insert_chart(chart_type, chart_data) 176 | 177 | 178 | def _add_image_or_text(slide, placeholder_id, image_url_or_text, original_image_size): 179 | if os_util.is_image(image_url_or_text): 180 | return _add_image(slide, placeholder_id, image_url_or_text, original_image_size) 181 | else: 182 | return _add_text(slide, placeholder_id, image_url_or_text) 183 | 184 | 185 | def _print_all_placeholders(slide): 186 | for shape in slide.placeholders: 187 | print("%d %s" % (shape.placeholder_format.idx, shape.name)) 188 | 189 | 190 | def add_sources_note(slide, _sources: List[str]): 191 | return _add_text( 192 | slide, SOURCES_PLACEHOLDER, "[Image sources: " + ", ".join(_sources) + "]" 193 | ) 194 | 195 | 196 | # FORMAT GENERATORS 197 | # These are functions that get some inputs (texts, images...) 198 | # and create layouted slide with these inputs 199 | 200 | 201 | def create_new_powerpoint() -> Presentation: 202 | return Presentation(get_powerpoint_template_file()) 203 | 204 | 205 | def create_title_slide(prs, title, subtitle): 206 | slide = _create_slide(prs, LAYOUT_TITLE_SLIDE) 207 | _add_title(slide, title) 208 | _add_text(slide, 1, subtitle) 209 | return slide 210 | 211 | 212 | def create_large_quote_slide(prs, title, text, background_image=None): 213 | if bool(text): 214 | slide = _create_slide(prs, LAYOUT_LARGE_QUOTE) 215 | if title: 216 | _add_title(slide, title) 217 | _add_text(slide, 1, text) 218 | if background_image: 219 | _add_image(slide, 11, background_image, False) 220 | 221 | # Add black transparent image for making other image behind it transparent (missing feature in python-pptx) 222 | data_folder = Path(__file__).parent.parent / "data" / "images" / "black-transparent.png" 223 | _add_image(slide, 12, ImageData(str(data_folder.absolute())), False) 224 | 225 | return slide 226 | 227 | 228 | def create_image_slide(prs, title=None, image_url=None, original_image_size=True): 229 | """ Creates a slide with an image covering the whole slide""" 230 | # TODO debug this: the image can not be set! 
231 | return _create_single_image_slide( 232 | prs, title, image_url, LAYOUT_TITLE_AND_PICTURE, original_image_size 233 | ) 234 | 235 | 236 | def create_full_image_slide(prs, title=None, image_url=None, original_image_size=True): 237 | """ Creates a slide with an image covering the whole slide""" 238 | return _create_single_image_slide( 239 | prs, title, image_url, LAYOUT_FULL_PICTURE, original_image_size 240 | ) 241 | 242 | 243 | def create_two_column_images_slide( 244 | prs, 245 | title=None, 246 | caption_1=None, 247 | image_or_text_1=None, 248 | caption_2=None, 249 | image_or_text_2=None, 250 | original_image_size=True, 251 | ): 252 | # if _is_valid_content(image_or_text_1) and _is_valid_content(image_or_text_2): 253 | slide = _create_slide(prs, LAYOUT_TWO_TITLE_AND_IMAGE) 254 | _add_title(slide, title) 255 | _add_text(slide, 1, caption_1) 256 | _add_image_or_text(slide, 13, image_or_text_1, original_image_size) 257 | _add_text(slide, 3, caption_2) 258 | _add_image_or_text(slide, 14, image_or_text_2, original_image_size) 259 | return slide 260 | 261 | 262 | def create_three_column_images_slide( 263 | prs, 264 | title=None, 265 | caption_1=None, 266 | image_or_text_1=None, 267 | caption_2=None, 268 | image_or_text_2=None, 269 | caption_3=None, 270 | image_or_text_3=None, 271 | original_image_size=True, 272 | ): 273 | # if ( 274 | # _is_valid_content(image_or_text_1) 275 | # and _is_valid_content(image_or_text_2) 276 | # and _is_valid_content(image_or_text_3) 277 | # ): 278 | slide = _create_slide(prs, LAYOUT_THREE_TITLE_AND_IMAGE) 279 | _add_title(slide, title) 280 | _add_text(slide, 1, caption_1) 281 | _add_image_or_text(slide, 13, image_or_text_1, original_image_size) 282 | _add_text(slide, 3, caption_2) 283 | _add_image_or_text(slide, 14, image_or_text_2, original_image_size) 284 | _add_text(slide, 15, caption_3) 285 | _add_image_or_text(slide, 16, image_or_text_3, original_image_size) 286 | return slide 287 | 288 | 289 | # def create_two_column_images_slide_text_second(prs, title=None, caption_1=None, image_1=None, caption_2=None, 290 | # quote=None, 291 | # original_image_size=True): 292 | # if bool(image_1): 293 | # slide = _create_slide(prs, LAYOUT_TWO_TITLE_AND_IMAGE) 294 | # _add_title(slide, title) 295 | # _add_text(slide, 1, caption_1) 296 | # _add_image_or_text(slide, 13, image_1, original_image_size) 297 | # _add_text(slide, 3, caption_2) 298 | # _add_image_or_text(slide, 14, quote) 299 | # return slide 300 | 301 | 302 | def _create_single_image_slide(prs, title, image_url, slide_template_idx, fit_image): 303 | # if _is_valid_content(image_url): 304 | slide = _create_slide(prs, slide_template_idx) 305 | _add_title(slide, title) 306 | _add_image_or_text(slide, 1, image_url, fit_image) 307 | return slide 308 | 309 | 310 | def create_chart_slide(prs, title, chart_type, chart_data, chart_modifier=None): 311 | slide = _create_slide(prs, LAYOUT_TITLE_AND_CHART) 312 | _add_title(slide, title) 313 | chart = _add_chart(slide, 10, chart_type, chart_data).chart 314 | if chart_modifier: 315 | chart_modifier(chart, chart_data) 316 | return slide 317 | -------------------------------------------------------------------------------- /talkgenerator/schema/content_generators.py: -------------------------------------------------------------------------------- 1 | from typing import Collection, Union 2 | 3 | from talkgenerator.sources import pixabay, pexels 4 | from talkgenerator.schema.content_generator_structures import * 5 | from talkgenerator.sources import inspirobot 6 | from 
talkgenerator.sources import shitpostbot 7 | from talkgenerator.sources import unsplash 8 | from talkgenerator.util.generator_util import * 9 | 10 | # =============================== 11 | # ===== CONTENT GENERATORS ===== 12 | # =============================== 13 | 14 | # === TEXT GENERATORS === 15 | 16 | # TITLES 17 | talk_title_generator = create_tracery_generator("data/text-templates/talk_title.json") 18 | talk_ted_title_generator = create_tracery_generator( 19 | "data/text-templates/talk_title.json", "ted_title" 20 | ) 21 | talk_subtitle_generator = create_tracery_generator( 22 | "data/text-templates/talk_subtitle.json" 23 | ) 24 | 25 | 26 | def talk_title_generator_if_not_generated(presentation_context): 27 | if presentation_context["title"] is not None: 28 | return presentation_context["title"] 29 | return talk_title_generator(presentation_context) 30 | 31 | 32 | default_slide_title_generator = create_templated_text_generator( 33 | "data/text-templates/default_slide_title.txt" 34 | ) 35 | deep_abstract_generator = create_templated_text_generator( 36 | "data/text-templates/deep_abstract.txt" 37 | ) 38 | 39 | default_or_no_title_generator = CombinedGenerator( 40 | (1, default_slide_title_generator), (1, NoneGenerator()) 41 | ) 42 | 43 | anticipation_title_generator = create_templated_text_generator( 44 | "data/text-templates/anticipation_title.txt" 45 | ) 46 | 47 | conclusion_title_generator = create_templated_text_generator( 48 | "data/text-templates/conclusion_title.txt" 49 | ) 50 | inspiration_title_generator = create_templated_text_generator( 51 | "data/text-templates/inspiration.txt" 52 | ) 53 | anecdote_title_generator = create_templated_text_generator( 54 | "data/text-templates/anecdote_title.txt" 55 | ) 56 | history_title_generator = create_templated_text_generator( 57 | "data/text-templates/history.txt" 58 | ) 59 | history_person_title_generator = create_templated_text_generator( 60 | "data/text-templates/history_person.txt" 61 | ) 62 | history_and_history_person_title_generator = CombinedGenerator( 63 | (4, history_title_generator), (6, history_person_title_generator) 64 | ) 65 | about_me_title_generator = create_templated_text_generator( 66 | "data/text-templates/about_me_title.txt" 67 | ) 68 | 69 | # NAMES 70 | historical_name_generator = create_tracery_generator( 71 | "data/text-templates/name.json", "title_name" 72 | ) 73 | full_name_generator = create_tracery_generator( 74 | "data/text-templates/name.json", "full_name" 75 | ) 76 | 77 | # ABOUT ME 78 | _about_me_facts_grammar = "data/text-templates/about_me_facts.json" 79 | book_description_generator = create_tracery_generator( 80 | _about_me_facts_grammar, "book_description" 81 | ) 82 | location_description_generator = create_tracery_generator( 83 | _about_me_facts_grammar, "location_description" 84 | ) 85 | hobby_description_generator = create_tracery_generator( 86 | _about_me_facts_grammar, "hobby_description" 87 | ) 88 | job_generator = create_tracery_generator(_about_me_facts_grammar, "job") 89 | country_generator = create_tracery_generator(_about_me_facts_grammar, "country") 90 | 91 | # PROMPTS & CHALLENGES 92 | 93 | anecdote_prompt_generator = create_templated_text_generator( 94 | "data/text-templates/anecdote_prompt.txt" 95 | ) 96 | 97 | # QUOTES 98 | goodreads_quote_generator = GoodReadsQuoteGenerator(250) 99 | goodreads_short_quote_generator = GoodReadsQuoteGenerator(140) 100 | 101 | # DOUBLE CAPTIONS 102 | 103 | _double_image_captions_generator = create_templated_text_generator( 104 | 
"data/text-templates/double_captions.txt" 105 | ) 106 | _triple_image_captions_generator = create_templated_text_generator( 107 | "data/text-templates/triple_captions.txt" 108 | ) 109 | _historic_double_captions_generator = create_templated_text_generator( 110 | "data/text-templates/historic_double_captions.txt" 111 | ) 112 | 113 | double_image_captions_generator = SplitCaptionsGenerator( 114 | _double_image_captions_generator 115 | ) 116 | triple_image_captions_generator = SplitCaptionsGenerator( 117 | _triple_image_captions_generator 118 | ) 119 | historic_double_captions_generator = SplitCaptionsGenerator( 120 | _historic_double_captions_generator 121 | ) 122 | 123 | # Conclusions 124 | _conclusions_tuple_grammar = "data/text-templates/conclusion_tuple.json" 125 | conclusion_two_captions_tuple_generator = SplitCaptionsGenerator( 126 | create_tracery_generator(_conclusions_tuple_grammar, "two_conclusions") 127 | ) 128 | 129 | conclusion_three_captions_tuple_generator = SplitCaptionsGenerator( 130 | create_tracery_generator(_conclusions_tuple_grammar, "three_conclusions") 131 | ) 132 | 133 | # === IMAGE GENERATORS === 134 | 135 | # INSPIROBOT 136 | inspirobot_image_generator = inspirobot.get_random_inspirobot_image 137 | 138 | # GIFS 139 | 140 | reddit_gif_generator = create_reddit_image_generator( 141 | "gifs", "gif", "gifextra", "nonononoYES" 142 | ) 143 | 144 | combined_gif_generator = CombinedGenerator((1, reddit_gif_generator)) 145 | 146 | # REDDIT 147 | 148 | meme_reddit_image_generator = create_reddit_image_generator( 149 | "meme", 150 | "memes", 151 | "MemeEconomy", 152 | "wholesomememes", 153 | "dankmemes", 154 | "AdviceAnimals", 155 | "comics", 156 | ) 157 | weird_reddit_image_generator = create_reddit_image_generator( 158 | "hmmm", 159 | "hmm", 160 | "wtf", 161 | "wtfstockphotos", 162 | "weirdstockphotos", 163 | "darkstockphotos", 164 | "photoshopbattles", 165 | "confusing_perspective", 166 | "cursedimages", 167 | "HybridAnimals", 168 | "EyeBleach", 169 | "natureismetal", 170 | "195", 171 | ) 172 | 173 | neutral_reddit_image_generator = create_reddit_image_generator( 174 | "Cinemagraphs", 175 | "itookapicture", 176 | "Art", 177 | "artstore", 178 | "pics", 179 | "analog", 180 | "ExposurePorn", 181 | "Illustration", 182 | ) 183 | 184 | shitpostbot_image_generator = ExternalImageListGenerator( 185 | SeededGenerator( 186 | BackupGenerator( 187 | shitpostbot.search_images_rated, shitpostbot.get_random_images_rated 188 | ) 189 | ), 190 | weighted=True, 191 | ) 192 | 193 | weird_punchline_static_image_generator = CombinedGenerator( 194 | (4, weird_reddit_image_generator), 195 | (6, shitpostbot_image_generator), 196 | (1, meme_reddit_image_generator), 197 | ) 198 | 199 | weird_punchline_image_generator = CombinedGenerator( 200 | (10, weird_reddit_image_generator), 201 | (8, shitpostbot_image_generator), 202 | (6, combined_gif_generator), 203 | (1, meme_reddit_image_generator), 204 | ) 205 | 206 | 207 | # UNSPLASH 208 | generate_unsplash_image_from_word = ExternalImageListGenerator( 209 | unsplash.search_photos, check_image_validness=False 210 | ) 211 | generate_random_unsplash_image_from_word = ExternalImageListGenerator( 212 | unsplash.random_as_list, check_image_validness=False 213 | ) 214 | generate_unsplash_image = SeededGenerator(generate_unsplash_image_from_word) 215 | generate_random_unsplash_image = SeededGenerator( 216 | generate_random_unsplash_image_from_word 217 | ) 218 | 219 | # PIXABAY 220 | generate_pixabay_image_from_word = 
ExternalImageListGenerator(pixabay.search_photos) 221 | generate_horizontal_pixabay_image_from_word = ExternalImageListGenerator( 222 | pixabay.search_horizontal 223 | ) 224 | generate_pixabay_image = SeededGenerator(generate_pixabay_image_from_word) 225 | # PEXELS 226 | 227 | generate_pexels_image_from_word = ExternalImageListGenerator(pexels.search_photos) 228 | generate_pexels_image = SeededGenerator(generate_pexels_image_from_word) 229 | 230 | # COPYRIGHT FREE 231 | 232 | copyright_free_generator = CombinedGenerator( 233 | (1, generate_unsplash_image), 234 | (1, generate_pixabay_image), 235 | (1, generate_pexels_image), 236 | (0.01, generate_random_unsplash_image), 237 | ) 238 | copyright_free_generator_from_word = CombinedGenerator( 239 | (1, generate_unsplash_image_from_word), 240 | (1, generate_pixabay_image_from_word), 241 | (1, generate_pexels_image_from_word), 242 | (0.01, generate_random_unsplash_image_from_word), 243 | ) 244 | 245 | generate_horizontal_pixabay_image = CombinedGenerator( 246 | (100, SeededGenerator(generate_horizontal_pixabay_image_from_word)), 247 | # Backup: 248 | (0.01, copyright_free_generator), 249 | ) 250 | 251 | copyright_free_related_generator_from_word = ConceptNetMapper( 252 | copyright_free_generator_from_word 253 | ) 254 | copyright_free_related_generator = SeededGenerator( 255 | copyright_free_related_generator_from_word 256 | ) 257 | 258 | 259 | def copyright_free_prefixed_generator(prefixes: Union[str, Collection[str]]): 260 | return SeededGenerator(copyright_free_prefixed_generator_from_word(prefixes)) 261 | 262 | 263 | def copyright_free_prefixed_generator_from_word(prefixes: Union[str, Collection[str]]): 264 | if isinstance(prefixes, str): 265 | return PrefixedGenerator(prefixes, copyright_free_generator_from_word) 266 | generators = [ 267 | (1, PrefixedGenerator(p, copyright_free_generator_from_word)) for p in prefixes 268 | ] 269 | return CombinedGenerator(*generators) 270 | 271 | 272 | weird_copyright_free_generator = copyright_free_prefixed_generator( 273 | ["weird", "humor", "funny"] 274 | ) 275 | normal_or_weird_copyright_free_generator = CombinedGenerator( 276 | (1, copyright_free_generator), (1, weird_copyright_free_generator) 277 | ) 278 | 279 | # NEUTRAL 280 | 281 | neutral_image_generator = CombinedGenerator( 282 | (1000, copyright_free_generator), (300, neutral_reddit_image_generator), 283 | ) 284 | 285 | neutral_image_generator_from_word = CombinedGenerator( 286 | (1000, copyright_free_generator_from_word), 287 | (300, UnseededGenerator(neutral_reddit_image_generator)), 288 | ) 289 | 290 | neutral_or_weird_image_generator = CombinedGenerator( 291 | (1, neutral_image_generator), (1, weird_punchline_image_generator) 292 | ) 293 | 294 | # OLD/VINTAGE 295 | vintage_person_generator = create_reddit_image_generator("OldSchoolCool") 296 | vintage_picture_generator = create_reddit_image_generator( 297 | "TheWayWeWere", "100yearsago", "ColorizedHistory" 298 | ) 299 | 300 | reddit_book_cover_generator = create_reddit_image_generator( 301 | "BookCovers", "fakebookcovers", "coverdesign", "bookdesign" 302 | ) 303 | 304 | reddit_location_image_generator = create_reddit_image_generator( 305 | "evilbuildings", "itookapicture", "SkyPorn", "EarthPorn" 306 | ) 307 | 308 | # TUPLED ABOUT ME 309 | 310 | about_me_hobby_tuple_generator = TupledGenerator( 311 | hobby_description_generator, weird_punchline_image_generator 312 | ) 313 | about_me_book_tuple_generator = TupledGenerator( 314 | book_description_generator, reddit_book_cover_generator 315 | ) 316 
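# --- Illustrative usage sketch, not part of the repository source. The "about me"
# tuple generators in this section are plain callables over the shared
# presentation_context dict; TupledGenerator (defined in
# talkgenerator/util/generator_util.py further below) calls every wrapped generator
# with the same context and returns the results as a tuple, roughly:
#
#     slide_context = {"seed": "cats"}  # illustrative; "seed" is the key the seeded generators in this file rely on
#     caption, cover_image = about_me_book_tuple_generator(slide_context)
#
# where caption is a tracery-generated book description and cover_image is an image
# drawn from the book-cover subreddits.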
| about_me_location_tuple_generator = TupledGenerator( 317 | location_description_generator, reddit_location_image_generator 318 | ) 319 | 320 | about_me_job_tuple_generator = MappedGenerator( 321 | InspiredTupleGenerator( 322 | MappedGenerator(job_generator, str.title), neutral_image_generator_from_word 323 | ), 324 | JobPrefixApplier(), 325 | ) 326 | 327 | about_me_country_tuple_generator = MappedGenerator( 328 | InspiredTupleGenerator(country_generator, neutral_image_generator_from_word), 329 | CountryPrefixApplier(), 330 | ) 331 | 332 | about_me_location_or_country_tuple_generator = CombinedGenerator( 333 | (3, about_me_country_tuple_generator), (1, about_me_location_tuple_generator) 334 | ) 335 | 336 | # Charts 337 | 338 | reddit_chart_generator = create_reddit_image_generator( 339 | "dataisbeautiful", "funnycharts", "charts" 340 | ) 341 | -------------------------------------------------------------------------------- /talkgenerator/sources/chart.py: -------------------------------------------------------------------------------- 1 | import math 2 | import random 3 | 4 | from pptx.chart.data import ChartData 5 | from pptx.chart.data import XyChartData 6 | from pptx.enum.chart import XL_CHART_TYPE 7 | from pptx.enum.chart import XL_LABEL_POSITION 8 | from pptx.enum.chart import XL_TICK_MARK 9 | 10 | from talkgenerator.sources import conceptnet, text_generator 11 | from talkgenerator.util import generator_util 12 | 13 | yes_no_question_generator = text_generator.TraceryTextGenerator( 14 | "data/text-templates/chart_texts.json", "yes_no_question" 15 | ).generate 16 | funny_yes_no_answer_generator = text_generator.TraceryTextGenerator( 17 | "data/text-templates/chart_texts.json", "funny_yes_no_answer" 18 | ).generate 19 | location_question_generator = text_generator.TraceryTextGenerator( 20 | "data/text-templates/chart_texts.json", "location_question" 21 | ).generate 22 | property_question_generator = text_generator.TraceryTextGenerator( 23 | "data/text-templates/chart_texts.json", "property_question" 24 | ).generate 25 | correlation_title_generator = text_generator.TraceryTextGenerator( 26 | "data/text-templates/chart_texts.json", "correlation_title" 27 | ).generate 28 | 29 | 30 | # DATA POINTS HELPERS 31 | 32 | 33 | def add_noise_to_points(max_noise_ratio, datapoints): 34 | return [add_noise_to_point(max_noise_ratio, point) for point in datapoints] 35 | 36 | 37 | def add_noise_to_point(max_noise_ratio, datapoint): 38 | return max( 39 | 0, datapoint + (datapoint * random.uniform(-max_noise_ratio, max_noise_ratio)) 40 | ) 41 | 42 | 43 | def add_gaussian_noise_to_multidim_points(max_noise_ratio, datapoints): 44 | return [ 45 | _add_gaussian_noise_to_multidim_point(max_noise_ratio, point) 46 | for point in datapoints 47 | ] 48 | 49 | 50 | def _add_gaussian_noise_to_multidim_point(max_noise_ratio, datapoint): 51 | return [value * random.gauss(1, max_noise_ratio) for value in datapoint] 52 | 53 | 54 | def normalise_data(datapoints): 55 | total_sum = sum(datapoints) 56 | return [datapoint / total_sum for datapoint in datapoints] 57 | 58 | 59 | def is_too_similar_for_axes(word1, word2): 60 | """ Checks if the words contain each other """ 61 | return word1 in word2 or word2 in word1 62 | 63 | 64 | def create_interesting_curve_function(): 65 | # Build an optional list 66 | 67 | # random small integer 68 | a = random.uniform(-10, 10) 69 | b = random.uniform(0.001, 10) 70 | 71 | # random relative 72 | r = random.uniform(0, 1) 73 | 74 | interesting_functions = [ 75 | lambda x: a * x, 76 | lambda x: 
a / x, 77 | lambda x: a + x, 78 | lambda x: a - x, 79 | # lambda x: min(float(5e8), float(a ** math.log(x))), 80 | # lambda x: min(float(5e8), float(x ** math.log(a))), 81 | lambda x: math.sin(x), 82 | ] 83 | 84 | chosen = random.choice(interesting_functions) 85 | 86 | # Add chance of adding another function 87 | # random_number = random.uniform(0, 1) 88 | # if random_number < 0.4: 89 | # chosen = lambda x: random.choice(interesting_functions)(chosen(x)) 90 | # elif random_number < 0.8: 91 | # chosen = lambda x: random.choice(interesting_functions)(x) * chosen(x) 92 | # else: 93 | # chosen = lambda x: random.choice(interesting_functions)(x) + chosen(x) 94 | 95 | return chosen 96 | 97 | 98 | # DATA SET CREATION 99 | 100 | 101 | def create_equal_data_with_outlier_end( 102 | size, noise_factor, normal_min, normal_max, outlier_min_size, outlier_max_size 103 | ): 104 | # Create data with same number between normal_min and normal_max everywhere 105 | datapoints = [random.uniform(normal_min, normal_max) for _ in range(0, size)] 106 | 107 | # Make last number an outlier 108 | datapoints[-1] = random.uniform(outlier_min_size, outlier_max_size) 109 | 110 | # Apply noise 111 | datapoints = add_noise_to_points(noise_factor, datapoints) 112 | 113 | return datapoints 114 | 115 | 116 | def generate_random_x(lower_bound, upper_bound, number): 117 | return [random.uniform(lower_bound, upper_bound) for _ in range(number)] 118 | 119 | 120 | def generate_y(xs, function): 121 | return [(x, function(x)) for x in xs] 122 | 123 | 124 | # CHART TYPES PROPERTIES SETTING 125 | 126 | 127 | def add_data_to_series(serie, data_points): 128 | for data_point in data_points: 129 | x, y = data_point 130 | serie.add_data_point(x, y) 131 | 132 | 133 | def _set_pie_label_positions(chart, series, chart_data, label_position): 134 | chart.plots[0].has_data_labels = True 135 | for i in range(len(chart_data.categories)): 136 | point = series.points[i] 137 | value = series.values[i] 138 | point.data_label.text_frame.text = "{} ({:.0%})".format( 139 | chart_data.categories[i].label, value 140 | ) 141 | if label_position: 142 | point.data_label.position = label_position 143 | 144 | 145 | def set_histogram_properties(chart, chart_data): 146 | value_axis = chart.value_axis 147 | value_axis.major_tick_mark = XL_TICK_MARK.NONE 148 | value_axis.minor_tick_mark = XL_TICK_MARK.NONE 149 | value_axis.has_major_gridlines = False 150 | value_axis.has_minor_gridlines = False 151 | # value_axis.visible = False 152 | 153 | tick_labels = value_axis.tick_labels 154 | tick_labels.number_format = "0%" 155 | 156 | return chart 157 | 158 | 159 | def set_pie_properties(chart, chart_data): 160 | if chart and chart_data: 161 | chart.has_legend = False 162 | chart.has_title = False 163 | 164 | # Data points 165 | series = chart.series[0] 166 | # Check if there are small values that can't be contained on the pie piece 167 | label_position = ( 168 | XL_LABEL_POSITION.OUTSIDE_END 169 | if any(t < 0.10 for t in series.values) 170 | else XL_LABEL_POSITION.CENTER 171 | ) 172 | 173 | # set labels to contain category and value 174 | _set_pie_label_positions(chart, series, chart_data, label_position) 175 | 176 | 177 | def set_doughnut_properties(chart, chart_data): 178 | if chart and chart_data: 179 | chart.has_legend = False 180 | series = chart.series[0] 181 | _set_pie_label_positions(chart, series, chart_data, None) 182 | 183 | 184 | def create_set_scatter_properties(x_label, y_label): 185 | def set_scatter_properties(chart, chart_data): 186 | chart.has_legend = 
False 187 | x_axis = chart.category_axis 188 | y_axis = chart.value_axis 189 | 190 | # TODO: Fix it so that this actually has a title 191 | # x_axis.has_title = True 192 | # y_axis.has_title = True 193 | 194 | return set_scatter_properties 195 | 196 | 197 | # CHART TYPES 198 | PIE = XL_CHART_TYPE.PIE, set_pie_properties 199 | PROCENT_HISTOGRAM = XL_CHART_TYPE.COLUMN_CLUSTERED, set_histogram_properties 200 | DOUGHNUT = XL_CHART_TYPE.DOUGHNUT, set_doughnut_properties 201 | 202 | # CHART DATA GENERATOR 203 | 204 | _YES_NO_CHART_TYPES = PIE, PROCENT_HISTOGRAM, DOUGHNUT 205 | 206 | 207 | def generate_yes_no_large_funny_answer_chart_data(presentation_context): 208 | title = yes_no_question_generator(presentation_context) 209 | 210 | presentation_context["chart_title"] = title 211 | 212 | categories = ["Yes", "No", funny_yes_no_answer_generator(presentation_context)] 213 | series_data = normalise_data( 214 | create_equal_data_with_outlier_end(len(categories), 0.2, 1, 2.5, 1, 20) 215 | ) 216 | 217 | chart_data = ChartData() 218 | chart_data.categories = categories 219 | chart_data.add_series("", series_data) 220 | return title, chart_data 221 | 222 | 223 | def _generate_conceptnet_data( 224 | presentation_context, title_generator, conceptnet_function 225 | ): 226 | seed = presentation_context["seed"] 227 | title = title_generator(presentation_context) 228 | 229 | presentation_context["chart_title"] = title 230 | 231 | conceptnet_relations = conceptnet_function(seed) 232 | 233 | if conceptnet_relations: 234 | conceptnet_relations = conceptnet.remove_duplicates(conceptnet_relations) 235 | conceptnet_relations = conceptnet.remove_containing(conceptnet_relations, seed) 236 | random.shuffle(conceptnet_relations) 237 | 238 | conceptnet_relations = conceptnet_relations[0 : random.randint(2, 5)] 239 | categories = [location[1] for location in conceptnet_relations] 240 | values = [float(location[0]) ** 2 for location in conceptnet_relations] 241 | 242 | if len(categories) == 0: 243 | return None 244 | series_data = normalise_data(values) 245 | 246 | chart_data = ChartData() 247 | chart_data.categories = categories 248 | chart_data.add_series("", series_data) 249 | return title, chart_data 250 | 251 | 252 | def generate_location_data(presentation_context): 253 | return _generate_conceptnet_data( 254 | presentation_context, 255 | location_question_generator, 256 | conceptnet.get_weighted_related_locations, 257 | ) 258 | 259 | 260 | def generate_property_data(presentation_context): 261 | return _generate_conceptnet_data( 262 | presentation_context, 263 | property_question_generator, 264 | conceptnet.get_weighted_properties, 265 | ) 266 | 267 | 268 | # FULL CHART GENERATORS 269 | 270 | 271 | def generate_yes_no_pie(presentation_context): 272 | title, chart_data = generate_yes_no_large_funny_answer_chart_data( 273 | presentation_context 274 | ) 275 | chart_type, chart_modifier = random.choice(_YES_NO_CHART_TYPES) 276 | return title, chart_type, chart_data, chart_modifier 277 | 278 | 279 | def generate_location_pie(presentation_context): 280 | result = generate_location_data(presentation_context) 281 | if result: 282 | title, chart_data = result 283 | chart_type, chart_modifier = random.choice(_YES_NO_CHART_TYPES) 284 | return title, chart_type, chart_data, chart_modifier 285 | 286 | 287 | def generate_property_pie(presentation_context): 288 | result = generate_property_data(presentation_context) 289 | if result: 290 | title, chart_data = result 291 | chart_type, chart_modifier = 
random.choice(_YES_NO_CHART_TYPES) 292 | return title, chart_type, chart_data, chart_modifier 293 | 294 | 295 | _CORRELATION_WORD_GENERATOR = generator_util.WalkingGenerator( 296 | generator_util.CombinedGenerator( 297 | (2, conceptnet.unweighted_antonym_generator), 298 | (1, conceptnet.unweighted_related_word_generator), 299 | ), 300 | steps=5, 301 | ) 302 | 303 | 304 | def generate_correlation_curve(presentation_context): 305 | x_label = presentation_context["topic"] 306 | y_label = presentation_context["seed"] 307 | 308 | if is_too_similar_for_axes(x_label, y_label): 309 | x_label = _CORRELATION_WORD_GENERATOR(y_label) 310 | if is_too_similar_for_axes(x_label, y_label): 311 | x_label = "time" 312 | presentation_context.update({"x_label": x_label, "y_label": y_label}) 313 | 314 | title = correlation_title_generator(presentation_context) 315 | 316 | if not title: 317 | return None 318 | 319 | chart_data = XyChartData() 320 | 321 | serie = chart_data.add_series("Model") 322 | 323 | # Generate some Xs, with chance of exponential differences in size between generated x axes 324 | xs = generate_random_x( 325 | 0, 2 ** random.uniform(1, 10), int(2 ** random.uniform(3, 8)) 326 | ) 327 | 328 | # Generate y 329 | data_points = generate_y(xs, create_interesting_curve_function()) 330 | 331 | max_x = max(xs) 332 | 333 | data_points = add_gaussian_noise_to_multidim_points( 334 | 1.5 * random.uniform(0, max_x / 10), data_points 335 | ) 336 | 337 | # Remove negatives 338 | data_points = [(abs(datapoint[0]), abs(datapoint[1])) for datapoint in data_points] 339 | 340 | add_data_to_series(serie, data_points) 341 | 342 | return ( 343 | title, 344 | XL_CHART_TYPE.XY_SCATTER, 345 | chart_data, 346 | create_set_scatter_properties(x_label, y_label), 347 | ) 348 | -------------------------------------------------------------------------------- /talkgenerator/util/generator_util.py: -------------------------------------------------------------------------------- 1 | """ 2 | Light, commonly used, non-specific generators that are helpful shortcuts for creating 3 | certain types of (content) generators 4 | """ 5 | import os 6 | import sys 7 | import random 8 | import logging 9 | import inspect 10 | from typing import Callable, Optional, Dict, Union, Tuple 11 | 12 | import requests 13 | 14 | from talkgenerator.datastructures.image_data import ImageData 15 | from talkgenerator.util import random_util, os_util 16 | 17 | logger = logging.getLogger("talkgenerator") 18 | 19 | 20 | def fullname(o): 21 | # o.__module__ + "." + o.__class__.__qualname__ is an example in 22 | # this context of H.L. Mencken's "neat, plausible, and wrong." 23 | # Python makes no guarantees as to whether the __module__ special 24 | # attribute is defined, so we take a more circumspect approach. 25 | # Alas, the module name is explicitly excluded from __qualname__ 26 | # in Python 3. 27 | 28 | module = o.__class__.__module__ 29 | if module is None or module == str.__class__.__module__: 30 | return o.__class__.__name__ # Avoid reporting __builtin__ 31 | else: 32 | return module + '.' 
+ o.__class__.__name__ 33 | 34 | 35 | class Generator(object): 36 | def __call__(self, seed: str): 37 | raise NotImplementedError( 38 | str(self) + " has not provided an implementation for the generator" 39 | ) 40 | 41 | 42 | class PrefixedGenerator(Generator): 43 | def __init__(self, prefix: str, generator: Generator): 44 | self._prefix = prefix 45 | self._generator = generator 46 | 47 | def __call__(self, seed: str): 48 | return self._generator(self._prefix + " " + seed) 49 | 50 | 51 | class PrefixedPresentationContextGenerator(Generator): 52 | def __init__(self, prefix: str, generator): 53 | self._prefix = prefix 54 | self._generator = generator 55 | 56 | def __call__(self, presentation_context): 57 | presentation_context = dict(presentation_context) 58 | presentation_context["seed"] = self._prefix + " " + presentation_context["seed"] 59 | return self._generator(presentation_context) 60 | 61 | 62 | class CombinedGenerator(Generator): 63 | def __init__(self, *weighted_generators: Tuple[Union[int, float], Generator]): 64 | self._weighted_generators = weighted_generators 65 | 66 | def __call__(self, seed: Union[str, Dict[str, str]]): 67 | logger.debug('Calling generator_util.CombinedGenerator') 68 | current_weighted_generators = list(self._weighted_generators) 69 | logger.debug("current_weighted_generators: {}".format(current_weighted_generators)) 70 | while len(current_weighted_generators) > 0: 71 | generator = random_util.weighted_random(current_weighted_generators) 72 | logger.debug("current generator: {}".format(generator)) 73 | logger.debug("generator seed: {}".format(seed)) 74 | generated = generator(seed) 75 | logger.debug("generated: {}".format(generated)) 76 | if generated is not None: 77 | return generated 78 | _remove_object_from_weighted_list(current_weighted_generators, generator) 79 | 80 | 81 | def _remove_object_from_weighted_list(current_weighted_generators, generator): 82 | for i in current_weighted_generators: 83 | if i and i[1] == generator: 84 | current_weighted_generators.remove(i) 85 | 86 | 87 | class MappedGenerator(Generator): 88 | def __init__(self, generator, *functions): 89 | self._generator = generator 90 | self._functions = functions 91 | 92 | def __call__(self, presentation_context): 93 | # print("MappedGenerator generator using", presentation_context) 94 | generated = self._generator(presentation_context) 95 | for func in self._functions: 96 | generated = func(generated) 97 | return generated 98 | 99 | 100 | class TupledGenerator(Generator): 101 | """ Creates a tuple generator that generates every tuple value independently from the others""" 102 | 103 | def __init__(self, *generators): 104 | self._generators = generators 105 | 106 | def __call__(self, presentation_context): 107 | # print("TupledGenerator generator using", presentation_context) 108 | return tuple( 109 | [generator(presentation_context) for generator in self._generators] 110 | ) 111 | 112 | 113 | class InspiredTupleGenerator(Generator): 114 | """ The second generator will get the output of generator 1 as input, outputting the tuple """ 115 | 116 | def __init__(self, generator_1, generator_2): 117 | self._generator_1 = generator_1 118 | self._generator_2 = generator_2 119 | 120 | def __call__(self, presentation_context): 121 | # print("InspiredTupleGenerator generator using", presentation_context) 122 | gen_1 = self._generator_1(presentation_context) 123 | gen_2 = self._generator_2(gen_1) 124 | return gen_1, gen_2 125 | 126 | 127 | # == TRIVIAL GENERATORS == 128 | 129 | 130 | class SeededGenerator(Generator): 
131 | def __init__(self, simple_generator): 132 | self._simple_generator = simple_generator 133 | 134 | def __call__(self, presentation_context): 135 | logger.debug('Calling generator_util.SeededGenerator') 136 | logger.debug('presentation_context: {}'.format(presentation_context)) 137 | logger.debug('self._simple_generator: {}'.format(self._simple_generator)) 138 | return self._simple_generator(presentation_context["seed"]) 139 | 140 | 141 | class UnseededGenerator(Generator): 142 | def __init__(self, simple_generator): 143 | self._simple_generator = simple_generator 144 | 145 | def __call__(self, seed): 146 | presentation_context = {"seed": seed} 147 | return self._simple_generator(presentation_context) 148 | 149 | 150 | class NoneGenerator(Generator): 151 | def __init__(self): 152 | pass 153 | 154 | def __call__(self, presentation_context): 155 | return None 156 | 157 | 158 | class IdentityGenerator(Generator): 159 | def __init__(self, input_word): 160 | self._input_word = input_word 161 | 162 | def __call__(self, presentation_context): 163 | return self._input_word 164 | 165 | 166 | class TitledIdentityGenerator(Generator): 167 | def __init__(self, input_word): 168 | self._input_word = input_word 169 | 170 | def __call__(self, presentation_context): 171 | if self._input_word: 172 | return self._input_word.title() 173 | 174 | 175 | class StaticGenerator(Generator): 176 | def __init__(self, always_generate_this): 177 | self._always_generate_this = always_generate_this 178 | 179 | def __call__(self, presentation_context=None): 180 | return self._always_generate_this 181 | 182 | 183 | class FromListGenerator(Generator): 184 | def __init__(self, list_generator): 185 | self._list_generator = list_generator 186 | 187 | def __call__(self, presentation_context): 188 | return random_util.choice_optional(self._list_generator(presentation_context)) 189 | 190 | 191 | seeded_identity_generator = SeededGenerator(IdentityGenerator) 192 | seeded_titled_identity_generator = SeededGenerator(TitledIdentityGenerator) 193 | 194 | 195 | class ExternalImageListGenerator(Generator): 196 | def __init__( 197 | self, image_generator, check_image_validness=True, weighted=False, 198 | ): 199 | self._image_generator = image_generator 200 | self._check_image_validness = check_image_validness 201 | self._weighted = weighted 202 | 203 | def __call__(self, presentation_context) -> Optional[ImageData]: 204 | logger.debug('Calling generator_util.ExternalImageListGenerator') 205 | logger.debug('self._image_generator: {}'.format(self._image_generator)) 206 | logger.debug('self._check_image_validness: {}'.format(self._check_image_validness)) 207 | logger.debug('self._weighted: {}'.format(self._weighted)) 208 | logger.debug('module where function def: {}'.format(self._image_generator.__module__)) 209 | logger.debug('****************************************************************') 210 | images = self._image_generator(presentation_context) 211 | # logger.debug('images: {}'.format(images)) 212 | logger.debug('****************************************************************') 213 | 214 | while bool(images) and len(images) > 0: 215 | original_chosen_image = ( 216 | random_util.weighted_random([image for image in images if image[0] > 0]) 217 | if self._weighted 218 | else random.choice(images) 219 | ) 220 | if isinstance(original_chosen_image, str): 221 | chosen_image = ImageData(image_url=original_chosen_image) 222 | elif isinstance(original_chosen_image, ImageData): 223 | chosen_image = original_chosen_image 224 | else: 225 | 
logger.warning( 226 | "INVALID IMAGE INPUT FOR EXTERNAL IMAGE GENERATOR / " 227 | + str(original_chosen_image) 228 | + " / " 229 | + str(type(original_chosen_image)) 230 | ) 231 | images.remove(original_chosen_image) 232 | continue 233 | 234 | return chosen_image 235 | return None 236 | 237 | 238 | class BackupGenerator(Generator): 239 | def __init__(self, *generator_list): 240 | self._generator_list = generator_list 241 | 242 | def __call__(self, context): 243 | for generator in self._generator_list: 244 | generated = generator(context) 245 | if generated: 246 | return generated 247 | 248 | 249 | class WeightedGenerator(Generator): 250 | def __init__(self, weighted_list_creator): 251 | self._weighted_list_creator = weighted_list_creator 252 | 253 | def __call__(self, argument): 254 | weighted_list = self._weighted_list_creator(argument) 255 | if weighted_list: 256 | return random_util.weighted_random(weighted_list) 257 | 258 | 259 | class UnweightedGenerator(Generator): 260 | def __init__(self, weighted_list_creator): 261 | self._weighted_list_creator = weighted_list_creator 262 | 263 | def __call__(self, argument): 264 | weighted_list = self._weighted_list_creator(argument) 265 | if weighted_list: 266 | return random_util.choice_optional( 267 | [element[1] for element in weighted_list] 268 | ) 269 | 270 | 271 | class WalkingGenerator(Generator): 272 | """ This type of generator uses its output as input for a next step, taking concepts a few steps away """ 273 | 274 | def __init__(self, inner_generator, steps): 275 | self._inner_generator = inner_generator 276 | self._steps = steps 277 | 278 | def __call__(self, seed): 279 | history = set() 280 | history.add(seed) 281 | current = seed 282 | for i in range(self._steps): 283 | generated = self._inner_generator(current) 284 | if generated: 285 | current = generated 286 | history.add(current) 287 | 288 | return current 289 | 290 | 291 | class ImageGenerator(Generator): 292 | def __call__(self, seed: str) -> ImageData: 293 | raise NotImplementedError("Not implemented image generator") 294 | 295 | 296 | class UnsourcedImageGenerator(ImageGenerator): 297 | def __init__(self, image_url_generator: Callable[[str], str]): 298 | self._image_url_generator = image_url_generator 299 | 300 | def __call__(self, seed: str) -> ImageData: 301 | return ImageData(image_url=self._image_url_generator(seed)) 302 | 303 | 304 | class RelatedMappingGenerator(Generator): 305 | def __init__( 306 | self, related_word_generator: Callable[[str], str], generator: Generator 307 | ): 308 | self._related_word_generator = related_word_generator 309 | self._generator = generator 310 | 311 | def __call__(self, seed: str) -> Optional[str]: 312 | mapped_seed = self._related_word_generator(seed) 313 | return self._generator(mapped_seed) 314 | -------------------------------------------------------------------------------- /talkgenerator/data/eval/common_words.txt: -------------------------------------------------------------------------------- 1 | time 2 | way 3 | year 4 | work 5 | government 6 | day 7 | man 8 | world 9 | life 10 | part 11 | house 12 | course 13 | case 14 | system 15 | place 16 | end 17 | group 18 | company 19 | party 20 | information 21 | school 22 | fact 23 | money 24 | point 25 | example 26 | state 27 | business 28 | night 29 | area 30 | water 31 | thing 32 | family 33 | head 34 | hand 35 | order 36 | john 37 | side 38 | home 39 | development 40 | week 41 | power 42 | country 43 | council 44 | use 45 | service 46 | room 47 | market 48 | problem 49 | court 50 | 
lot 51 | a 52 | war 53 | police 54 | interest 55 | car 56 | law 57 | road 58 | form 59 | face 60 | education 61 | policy 62 | research 63 | sort 64 | office 65 | body 66 | person 67 | health 68 | mother 69 | question 70 | period 71 | name 72 | book 73 | level 74 | child 75 | control 76 | society 77 | minister 78 | view 79 | door 80 | line 81 | community 82 | south 83 | city 84 | god 85 | father 86 | centre 87 | effect 88 | staff 89 | position 90 | kind 91 | job 92 | woman 93 | action 94 | management 95 | act 96 | process 97 | north 98 | age 99 | evidence 100 | idea 101 | west 102 | support 103 | moment 104 | sense 105 | report 106 | mind 107 | church 108 | morning 109 | death 110 | change 111 | industry 112 | land 113 | care 114 | century 115 | range 116 | table 117 | back 118 | trade 119 | history 120 | study 121 | street 122 | committee 123 | rate 124 | word 125 | food 126 | language 127 | experience 128 | result 129 | team 130 | other 131 | sir 132 | section 133 | programme 134 | air 135 | authority 136 | role 137 | reason 138 | price 139 | town 140 | class 141 | nature 142 | subject 143 | department 144 | union 145 | bank 146 | member 147 | value 148 | need 149 | east 150 | practice 151 | type 152 | paper 153 | date 154 | decision 155 | figure 156 | right 157 | wife 158 | president 159 | university 160 | friend 161 | club 162 | quality 163 | voice 164 | lord 165 | stage 166 | king 167 | us 168 | situation 169 | light 170 | tax 171 | production 172 | march 173 | secretary 174 | art 175 | board 176 | may 177 | hospital 178 | month 179 | music 180 | cost 181 | field 182 | award 183 | issue 184 | bed 185 | project 186 | chapter 187 | girl 188 | game 189 | amount 190 | basis 191 | knowledge 192 | approach 193 | series 194 | love 195 | top 196 | news 197 | front 198 | future 199 | manager 200 | account 201 | computer 202 | security 203 | rest 204 | labour 205 | structure 206 | hair 207 | bill 208 | heart 209 | force 210 | attention 211 | movement 212 | success 213 | letter 214 | agreement 215 | capital 216 | analysis 217 | population 218 | environment 219 | performance 220 | model 221 | material 222 | theory 223 | growth 224 | fire 225 | chance 226 | boy 227 | relationship 228 | son 229 | sea 230 | record 231 | size 232 | property 233 | space 234 | term 235 | director 236 | plan 237 | behaviour 238 | treatment 239 | energy 240 | st 241 | peter 242 | income 243 | cup 244 | scheme 245 | design 246 | response 247 | association 248 | choice 249 | pressure 250 | hall 251 | couple 252 | technology 253 | defence 254 | list 255 | chairman 256 | loss 257 | activity 258 | contract 259 | county 260 | wall 261 | paul 262 | difference 263 | army 264 | hotel 265 | sun 266 | product 267 | summer 268 | set 269 | village 270 | colour 271 | floor 272 | season 273 | unit 274 | park 275 | hour 276 | investment 277 | test 278 | garden 279 | husband 280 | employment 281 | style 282 | science 283 | look 284 | deal 285 | charge 286 | help 287 | economy 288 | new 289 | page 290 | risk 291 | advice 292 | event 293 | picture 294 | commission 295 | fish 296 | college 297 | oil 298 | doctor 299 | opportunity 300 | film 301 | conference 302 | operation 303 | application 304 | press 305 | extent 306 | addition 307 | station 308 | window 309 | shop 310 | access 311 | region 312 | doubt 313 | majority 314 | degree 315 | television 316 | blood 317 | statement 318 | sound 319 | election 320 | parliament 321 | site 322 | mark 323 | importance 324 | title 325 | species 326 | increase 327 | return 328 | concern 329 | public 330 
| competition 331 | software 332 | glass 333 | lady 334 | answer 335 | earth 336 | daughter 337 | purpose 338 | responsibility 339 | leader 340 | river 341 | eye 342 | ability 343 | appeal 344 | opposition 345 | campaign 346 | respect 347 | task 348 | instance 349 | sale 350 | whole 351 | officer 352 | method 353 | division 354 | source 355 | piece 356 | pattern 357 | lack 358 | disease 359 | equipment 360 | surface 361 | oxford 362 | demand 363 | post 364 | mouth 365 | radio 366 | provision 367 | attempt 368 | sector 369 | firm 370 | status 371 | peace 372 | variety 373 | teacher 374 | show 375 | speaker 376 | baby 377 | arm 378 | base 379 | miss 380 | safety 381 | trouble 382 | culture 383 | direction 384 | context 385 | character 386 | box 387 | discussion 388 | past 389 | weight 390 | organisation 391 | start 392 | brother 393 | league 394 | condition 395 | machine 396 | argument 397 | sex 398 | budget 399 | english 400 | transport 401 | share 402 | mum 403 | cash 404 | principle 405 | exchange 406 | aid 407 | library 408 | version 409 | rule 410 | tea 411 | balance 412 | afternoon 413 | reference 414 | protection 415 | truth 416 | district 417 | turn 418 | smith 419 | review 420 | minute 421 | duty 422 | survey 423 | presence 424 | influence 425 | stone 426 | dog 427 | benefit 428 | collection 429 | executive 430 | speech 431 | function 432 | queen 433 | marriage 434 | stock 435 | failure 436 | kitchen 437 | student 438 | effort 439 | holiday 440 | career 441 | attack 442 | length 443 | horse 444 | progress 445 | plant 446 | visit 447 | relation 448 | ball 449 | memory 450 | bar 451 | opinion 452 | quarter 453 | impact 454 | scale 455 | race 456 | image 457 | trust 458 | justice 459 | edge 460 | gas 461 | railway 462 | expression 463 | advantage 464 | gold 465 | wood 466 | network 467 | text 468 | forest 469 | sister 470 | chair 471 | cause 472 | foot 473 | rise 474 | half 475 | winter 476 | corner 477 | insurance 478 | step 479 | damage 480 | credit 481 | pain 482 | possibility 483 | legislation 484 | strength 485 | speed 486 | crime 487 | hill 488 | debate 489 | will 490 | supply 491 | present 492 | confidence 493 | mary 494 | patient 495 | wind 496 | solution 497 | band 498 | museum 499 | farm 500 | pound 501 | henry 502 | match 503 | assessment 504 | message 505 | football 506 | no 507 | animal 508 | skin 509 | scene 510 | article 511 | stuff 512 | introduction 513 | play 514 | administration 515 | fear 516 | dad 517 | proportion 518 | island 519 | contact 520 | japan 521 | claim 522 | kingdom 523 | video 524 | tv 525 | existence 526 | telephone 527 | move 528 | traffic 529 | distance 530 | relief 531 | cabinet 532 | unemployment 533 | reality 534 | target 535 | trial 536 | rock 537 | concept 538 | spirit 539 | accident 540 | organization 541 | construction 542 | coffee 543 | phone 544 | distribution 545 | train 546 | sight 547 | difficulty 548 | factor 549 | exercise 550 | weekend 551 | battle 552 | prison 553 | grant 554 | aircraft 555 | tree 556 | bridge 557 | strategy 558 | contrast 559 | communication 560 | background 561 | shape 562 | wine 563 | star 564 | hope 565 | selection 566 | detail 567 | user 568 | path 569 | client 570 | search 571 | master 572 | rain 573 | offer 574 | goal 575 | dinner 576 | freedom 577 | attitude 578 | while 579 | agency 580 | seat 581 | manner 582 | favour 583 | fig. 
584 | pair 585 | crisis 586 | smile 587 | prince 588 | danger 589 | call 590 | capacity 591 | output 592 | note 593 | procedure 594 | theatre 595 | tour 596 | recognition 597 | middle 598 | absence 599 | sentence 600 | package 601 | track 602 | card 603 | sign 604 | commitment 605 | player 606 | threat 607 | weather 608 | element 609 | conflict 610 | notice 611 | victory 612 | bottom 613 | finance 614 | fund 615 | violence 616 | file 617 | profit 618 | standard 619 | jack 620 | route 621 | china 622 | expenditure 623 | second 624 | discipline 625 | cell 626 | pp. 627 | reaction 628 | castle 629 | congress 630 | individual 631 | lead 632 | consideration 633 | debt 634 | option 635 | payment 636 | exhibition 637 | reform 638 | emphasis 639 | spring 640 | audience 641 | feature 642 | touch 643 | estate 644 | assembly 645 | volume 646 | youth 647 | contribution 648 | curriculum 649 | appearance 650 | martin 651 | tom 652 | boat 653 | institute 654 | membership 655 | branch 656 | bus 657 | waste 658 | heat 659 | neck 660 | object 661 | captain 662 | driver 663 | challenge 664 | conversation 665 | occasion 666 | code 667 | crown 668 | birth 669 | silence 670 | literature 671 | faith 672 | hell 673 | entry 674 | transfer 675 | gentleman 676 | bag 677 | coal 678 | investigation 679 | leg 680 | belief 681 | total 682 | major 683 | document 684 | description 685 | murder 686 | aim 687 | manchester 688 | flight 689 | conclusion 690 | drug 691 | tradition 692 | pleasure 693 | connection 694 | owner 695 | treaty 696 | tony 697 | alan 698 | desire 699 | professor 700 | copy 701 | ministry 702 | acid 703 | palace 704 | address 705 | institution 706 | lunch 707 | generation 708 | partner 709 | engine 710 | newspaper 711 | cross 712 | reduction 713 | welfare 714 | definition 715 | key 716 | release 717 | vote 718 | examination 719 | judge 720 | atmosphere 721 | leadership 722 | sky 723 | breath 724 | creation 725 | row 726 | guide 727 | milk 728 | cover 729 | screen 730 | intention 731 | criticism 732 | jones 733 | silver 734 | customer 735 | journey 736 | explanation 737 | green 738 | measure 739 | brain 740 | significance 741 | phase 742 | injury 743 | run 744 | coast 745 | technique 746 | valley 747 | drink 748 | magazine 749 | potential 750 | drive 751 | revolution 752 | bishop 753 | settlement 754 | christ 755 | metal 756 | motion 757 | index 758 | adult 759 | inflation 760 | sport 761 | surprise 762 | pension 763 | factory 764 | tape 765 | flow 766 | iron 767 | trip 768 | lane 769 | pool 770 | independence 771 | hole 772 | un 773 | flat 774 | content 775 | pay 776 | noise 777 | combination 778 | session 779 | appointment 780 | fashion 781 | consumer 782 | accommodation 783 | temperature 784 | mike 785 | religion 786 | author 787 | nation 788 | northern 789 | sample 790 | assistance 791 | interpretation 792 | aspect 793 | display 794 | shoulder 795 | agent 796 | gallery 797 | republic 798 | cancer 799 | proposal 800 | sequence 801 | simon 802 | ship 803 | interview 804 | vehicle 805 | democracy 806 | improvement 807 | involvement 808 | general 809 | enterprise 810 | van 811 | meal 812 | breakfast 813 | motor 814 | channel 815 | impression 816 | tone 817 | sheet 818 | pollution 819 | bob 820 | beauty 821 | square 822 | vision 823 | spot 824 | distinction 825 | brown 826 | crowd 827 | fuel 828 | desk 829 | sum 830 | decline 831 | revenue 832 | fall 833 | diet 834 | bedroom 835 | soil 836 | reader 837 | shock 838 | fruit 839 | behalf 840 | deputy 841 | roof 842 | nose 843 | steel 844 | co 845 | artist 
846 | graham 847 | plate 848 | song 849 | maintenance 850 | formation 851 | grass 852 | spokesman 853 | ice 854 | talk 855 | program 856 | link 857 | ring 858 | expert 859 | establishment 860 | plastic 861 | candidate 862 | rail 863 | passage 864 | joe 865 | parish 866 | ref 867 | emergency 868 | liability 869 | identity 870 | location 871 | framework 872 | strike 873 | countryside 874 | map 875 | lake 876 | household 877 | approval 878 | border 879 | bottle 880 | bird 881 | constitution 882 | autumn 883 | cat 884 | agriculture 885 | concentration 886 | guy 887 | dress 888 | victim 889 | mountain 890 | editor 891 | theme 892 | error 893 | loan 894 | stress 895 | recovery 896 | electricity 897 | recession 898 | wealth 899 | request 900 | comparison 901 | lewis 902 | white 903 | walk 904 | focus 905 | chief 906 | parent 907 | sleep 908 | mass 909 | jane 910 | bush 911 | foundation 912 | bath 913 | item 914 | lifespan 915 | lee 916 | publication 917 | decade 918 | beach 919 | sugar 920 | height 921 | charity 922 | writer 923 | panel 924 | struggle 925 | dream 926 | outcome 927 | efficiency 928 | offence 929 | resolution 930 | reputation 931 | specialist 932 | taylor 933 | pub 934 | co-operation 935 | port 936 | incident 937 | representation 938 | bread 939 | chain 940 | initiative 941 | clause 942 | resistance 943 | mistake 944 | worker 945 | advance 946 | empire 947 | notion 948 | mirror 949 | delivery 950 | chest 951 | licence 952 | frank 953 | average 954 | awareness 955 | travel 956 | expansion 957 | block 958 | alternative 959 | chancellor 960 | meat 961 | store 962 | self 963 | break 964 | drama 965 | corporation 966 | currency 967 | extension 968 | convention 969 | partnership 970 | skill 971 | furniture 972 | round 973 | regime 974 | inquiry 975 | rugby 976 | philosophy 977 | scope 978 | gate 979 | minority 980 | intelligence 981 | restaurant 982 | consequence 983 | mill 984 | golf 985 | retirement 986 | priority 987 | plane 988 | gun 989 | gap 990 | core 991 | uncle 992 | thatcher 993 | fun 994 | arrival 995 | snow 996 | no 997 | command 998 | abuse 999 | limit 1000 | championship --------------------------------------------------------------------------------
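A minimal usage sketch of the combinators defined in talkgenerator/util/generator_util.py above (illustration only, not part of the repository; the weights, URLs, and seed value are made-up):

from talkgenerator.util.generator_util import (
    CombinedGenerator,
    SeededGenerator,
    StaticGenerator,
)

# Weighted 3:1 choice between two constant generators. CombinedGenerator keeps
# falling back to the remaining entries whenever the chosen generator returns None.
cat_or_dog = CombinedGenerator(
    (3, StaticGenerator("https://example.com/cat.png")),
    (1, StaticGenerator("https://example.com/dog.png")),
)

# SeededGenerator adapts a seed-based callable so it can be called with the
# presentation_context dict that schema/content_generators.py passes around.
image_for_slide = SeededGenerator(cat_or_dog)

if __name__ == "__main__":
    print(image_for_slide({"seed": "cats"}))  # prints one of the two URLs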