├── .dockerignore ├── .gitattributes ├── .gitignore ├── .gitmodules ├── .idea └── .gitignore ├── LICENSE ├── README.md ├── agents ├── __init__.py ├── local_agent.py ├── port_forwarding.sh ├── portforwarding.sh ├── remote_non_persistent.py └── remote_psql_persistent.py ├── chirpy ├── __init__.py ├── annotators │ ├── __init__.py │ ├── blenderbot.py │ ├── colbertinfiller.py │ ├── convpara.py │ ├── coref.py │ ├── corenlp.py │ ├── dialogact.py │ ├── dialogptranker.py │ ├── emotion.py │ ├── g2p.py │ ├── gpt2ed.py │ ├── gpt2ranker.py │ ├── infiller.py │ ├── navigational_intent │ │ ├── __init__.py │ │ ├── navigational_intent.py │ │ └── regex_templates.py │ ├── neural_entity_linker.py │ ├── question.py │ ├── responseranker.py │ ├── sentseg.py │ └── stanfordnlp.py ├── core │ ├── __init__.py │ ├── asr │ │ ├── __init__.py │ │ ├── g2p.py │ │ ├── index_phone_to_ent.py │ │ ├── lattice.py │ │ └── search_phone_to_ent.py │ ├── blacklists │ │ ├── __init__.py │ │ └── blacklists.py │ ├── callables.py │ ├── canary.py │ ├── default_rg_states.py │ ├── dialog_manager.py │ ├── entity_linker │ │ ├── __init__.py │ │ ├── entity_groups.py │ │ ├── entity_linker.py │ │ ├── entity_linker_classes.py │ │ ├── entity_linker_simple.py │ │ ├── lists.py │ │ ├── low_prec.txt │ │ ├── resolve_conflicts.py │ │ ├── short_keys.txt │ │ ├── thresholds.py │ │ ├── untalkable_wikidata_classes.txt │ │ ├── util.py │ │ └── wiki_data_fetching.py │ ├── entity_tracker │ │ └── entity_tracker.py │ ├── es_config.json │ ├── experiment.py │ ├── flags.py │ ├── handler.py │ ├── latency.py │ ├── logging_formatting.py │ ├── logging_utils.py │ ├── offensive_classifier │ │ ├── __init__.py │ │ ├── data_original │ │ │ ├── README.txt │ │ │ └── full-list-of-bad-words_text-file_2018_07_30.txt │ │ ├── data_preprocessed │ │ │ ├── README.txt │ │ │ └── offensive_phrases_preprocessed.txt │ │ ├── offensive_classifier.py │ │ ├── preprocess_blacklist.py │ │ └── util.py │ ├── priority_ranking_strategy.py │ ├── regex │ │ ├── __init__.py │ │ ├── 
names.txt │ │ ├── regex_template.py │ │ ├── response_lists.py │ │ ├── templates.py │ │ ├── test_basic_regexes.py │ │ ├── util.py │ │ └── word_lists.py │ ├── response_generator │ │ ├── __init__.py │ │ ├── helpers.py │ │ ├── neural_helpers.py │ │ ├── regex_templates │ │ │ ├── __init__.py │ │ │ ├── change_topic_template.py │ │ │ ├── disinterested_template.py │ │ │ ├── no_template.py │ │ │ ├── request_repeat.py │ │ │ ├── say_that_again_template.py │ │ │ └── yes_template.py │ │ ├── response_generator.py │ │ ├── response_template.py │ │ ├── response_type.py │ │ ├── state.py │ │ └── treelet.py │ ├── response_generator_datatypes.py │ ├── response_priority.py │ ├── smooth_handoffs.py │ ├── state.py │ ├── state_manager.py │ ├── test_args.py │ ├── user_attributes.py │ └── util.py ├── data │ ├── cmudict.pkl │ ├── long_stopwords.text │ ├── long_stopwords.txt │ └── spoken_unigram_freqs.csv └── response_generators │ ├── __init__.py │ ├── acknowledgment │ ├── __init__.py │ ├── acknowledgment_helpers.py │ ├── acknowledgment_response_generator.py │ └── state.py │ ├── aliens │ ├── __init__.py │ ├── aliens_helpers.py │ ├── aliens_response_generator.py │ ├── aliens_responses.py │ ├── regex_templates │ │ ├── __init__.py │ │ ├── disinterested_template.py │ │ ├── no_template.py │ │ └── yes_template.py │ ├── state.py │ └── treelets │ │ ├── __init__.py │ │ ├── introductory_treelet.py │ │ ├── question_treelet.py │ │ ├── turn_fifth_treelet.py │ │ ├── turn_first_treelet.py │ │ ├── turn_fourth_treelet.py │ │ ├── turn_second_treelet.py │ │ └── turn_third_treelet.py │ ├── categories │ ├── categories.json │ ├── categories.py │ ├── categories_helpers.py │ ├── categories_response_generator.py │ ├── regex_templates.py │ ├── state.py │ └── treelets │ │ ├── handle_answer_treelet.py │ │ └── introductory_treelet.py │ ├── closing_confirmation │ ├── __init__.py │ ├── closing_confirmation_response_generator.py │ └── state.py │ ├── complaint │ ├── __init__.py │ ├── complaint_helpers.py │ └── 
complaint_response_generator.py │ ├── fallback │ ├── __init__.py │ ├── fallback_response_generator.py │ ├── response_templates.py │ └── state.py │ ├── food │ ├── __init__.py │ ├── food_helpers.py │ ├── food_response_generator.py │ ├── regex_templates │ │ ├── __init__.py │ │ ├── regex_templates.py │ │ ├── scraped_data.json │ │ ├── scraped_final.json │ │ └── word_lists.py │ ├── scraper.py │ ├── state.py │ └── treelets │ │ ├── acknowledge_user_comment.py │ │ ├── ask_favorite_food_treelet.py │ │ ├── comment_on_favorite_type_treelet.py │ │ ├── data.json │ │ ├── factoid_treelet.py │ │ ├── get_other_type_treelet.py │ │ ├── introductory_treelet.py │ │ └── open_ended_user_comment_treelet.py │ ├── launch │ ├── __init__.py │ ├── launch_helpers.py │ ├── launch_response_generator.py │ ├── state.py │ └── treelets │ │ ├── __init__.py │ │ ├── first_turn_treelet.py │ │ ├── handle_name_treelet.py │ │ └── recognized_name_treelet.py │ ├── music │ ├── expression_lists.py │ ├── music_helpers.py │ ├── music_response_generator.py │ ├── regex_templates │ │ ├── __init__.py │ │ ├── name_favorite_song_template.py │ │ └── word_lists.py │ ├── response_templates │ │ ├── general_templates.py │ │ └── handle_opinion_template.py │ ├── state.py │ ├── treelets │ │ ├── __init__.py │ │ ├── ask_singer_treelet.py │ │ ├── ask_song_treelet.py │ │ ├── get_instrument_treelet.py │ │ ├── get_singer_treelet.py │ │ ├── get_song_treelet.py │ │ ├── handle_opinion_treelet.py │ │ ├── handoff_treelet.py │ │ └── introductory_treelet.py │ └── utils.py │ ├── neural_chat │ ├── __init__.py │ ├── neural_chat_response_generator.py │ ├── state.py │ ├── treelets │ │ ├── __init__.py │ │ ├── abstract_treelet.py │ │ ├── currentandrecentactivities_treelet.py │ │ ├── emotions │ │ │ ├── __init__.py │ │ │ ├── classify_mood.py │ │ │ └── emotions_treelet.py │ │ ├── familymember_treelets │ │ │ ├── __init__.py │ │ │ ├── familymember_treelets.py │ │ │ └── util.py │ │ ├── food_treelet.py │ │ ├── futureactivities_treelet.py │ │ ├── 
generalactivities_treelet.py │ │ ├── icebreaker_treelet.py │ │ └── livingsituation_treelet.py │ └── util.py │ ├── neural_fallback │ ├── __init__.py │ ├── neural_fallback_response_generator.py │ ├── neural_helpers.py │ └── state.py │ ├── offensive_user │ ├── __init__.py │ ├── data │ │ ├── __init__.py │ │ ├── contextual_responses.csv │ │ ├── gen_noncontextual_categorical_responses.py │ │ ├── label.py │ │ ├── noncontextual_categorical_responses.csv │ │ ├── offensive_responses.csv │ │ ├── templates.py │ │ └── type_of_offenses.csv │ ├── offensive_user_helpers.py │ ├── offensive_user_response_generator.py │ └── state.py │ ├── one_turn_hack │ ├── __init__.py │ ├── one_turn_hack_response_generator.py │ ├── one_turn_hack_utils.py │ ├── regex_templates │ │ ├── __init__.py │ │ ├── chatty_template.py │ │ ├── compliment_template.py │ │ ├── request_age_template.py │ │ ├── request_conversation_template.py │ │ ├── request_name_template.py │ │ ├── request_play_template.py │ │ ├── request_story_template.py │ │ └── say_that_again_template.py │ ├── responses.py │ └── state.py │ ├── opinion2 │ ├── __init__.py │ ├── abstract_policy.py │ ├── constants.py │ ├── opinion_regex_templates.py │ ├── opinion_response_generator.py │ ├── opinion_sql.py │ ├── phrasing │ │ └── meta_templates.csv │ ├── playground.py │ ├── policies │ │ ├── always_disagree_policy.py │ │ ├── baseline_policies.py │ │ ├── disagree_agree_policy.py │ │ ├── disagree_agree_switch_agree_policy.py │ │ ├── disagree_switch_agree_policy.py │ │ ├── one_turn_agree_policy.py │ │ ├── short_soft_disagree_policy.py │ │ ├── soft_disagree_switch_agree_policy.py │ │ └── two_turn_agree_policy.py │ ├── state_actions.py │ ├── user_interest │ │ ├── common_solicit_agree_responses.csv │ │ ├── common_solicit_opinion_responses.csv │ │ ├── common_solicit_opinion_responses_labeled.csv │ │ ├── common_solicit_reason_responses.csv │ │ ├── common_solicit_reason_responses_labeled.csv │ │ └── label.py │ ├── utils.py │ └── utterancify.py │ ├── 
personal_issues │ ├── __init__.py │ ├── personal_issues_helpers.py │ ├── personal_issues_response_generator.py │ ├── regex_templates │ │ ├── __init__.py │ │ ├── change_topic_template.py │ │ ├── first_person_template.py │ │ ├── gratitude_template.py │ │ ├── negative_emotion_template.py │ │ ├── personal_pronoun_template.py │ │ ├── personal_sharing_template.py │ │ └── yes_template.py │ ├── response_templates │ │ ├── __init__.py │ │ ├── ending_response_template.py │ │ ├── first_turn_response_template.py │ │ ├── gpt_prefix_response_template.py │ │ ├── paraphraser.py │ │ ├── personal_issues_templates.py │ │ ├── possible_continue_response_templates.py │ │ ├── response_components.py │ │ └── subsequent_turn_response_template.py │ ├── state.py │ └── treelets │ │ ├── __init__.py │ │ ├── ending_treelet.py │ │ ├── first_turn_treelet.py │ │ ├── possible_continue_accepted_treelet.py │ │ ├── possible_continue_treelet.py │ │ └── subsequent_turn_treelet.py │ ├── red_question │ ├── __init__.py │ ├── red_question_helpers.py │ ├── red_question_response_generator.py │ └── regex_templates │ │ ├── __init__.py │ │ └── are_you_recording_template.py │ ├── transition │ ├── __init__.py │ ├── failed_transition_to_entities.py │ ├── state.py │ ├── transition_helpers.py │ └── transition_response_generator.py │ └── wiki2 │ ├── __init__.py │ ├── acks_templates.txt │ ├── blacklists.py │ ├── category_associations.txt │ ├── category_ranking.txt │ ├── doctest.sh │ ├── first_turn_templates_notes.md │ ├── handwritten_infills.json │ ├── pronouns.py │ ├── regex_templates │ ├── __init__.py │ ├── acknowledgment_template.py │ ├── confused_template.py │ ├── did_you_know_template.py │ └── receptive_template.py │ ├── response_templates.json │ ├── response_templates │ ├── AcknowledgeUserKnowledgeTemplate.py │ ├── CheckUserKnowledgeTemplate.py │ ├── __init__.py │ └── response_components.py │ ├── section_templates.json │ ├── state.py │ ├── treelets │ ├── TILtreelet.py │ ├── __init__.py │ ├── 
acknowledge_user_knowledge_treelet.py │ ├── check_user_knowledge_treelet.py │ ├── discuss_article_treelet.py │ ├── discuss_section_further_treelet.py │ ├── discuss_section_treelet.py │ ├── factoid_treelet.py │ ├── get_opinion_treelet.py │ ├── intro_entity_treelet.py │ ├── open_question_treelet.py │ ├── open_questions.py │ └── recheck_interest_treelet.py │ ├── wiki_helpers.py │ ├── wiki_infiller.py │ ├── wiki_response_generator.py │ └── wiki_utils.py ├── exclude_files.txt ├── find-old-lines.pl ├── find-oldest-line.sh ├── local_test_integ.sh ├── local_test_regex.sh ├── precompiled └── psycopg2 │ ├── __init__.py │ ├── _ipaddress.py │ ├── _json.py │ ├── _psycopg.cpython-37m-x86_64-linux-gnu.so │ ├── _range.py │ ├── errorcodes.py │ ├── extensions.py │ ├── extras.py │ ├── pool.py │ ├── psycopg1.py │ ├── sql.py │ └── tz.py ├── requirements.txt ├── scrapers ├── reddit_til │ └── stream_es.py └── twitter │ ├── entity_clusters_linked.csv │ ├── init_es.py │ ├── init_postgres.py │ ├── label.py │ ├── requirements.txt │ ├── stream.py │ ├── stream_postgres.py │ └── upload_entity_clusters_linked.py ├── servers ├── __init__.py ├── local │ ├── __init__.py │ ├── local_callable_manager.py │ └── shell_chat.py ├── remote │ ├── Dockerfile │ ├── README.md │ ├── chat_api.py │ ├── requirements.txt │ └── static │ │ ├── chirpy.js │ │ ├── index.html │ │ ├── send.svg │ │ └── style.css └── subsystem │ └── subsytem_server.py ├── test ├── Dockerfile_regex ├── __init__.py └── integration_tests │ ├── __init__.py │ ├── categories.py │ ├── closing_confirmation.py │ ├── commands.py │ ├── corenlp.py │ ├── coronavirus.py │ ├── dialogact.py │ ├── entity_linker.py │ ├── entity_tracker.py │ ├── flags.py │ ├── gpu_tests │ ├── neural_chat.py │ └── wiki_convpara.py │ ├── integration_base.py │ ├── launch.py │ ├── nav_intent.py │ ├── offensive_classifier.py │ ├── offensiveuser.py │ ├── one_turn_hack.py │ ├── opinion.py │ ├── question.py │ ├── red_question.py │ ├── stanfordnlp.py │ ├── stop.py │ └── wiki.py └── 
wiki-es-dump ├── define_es.py ├── preprocess.py ├── requirements.txt ├── upload.py └── wiki-setup.md /.dockerignore: -------------------------------------------------------------------------------- 1 | .git 2 | convpara -------------------------------------------------------------------------------- /.gitattributes: -------------------------------------------------------------------------------- 1 | # files/dirs marked as export-ignore are not included in "git archive" commands 2 | # https://feeding.cloud.geek.nz/posts/excluding-files-from-git-archive/ 3 | # You need to COMMIT your changes to .gitattributes before it will have effect 4 | .git export-ignore 5 | convpara export-ignore 6 | doc export-ignore 7 | notebooks export-ignore 8 | cruft export-ignore 9 | entity_linker_eval export-ignore 10 | idf export-ignore 11 | docker export-ignore 12 | 13 | *.ipynb diff=jupyternotebook 14 | 15 | *.ipynb merge=jupyternotebook 16 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | interactive_mode_logs/* 2 | integration_test_logs/* 3 | integ_test_results.txt 4 | aws_integration_test.log 5 | aws_gpu_integration_test.log 6 | aws_regex_test.log 7 | */__pycache__/* 8 | *~ 9 | rendered/ 10 | *.pyc 11 | .mypy_cache 12 | .vscode 13 | .idea 14 | .DS_Store 15 | 16 | 17 | integ_test/local_test.log 18 | 19 | Pipfile 20 | Pipfile.lock 21 | 22 | .DS_Store 23 | docker/redquestiondetector/dataset/ 24 | 25 | 26 | notebooks/inspect_idf.ipynb 27 | notebooks/gpt2_conv_testing.ipynb 28 | notebooks/frequent_entities.json 29 | notebooks/personal_topics.ipynb 30 | notebooks/.ipynb_checkpoints/* 31 | notebooks/convs-*.json 32 | notebooks/annotated-convs-*.json 33 | notebooks/neuralchat_data/ 34 | chirpy/core/entity_linker/wikilinks_results_*.txt 35 | chirpy/core/entity_linker/*exs/* 36 | chirpy/annotators/navigational_intent/test_nav_intent.py 37 | 38 | 
notebooks/common-utterances-*.json 39 | entity_linker_eval/eval_results/* 40 | entity_linker_eval/data/* 41 | chirpy/core/entity_linker/entity_linker_latency.png 42 | chirpy/core/entity_tracker/entity_tracker_latency.png 43 | idf/*idfs.json 44 | 45 | alexa.zip 46 | 47 | convpara/* 48 | 49 | chirpy/core/es_config.json 50 | -------------------------------------------------------------------------------- /.gitmodules: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanfordnlp/chirpycardinal/a43f0cf3cc21c406a4acd60428d53c44c15f5947/.gitmodules -------------------------------------------------------------------------------- /.idea/.gitignore: -------------------------------------------------------------------------------- 1 | # Default ignored files 2 | /workspace.xml -------------------------------------------------------------------------------- /agents/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanfordnlp/chirpycardinal/a43f0cf3cc21c406a4acd60428d53c44c15f5947/agents/__init__.py -------------------------------------------------------------------------------- /agents/port_forwarding.sh: -------------------------------------------------------------------------------- 1 | kubectl port-forward corenlp-7fd4974bb-8mq5g 4080:5001 -n chirpy & 2 | kubectl port-forward dialogact-849b4b67d8-dtqvt 4081:5001 -n chirpy & 3 | kubectl port-forward g2p-7644ff75bd-cjj57 4082:5001 -n chirpy & 4 | kubectl port-forward gpt2ed-68f849f64b-wr8zw 4083:5001 -n chirpy & 5 | kubectl port-forward questionclassifier-668c4fd6c6-fd586 4084:5001 -n chirpy & 6 | kubectl port-forward convpara-dbdc8dcfb-csktj 4085:5001 -n chirpy & 7 | kubectl port-forward entitylinker-59b9678b8-nmwx9 4086:5001 -n chirpy & 8 | kubectl port-forward blenderbot-695c7b5896-gkz2s 4087:5001 -n chirpy & 9 | kubectl port-forward responseranker-666ff584c6-nr9f4 4088:5001 -n 
chirpy & 10 | kubectl port-forward stanfordnlp-6894cd686b-j2qk2 4089:5001 -n chirpy & 11 | kubectl port-forward infiller-bfdf7d9cb-ms7x2 4090:5001 -n chirpy & 12 | kubectl port-forward postgresql-postgresql-0 5432:5432 -n chirpy -------------------------------------------------------------------------------- /agents/portforwarding.sh: -------------------------------------------------------------------------------- 1 | kubectl port-forward deployment/corenlp 4080:5001 -n chirpy & 2 | kubectl port-forward deployment/dialogact 4081:5001 -n chirpy & 3 | kubectl port-forward deployment/g2p 4082:5001 -n chirpy & 4 | kubectl port-forward deployment/gpt2ed 4083:5001 -n chirpy & 5 | kubectl port-forward deployment/questionclassifier 4084:5001 -n chirpy & 6 | # kubectl port-forward deployment/convpara 4085:5001 -n chirpy & 7 | kubectl port-forward deployment/entitylinker 4086:5001 -n chirpy & 8 | kubectl port-forward deployment/blenderbot 4087:5001 -n chirpy & 9 | kubectl port-forward deployment/responseranker 4088:5001 -n chirpy & 10 | kubectl port-forward deployment/stanfordnlp 4089:5001 -n chirpy & 11 | kubectl port-forward deployment/infiller 4090:5001 -n chirpy & 12 | # kubectl port-forward deployment/postgresql 5432:5432 -n chirpy -------------------------------------------------------------------------------- /chirpy/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanfordnlp/chirpycardinal/a43f0cf3cc21c406a4acd60428d53c44c15f5947/chirpy/__init__.py -------------------------------------------------------------------------------- /chirpy/annotators/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanfordnlp/chirpycardinal/a43f0cf3cc21c406a4acd60428d53c44c15f5947/chirpy/annotators/__init__.py -------------------------------------------------------------------------------- /chirpy/annotators/g2p.py: 
class NeuralGraphemeToPhoneme(Annotator):
    """Annotator that forwards text to the remote ``g2p`` (grapheme-to-phoneme) module."""
    name = 'g2p'

    def __init__(self, state_manager: StateManager, timeout=1, url=None, input_annotations=None):
        # Build a fresh list per instance instead of sharing one mutable default list
        # between every annotator constructed without explicit input_annotations.
        super().__init__(state_manager=state_manager, timeout=timeout, url=url,
                         input_annotations=[] if input_annotations is None else input_annotations)

    def get_default_response(self, input_data: str) -> Optional[List[str]]:
        """The default response to be returned in case this module's execute fails, times out or is cancelled"""
        # None (rather than an empty list) lets callers tell "no remote answer" apart from a real result.
        return None

    def execute(self, input_data: Optional[str] = None) -> Optional[List[str]]:
        """
        Send input_data to the remote g2p module and return its answer.

        Args:
            input_data (str): the text to convert; it is posted to the remote module as {'text': input_data}

        Returns:
            - [] if input_data is None (the remote module is not called),
            - the value stored under 'response' in the remote output on success
              (presumably the list of phonemes for the text -- confirm against the remote module),
            - get_default_response(input_data), i.e. None, if the remote output is empty, flags an
              error, or has no 'response' field.
        """
        if input_data is None:
            return []

        logger.debug(f'Calling g2p Remote module with text="{input_data}"')
        output = self.remote_call({'text': input_data})

        if not output or output.get('error', False):
            # The message used to name the SentSeg module (copy-paste slip), which sent
            # anyone reading the logs to the wrong service.
            logger.error(f'Error when running g2p Remote Module. \n'
                         f'Response: {output}.')
            return self.get_default_response(input_data)

        if 'response' in output:
            return output['response']
        return self.get_default_response(input_data)
class ResponseRanker(Annotator):
    """Annotator that asks the remote ``responseranker`` module to score candidate responses."""
    name = 'responseranker'

    def __init__(self, state_manager: StateManager, timeout=3, url=None, input_annotations=None):
        # Build a fresh list per instance instead of sharing one mutable default list.
        super().__init__(state_manager=state_manager, timeout=timeout, url=url,
                         input_annotations=[] if input_annotations is None else input_annotations)

    def get_default_response(self, input_data=None):
        """The default response to be returned in case this module's execute fails, times out or is cancelled

        Returns a dict flagged with "error": True whose "score" and "updown" lists hold one 0 per
        candidate in input_data['responses']. When input_data is None or has no 'responses'
        (this method's own default argument used to raise a TypeError here), both lists are empty.
        """
        responses = (input_data or {}).get('responses') or []
        N = len(responses)
        return {"error": True, "score": [0] * N, "updown": [0] * N}

    @measure
    def execute(self, input_data=None):
        """
        Input data should be a dict with keys 'context', 'responses', where
            - 'context': User's last utterance, type str.
            - 'responses': Possible next-turn responses, type List[str].
            - 'config': Remote module configuration; currently, just a dict mapping model names to bools
                        that determine which ranking models to use.

        It returns a mapping of possible responses to 'updown', 'width', and 'depth' scores as
        given by the DialogRPT (https://arxiv.org/abs/2009.06978) model family in the SAME ORDER as
        those passed in by responses:
            - 'updown': List[float] in [0, 1]
            - 'width': List[float] in [0, 1]
            - 'depth': List[float] in [0, 1]

        Higher is better. Each model call can be expected to take 20-30ms per sequence.
        """
        # The payload is forwarded untouched; the remote module's output is returned as-is.
        return self.remote_call(input_data)
26 | 27 | Args: 28 | input_data (str): text to be segmented into sentences 29 | 30 | Returns: 31 | List[str]: List of strings, each a sentence from the text 32 | """ 33 | if input_data is None: 34 | return [] 35 | 36 | 37 | logger.debug(f'Calling SentSeg Remote module with text="{input_data}"') 38 | #output = self.remote_call({'text': input_data}) 39 | output = None 40 | if not output or output.get('error', False): 41 | logger.error(f'Error when running SentSeg Remote Module. \n' 42 | f'Response: {output}.') 43 | return re.split('[.\n]', input_data) 44 | #raise RemoteServiceModuleError 45 | else: 46 | if 'response' in output: 47 | return [s.strip() for s in itertools.chain(*(s.split('\n') for s in output['response'])) if s.strip()] 48 | else: 49 | return self.get_default_response(input_data) 50 | -------------------------------------------------------------------------------- /chirpy/core/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanfordnlp/chirpycardinal/a43f0cf3cc21c406a4acd60428d53c44c15f5947/chirpy/core/__init__.py -------------------------------------------------------------------------------- /chirpy/core/asr/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanfordnlp/chirpycardinal/a43f0cf3cc21c406a4acd60428d53c44c15f5947/chirpy/core/asr/__init__.py -------------------------------------------------------------------------------- /chirpy/core/asr/g2p.py: -------------------------------------------------------------------------------- 1 | # This file contains the g2p function that converts a span to one phonetic representation (grapheme to phoneme). 2 | # This function is used for indexing anchortexts and when input spans are looked up in the index. 
def simple_g2p(span: str) -> List[str]:
    """A simple dictionary-based grapheme to phoneme algorithm.

    The span is lowercased and split on whitespace. Each word contributes the phonemes of its
    FIRST pronunciation in CMUDICT; a word missing from CMUDICT contributes the single
    placeholder '?'. Consecutive words are separated by a ' ' element.

    Returns:
        The flat list of phonemes, or [] when the span contains no words.
    """
    first_pronunciations = [CMUDICT.get(word, ['?'])[0] for word in span.lower().split()]

    phonemes: List[str] = []
    for position, pronunciation in enumerate(first_pronunciations):
        if position:
            # word boundary marker between consecutive words
            phonemes.append(' ')
        phonemes.extend(pronunciation.split())
    return phonemes


@lru_cache(maxsize=32768)
def g2p(span: str, g2p_module = None) -> List[str]:
    """Use the remote g2p module for grapheme to phoneme conversion when simple dict-based method fails.

    Args:
        span: the text to convert.
        g2p_module: object exposing execute(span); it is only consulted when the dictionary
            lookup left at least one '?' placeholder. May be None, in which case the
            dictionary result is returned unchanged.

    Returns:
        The dictionary-based phonemes when every word was found, when no module is given, or
        when the module raises or returns None; otherwise whatever the module returned.

    NOTE: because of lru_cache, g2p_module is part of the cache key and must be hashable.
    Results are cached per (span, g2p_module), including fallbacks taken after a remote
    failure, and the cached list object is handed to every caller -- do not mutate it.
    """
    simple_phonemes = simple_g2p(span)
    if '?' not in simple_phonemes:
        return simple_phonemes

    # Handle the default argument explicitly instead of relying on the AttributeError
    # from None.execute(...) being swallowed below.
    if g2p_module is None:
        return simple_phonemes

    try:
        phonemes = g2p_module.execute(span)
    except Exception:
        # Deliberate best-effort: any remote failure degrades to the dictionary result.
        return simple_phonemes

    return simple_phonemes if phonemes is None else phonemes
logger = logging.getLogger('chirpylogger')


def is_already_canary(event):
    """Returns True iff this event is part of a session that is already a canary session"""
    # 'attributes' can be missing from the session (mark_as_canary creates it in that case);
    # a present-but-None value is treated the same way instead of raising a TypeError.
    attributes = event['session'].get('attributes') or {}
    return bool(attributes.get('canary_session'))


def mark_as_canary(event):
    """Marks this event as part of a canary session (mutates event['session']['attributes'] in place)"""
    session = event['session']
    if 'attributes' not in session:
        logger.info(f"'attributes' not in event['session']: {event}")
        session['attributes'] = {}
    session['attributes']['canary_session'] = True
    logger.info(f'Marked this event as a canary: {event}')


def should_be_canary(event):
    """Determines whether this turn should be part of canary or not.

    Returns True when the session is already marked as canary, or when this is a new session
    that gets randomly assigned to canary; in the latter case the event is marked via
    mark_as_canary. Random assignment requires both environment variables to be set:
        - CANARY_RATIO: probability (parsed as a float) with which a new session becomes canary
        - CANARY_ON: must be exactly 'TRUE'
    A CANARY_RATIO that cannot be parsed as a number is logged and treated as "no canary"
    rather than raising inside request handling.
    """
    # If it's already a canary, return True
    if is_already_canary(event):
        return True

    # Get the canary_ratio environment variable.
    canary_ratio = os.environ.get('CANARY_RATIO')
    logger.info(f'canary_ratio is {canary_ratio}')
    if canary_ratio is None:
        return False
    canary_on = os.environ.get('CANARY_ON')
    logger.info(f'canary_on is {canary_on}')
    if canary_on is None:
        return False

    # Parse the ratio once; a malformed value must not crash the turn.
    try:
        ratio = float(canary_ratio)
    except ValueError:
        logger.error(f'CANARY_RATIO={canary_ratio!r} is not a number; not assigning this session to canary')
        return False

    # If this is a new session, randomly decide whether this should be a canary conversation
    is_new_session = event['session'].get('new', False)
    if is_new_session and ratio > 0 and random.random() < ratio and canary_on == 'TRUE':
        sessionId = event['session']['sessionId']
        mark_as_canary(event)
        logger.info(f'This session {sessionId} has been assigned to canary test (canary_ratio={canary_ratio})')
        return True

    return False
chirpy.response_generators.neural_fallback.state import State as NeuralFallbackState 11 | from chirpy.response_generators.offensive_user.state import State as OffensiveUserState 12 | from chirpy.response_generators.one_turn_hack.state import State as OneTurnHackState 13 | #from chirpy.response_generators.opinion2.state_actions import State as OpinionState 14 | from chirpy.response_generators.personal_issues.state import State as PersonalIssuesState 15 | from chirpy.response_generators.sports.state import State as SportsState 16 | from chirpy.response_generators.pets.state import State as PetsState 17 | from chirpy.response_generators.transition.state import State as TransitionState 18 | from chirpy.response_generators.music.state import State as MusicState 19 | from chirpy.response_generators.wiki2.state import State as WikiState 20 | from chirpy.response_generators.reopen.state import State as ReopenState 21 | 22 | DEFAULT_RG_STATES = { 23 | 'ACKNOWLEDGMENT': AcknowledgmentState(), 24 | 'ALEXA_COMMANDS': BaseState(), 25 | 'ALIENS': AliensState(), 26 | 'CATEGORIES': CategoriesState(), 27 | 'CLOSING_CONFIRMATION': ClosingConfirmationState(), 28 | 'COMPLAINT': BaseState(), 29 | 'FALLBACK': FallbackState(), 30 | 'FOOD': FoodState(), 31 | 'LAUNCH': LaunchState(), 32 | 'MUSIC': MusicState(), 33 | 'NEURAL_CHAT': NeuralChatState(), 34 | 'NEURAL_FALLBACK': NeuralFallbackState(), 35 | 'OFFENSIVE_USER': OffensiveUserState(), 36 | 'ONE_TURN_HACK': OneTurnHackState(), 37 | #'OPINION': OpinionState(), 38 | 'PERSONAL_ISSUES': PersonalIssuesState(), 39 | 'RED_QUESTION': BaseState(), 40 | 'TRANSITION': TransitionState(), 41 | 'WIKI': WikiState(), 42 | 'REOPEN': ReopenState(), 43 | } 44 | 45 | 46 | def is_default_state(rg_name, state): 47 | return DEFAULT_RG_STATES[rg_name] == state 48 | -------------------------------------------------------------------------------- /chirpy/core/entity_linker/__init__.py: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanfordnlp/chirpycardinal/a43f0cf3cc21c406a4acd60428d53c44c15f5947/chirpy/core/entity_linker/__init__.py -------------------------------------------------------------------------------- /chirpy/core/entity_linker/thresholds.py: -------------------------------------------------------------------------------- 1 | """This file contains some thresholds that are used in the entity linker""" 2 | 3 | SCORE_THRESHOLD_HIGHPREC = 0 # Generally, a LinkedSpan needs its top entity to have a score greater than this to be high precision 4 | SCORE_THRESHOLD_EXPECTEDTYPE = 0 # If we have an expected type, we look for entities of expected type with this score or higher to become cur_entity 5 | SCORE_THRESHOLD_NAV_ABOUT = 0 # when the user says "I want to talk about X", we look for any entities with this score or higher in X 6 | SCORE_THRESHOLD_NAV_NOT_ABOUT = 0 # when the user says "I want to talk X", we look for any entities with this score or higher in X 7 | SCORE_THRESHOLD_ELIMINATE_OUTER_SPAN = 0 # if an outer span has a score below this, and an inner span has score above SCORE_THRESHOLD_HIGHPREC, the inner span wins 8 | SCORE_THRESHOLD_CHOOSE_INNER_SPAN_OF_TYPE = 0 # if an inner span has a score above this and is of expected type, and the outer span has a lower score and is not of expected type, the inner span wins 9 | SCORE_THRESHOLD_ELIMINATE_DONT_LINK_WORDS = 0 # in a LinkedSpan, if a candidate entity has a score below this, and the span consists entirely of DONT_LINK_WORDS, discard the candidate entity 10 | SCORE_THRESHOLD_ELIMINATE_HIGHFREQUNIGRAM_SPAN = 0 # in a LinkedSpan, if a candidate entity has a score below this, and the span consists entirely of high-freq unigrams, discard the candidate entity (see usage for more detailed rules) 11 | SCORE_THRESHOLD_ELIMINATE = 0 # in a LinkedSpan, if a candidate entity has a score below this, remove it 12 | 
UNIGRAM_FREQ_THRESHOLD = 9 # unigram spans need a frequency lower than this to be high precision -------------------------------------------------------------------------------- /chirpy/core/es_config.json: -------------------------------------------------------------------------------- 1 | { 2 | "wiki": {"url": "placeholder"} 3 | } 4 | -------------------------------------------------------------------------------- /chirpy/core/flags.py: -------------------------------------------------------------------------------- 1 | save_after_return = False 2 | progressive_response = False 3 | use_timeouts = True 4 | inf_timeout = 10**6 # this might be interpreted as 1 million seconds or 1 million milliseconds (1000 seconds) depending on the context; we make it large enough that it doesn't matter either way 5 | USE_ASR_ROBUSTNESS_OVERALL_FLAG = True # enable ASR robustness in the entity linker 6 | 7 | # This is the max size the entire item that we write to dynamodb 8 | SIZE_THRESHOLD = 400*1024 - 100 # 400 kb - 100 bytes for 9 | -------------------------------------------------------------------------------- /chirpy/core/offensive_classifier/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanfordnlp/chirpycardinal/a43f0cf3cc21c406a4acd60428d53c44c15f5947/chirpy/core/offensive_classifier/__init__.py -------------------------------------------------------------------------------- /chirpy/core/offensive_classifier/data_original/README.txt: -------------------------------------------------------------------------------- 1 | full-list-of-bad-words_text-file_2018_07_30.txt 2 | is from here: https://www.freewebheaders.com/full-list-of-bad-words-banned-by-google/ 3 | it has about 1700 phrases 4 | apparently it's a third party reconstruction of what Google uses for SafeSearch 5 | don't edit this file directly; instead put your modifications in blacklists.py 
def load_blacklist(fname):
    """Read a blacklist file and return its phrases as a list of lowercased strings.

    Each line of ``fname`` is one phrase; the trailing newline is removed and the
    phrase is lowercased. Blank (empty or whitespace-only) lines are skipped so
    that an empty string never ends up in the blacklist.
    """
    blacklist = []
    with open(fname, 'r') as f:
        for line in f:
            word = line.strip('\n').lower()
            if not word.strip():
                continue  # a blank line is not a phrase
            blacklist.append(word)
    return blacklist


def write_preprocessed_blacklist():
    """
    Load our blacklist and preprocess it: for each offensive phrase add spacing and
    hyphenation variants (words stuck together, hyphenated, or space-separated).
    Write the full preprocessed set, sorted, to file with one phrase per line.
    """

    outfile = os.path.join(os.path.dirname(__file__), '../../../chirpy/core/offensive_classifier/data_preprocessed/offensive_phrases_preprocessed.txt')

    # Load our additional blacklist and deduplicate it
    blacklist_file_path = os.path.join(os.path.dirname(__file__), '../../../chirpy/core/offensive_classifier/data_original/full-list-of-bad-words_text-file_2018_07_30.txt')
    blacklist = set(load_blacklist(blacklist_file_path))

    # Make some alternate versions. Iterate over a snapshot because the set grows inside the loop.
    for phrase in list(blacklist):
        phrase_words = phrase.split()
        if len(phrase_words) > 1:  # if it's a multi word phrase, try sticking the words together or hyphenating
            print(phrase, ''.join(phrase_words))
            blacklist.add(''.join(phrase_words))
            print(phrase, '-'.join(phrase_words))
            blacklist.add('-'.join(phrase_words))
        if '-' in phrase and '--' not in phrase:  # if it contains hyphens (e.g. jerk-off but not f--k), try sticking the words together or spacing them
            print(phrase, phrase.replace('-', ''))
            blacklist.add(phrase.replace('-', ''))
            print(phrase, ' '.join(phrase.replace('-', ' ').split()))
            blacklist.add(' '.join(phrase.replace('-', ' ').split()))

    # Write to file
    print(f'writing {len(blacklist)} phrases to {outfile}...')
    with open(outfile, 'w') as f:
        for phrase in sorted(blacklist):
            f.write(phrase + '\n')
    print('done')


if __name__ == "__main__":
    write_preprocessed_blacklist()
blacklist.append(word) 11 | return blacklist 12 | 13 | def write_preprocessed_blacklist(): 14 | """ 15 | Load our blacklist, and preprocess it (for each offensive phrase add plural versions, 16 | punctuation variants etc). Write the full preprocessed set to file. 17 | """ 18 | 19 | outfile = os.path.join(os.path.dirname(__file__), '../../../chirpy/core/offensive_classifier/data_preprocessed/offensive_phrases_preprocessed.txt') 20 | 21 | # Load and preprocess our additional blacklist 22 | blacklist_file_path = os.path.join(os.path.dirname(__file__), '../../../chirpy/core/offensive_classifier/data_original/full-list-of-bad-words_text-file_2018_07_30.txt') 23 | blacklist = load_blacklist(blacklist_file_path) # list of strings 24 | 25 | # Merge into set 26 | blacklist = set(blacklist) 27 | 28 | # Make some alternate versions 29 | for phrase in list(blacklist): 30 | phrase_words = phrase.split() 31 | if len(phrase_words) > 1: # if it's a multi word phrase, trying sticking the words together or hyphenating 32 | print(phrase, ''.join(phrase_words)) 33 | blacklist.add(''.join(phrase_words)) 34 | print(phrase, '-'.join(phrase_words)) 35 | blacklist.add('-'.join(phrase_words)) 36 | if '-' in phrase and '--' not in phrase: # if it contains hyphens (e.g. 
jerk-off but not f--k), try sticking the words together or spacing them 37 | print(phrase, phrase.replace('-', '')) 38 | blacklist.add(phrase.replace('-', '')) 39 | print(phrase, ' '.join(phrase.replace('-', ' ').split())) 40 | blacklist.add(' '.join(phrase.replace('-', ' ').split())) 41 | 42 | # Write to file 43 | print(f'writing {len(blacklist)} phrases to {outfile}...') 44 | with open(outfile, 'w') as f: 45 | for phrase in sorted(list(blacklist)): 46 | f.write(phrase + '\n') 47 | print('done') 48 | 49 | if __name__ == "__main__": 50 | write_preprocessed_blacklist() -------------------------------------------------------------------------------- /chirpy/core/regex/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanfordnlp/chirpycardinal/a43f0cf3cc21c406a4acd60428d53c44c15f5947/chirpy/core/regex/__init__.py -------------------------------------------------------------------------------- /chirpy/core/regex/util.py: -------------------------------------------------------------------------------- 1 | """This file is for generally useful regex patterns and functions""" 2 | 3 | # This regex matches any string, including the empty string. The ? makes it non-greedy. 4 | # Non-greedy means that it'll stop matching as soon as the next part of regex starts matching. 5 | from typing import List 6 | 7 | # This regex matches any string, including the empty string. The ? makes it non-greedy. 8 | # Non-greedy means that it'll stop matching as soon as the next part of regex starts matching. 9 | OPTIONAL_TEXT = '.*?' 10 | 11 | # This regex matches any string, except the empty string. The ? makes it non-greedy. 12 | NONEMPTY_TEXT = '.+?' 13 | 14 | # This regex matches: (a) any string ending in space, and (b) empty string. The ? makes it non-greedy. 15 | OPTIONAL_TEXT_PRE = '(.*? |)' 16 | 17 | # This regex matches: (a) any string starting with space, and (b) empty string. The ? makes it non-greedy. 
# Note: I (Abi) changed this from '( .*?|)' to '(| .*?)' because that gave the non-greedy behavior I wanted in one case,
# and all the regex tests are still passing, so it seems OK. Let me know if it's a problem.
OPTIONAL_TEXT_POST = '(| .*?)'

# This regex matches any non-empty string where the first character is a space and the last character is a space.
# The ? makes it non-greedy.
OPTIONAL_TEXT_MID = ' (.*? |)'

# Greedy versions of the above constants.
OPTIONAL_TEXT_GREEDY = '.*'
NONEMPTY_TEXT_GREEDY = '.+'
OPTIONAL_TEXT_PRE_GREEDY = '(.* |)'
OPTIONAL_TEXT_POST_GREEDY = '( .*|)'
# NOTE(review): this value is identical to the non-greedy OPTIONAL_TEXT_MID; a greedy form
# would be ' (.* |)'. Left unchanged here because existing templates may depend on the
# current captures - confirm before changing.
OPTIONAL_TEXT_MID_GREEDY = ' (.*? |)'


def oneof(lst: List[str]) -> str:
    """Given a list of regex patterns, returns a regex pattern that matches any one in the list"""
    assert isinstance(lst, list)
    return '({})'.format('|'.join(lst))


def one_or_more_spacesep(lst: List[str]) -> str:
    """
    Given a list of regex patterns, returns a regex pattern that matches any string which is one or more items from
    the list, space-separated (no space at the start or end).
    """
    assert isinstance(lst, list)
    return f'({oneof(lst)})( {oneof(lst)})*'


def zero_or_more_spacesep(lst: List[str]) -> str:
    """
    Given a list of regex patterns, returns a regex pattern that matches any string which is zero or more items from
    the list, space-separated (no space at the start or end).
    """
    assert isinstance(lst, list)
    item = oneof(lst)
    # The whole "item( item)*" sequence is optional as a unit, so a string cannot start with
    # the separator space. The wrapper is non-capturing, which keeps the capturing groups the
    # same as in one_or_more_spacesep.
    return f'(?:({item})( {item})*)?'
+ OPTIONAL_TEXT_POST, 22 | OPTIONAL_TEXT_PRE + "{switch}" + OPTIONAL_TEXT_MID + "talk" + OPTIONAL_TEXT_POST, 23 | OPTIONAL_TEXT_PRE + "{switch}" + OPTIONAL_TEXT_MID + "talking" + OPTIONAL_TEXT_POST 24 | ] 25 | positive_examples = [ 26 | ("let's talk about grand theft auto", {'change_topic': 'talk about'}), 27 | ("i don't want to talk about it", {'change_topic': 'talk about'}), 28 | ('can we talk about food', {'change_topic': 'talk about'}), 29 | ('can you tell me about wolves', {'change_topic': 'tell me about'}), 30 | ("can we switch the topic", {'switch': "switch"}), 31 | ("can we switch to talking about wolves", {"switch": "switch"}), 32 | ("let's change topics", {"switch": "change"}) 33 | ] 34 | negative_examples = [ 35 | "No, there isn't a problem", 36 | "i love playing on my nintendo switch" 37 | ] -------------------------------------------------------------------------------- /chirpy/core/response_generator/regex_templates/disinterested_template.py: -------------------------------------------------------------------------------- 1 | from chirpy.core.regex.regex_template import RegexTemplate 2 | from chirpy.core.regex.util import OPTIONAL_TEXT_POST, OPTIONAL_TEXT_PRE, OPTIONAL_TEXT_MID 3 | 4 | 5 | class DisinterestedTemplate(RegexTemplate): 6 | slots = {} 7 | 8 | templates = [ 9 | OPTIONAL_TEXT_PRE + "don't" + OPTIONAL_TEXT_MID + "care" + OPTIONAL_TEXT_POST, 10 | OPTIONAL_TEXT_PRE + 'not' + OPTIONAL_TEXT_MID + 'interested' + OPTIONAL_TEXT_POST, 11 | OPTIONAL_TEXT_PRE + 'do not' + OPTIONAL_TEXT_MID + 'care' + OPTIONAL_TEXT_POST, 12 | # OPTIONAL_TEXT_PRE + "don't like" + OPTIONAL_TEXT_POST, 13 | OPTIONAL_TEXT_PRE + "don't wanna" + OPTIONAL_TEXT_POST, 14 | OPTIONAL_TEXT_PRE + "i hate (this|it)" 15 | ] 16 | 17 | positive_examples = [ 18 | ("i don't really care", {}), 19 | ("yeah i hate it", {}) 20 | ] 21 | 22 | negative_examples = [ 23 | ] 24 | -------------------------------------------------------------------------------- 
/chirpy/core/response_generator/regex_templates/no_template.py: -------------------------------------------------------------------------------- 1 | from chirpy.core.regex.regex_template import RegexTemplate 2 | from chirpy.core.regex.util import OPTIONAL_TEXT, OPTIONAL_TEXT_POST, OPTIONAL_TEXT_PRE, OPTIONAL_TEXT_MID 3 | from chirpy.core.regex.word_lists import NEGATIVE_WORDS, CONTINUER 4 | 5 | class NoTemplate(RegexTemplate): 6 | slots = { 7 | 'neg_word': NEGATIVE_WORDS, 8 | 'continuer': list(set(CONTINUER) - {'yes', 'yea', 'yeah'}), 9 | 'safe': ["bad", "worries"] 10 | } 11 | templates = [ 12 | "{continuer} {neg_word}(?! {safe})" + OPTIONAL_TEXT_POST, 13 | "{neg_word}(?! {safe})" + OPTIONAL_TEXT_POST 14 | ] 15 | positive_examples = [ 16 | ("no", {'neg_word': 'no'}), 17 | ("no i don't want to talk about that", {'neg_word': 'no'}), 18 | ("hmm nah i don't think so", {'continuer': 'hmm', 'neg_word': 'nah'}), 19 | ("not especially", {'neg_word': 'not especially'}) 20 | ] 21 | negative_examples = [ 22 | "ok", 23 | "sure", 24 | "ok please tell me more", 25 | "i would really like to hear more", 26 | "i have no food", 27 | "i have no idea", 28 | "not bad", 29 | "no worries", 30 | "hmm no worries", 31 | "okay, i've no idea what you said", 32 | "yes but i will talk to you later maybe tomorrow" 33 | ] -------------------------------------------------------------------------------- /chirpy/core/response_generator/regex_templates/request_repeat.py: -------------------------------------------------------------------------------- 1 | from chirpy.core.regex import word_lists 2 | from chirpy.core.regex.regex_template import RegexTemplate 3 | from chirpy.core.regex.util import OPTIONAL_TEXT_POST, OPTIONAL_TEXT_PRE, OPTIONAL_TEXT_MID 4 | from chirpy.core.regex.word_lists import CONTINUER 5 | 6 | class RequestRepeatTemplate(RegexTemplate): 7 | slots = { 8 | "say_that_again": word_lists.SAY_THAT_AGAIN 9 | } 10 | templates = [ 11 | OPTIONAL_TEXT_PRE + "{say_that_again}", 12 | 
OPTIONAL_TEXT_PRE + "alexa {say_that_again}", 13 | ] 14 | positive_examples = [ 15 | ("what did you just say", {"say_that_again": "what did you just say"}), 16 | ("could you please repeat yourself", {"say_that_again": "please repeat yourself"}), 17 | ("alexa can you ask me that again", {"say_that_again": "can you ask me that again"}), 18 | ("repeat what you just said", {"say_that_again": "repeat what you just said"}), 19 | ("say that again", {"say_that_again": "say that again"}), 20 | ("alexa say that again please", {"say_that_again": "say that again please"}), 21 | ("what was the question", {"say_that_again": "what was the question"}), 22 | ("sorry what was that", {"say_that_again": "what was that"}), 23 | ("whoops i didn't quite catch that", {"say_that_again": "i didn't quite catch that"}), 24 | ("wait sorry alexa could you please just say that one more time", {"say_that_again": "say that one more time"}), 25 | ("i'm sorry i couldn't hear that", {"say_that_again": "i couldn't hear that"}), 26 | ("what", {"say_that_again": "what"}), 27 | ("what can you repeat again sorry alexa can i hear you", {"say_that_again": "can you repeat again"}) 28 | ] 29 | negative_examples = [ 30 | "i'm sorry to hear that" 31 | ] 32 | -------------------------------------------------------------------------------- /chirpy/core/response_generator/regex_templates/say_that_again_template.py: -------------------------------------------------------------------------------- 1 | from chirpy.core.regex.word_lists import SAY_THAT_AGAIN 2 | from chirpy.core.regex.regex_template import RegexTemplate 3 | 4 | class SayThatAgainTemplate(RegexTemplate): 5 | slots = { 6 | "say_that_again": SAY_THAT_AGAIN 7 | } 8 | templates = [ 9 | "{say_that_again}", 10 | "alexa {say_that_again}", 11 | ] 12 | positive_examples = [ 13 | ("what did you just say", {"say_that_again": "what did you just say"}), 14 | ("could you please repeat yourself", {"say_that_again": "could you please repeat"}), 15 | ("alexa can you ask 
# Affirmative words and phrases; YesTemplate accepts any of them anywhere in the utterance.
YES_WORDS = [
    "yes",
    "all right",
    "alright",
    "very well",
    "of course",
    "by all means",
    "sure",
    "certainly",
    "absolutely",
    "indeed",
    "right",
    "affirmative",
    "in the affirmative",
    "agreed",
    "roger",
    "aye aye",
    "yeah",
    "yep",
    "yeap",
    "yup",
    "ya",
    "uh-huh",
    "okay",
    "ok",
    "okey-dokey",
    "okey-doke",
    "yea",
    "aye",
    "duh",
    "guess so",
    "kind of",
]

# Words that count as "yes" only when they are the entire utterance.
SINGLE_YES_WORDS = [
    'course' # prevent false positives like "course of medication"
]

class YesTemplate(RegexTemplate):
    """
    Regex template for affirmative utterances: a YES_WORDS entry with optional surrounding text,
    a SINGLE_YES_WORDS entry on its own, or "i guess".
    """
    slots = {
        'yes_word': YES_WORDS,
        'single_word': SINGLE_YES_WORDS,
        'neutral_positive': ['guess'],
    }
    templates = [
        OPTIONAL_TEXT_PRE + "{yes_word}" + OPTIONAL_TEXT_POST,
        # No optional text around these two: they are written to cover the whole utterance.
        "{single_word}",
        "i {neutral_positive}",

    ]
    # Pairs of (utterance, expected slot values).
    positive_examples = [
        ("yes let's keep talking", {'yes_word': 'yes'}),
        ("alright i will keep talking", {'yes_word': 'alright'}),
        ("course", {'single_word': 'course'})
    ]

    negative_examples = [
        "i don't want to talk about this any more",
        "can we talk about something else",
        # "not right now"
    ]

class NotYesTemplate(RegexTemplate):
    """
    Catching false positives caused by YesTemplate
    (e.g. "right now", which contains the yes-word "right").
    """
    slots = {
        'phrase': ['right now']
    }
    templates = [
        OPTIONAL_TEXT_PRE + "{phrase}" + OPTIONAL_TEXT_POST
    ]
    positive_examples = [
        ("i'm not watching any tv show right now", {'phrase': 'right now'})
    ]

    negative_examples = [
    ]
15 | """ 16 | 17 | NO_UPDATE = "no-update" 18 | 19 | @dataclass 20 | class BaseState: 21 | prev_treelet_str: str = '' 22 | next_treelet_str: Optional[str] = '' 23 | response_types: Tuple[str] = () 24 | num_turns_in_rg: int = 0 25 | 26 | @dataclass 27 | class BaseConditionalState: 28 | prev_treelet_str: str = '' 29 | next_treelet_str: Optional[str] = '' 30 | response_types: Tuple[str] = NO_UPDATE 31 | 32 | def construct_response_types_tuple(response_types): 33 | return tuple([str(x) for x in response_types]) 34 | -------------------------------------------------------------------------------- /chirpy/core/smooth_handoffs.py: -------------------------------------------------------------------------------- 1 | from enum import Enum, auto 2 | 3 | class SmoothHandoff(Enum): 4 | """ 5 | A smooth handoff is when one RG gives a response with needs_prompt=True, and another RG gives a prompt with 6 | PromptType=FORCE_START. This is used when we want a particular scripted transition. 7 | 8 | To make it easier to construct smooth handoffs without needing to write lots of extra code, you can 9 | (1) Create a unique identifier in this enumeration for your smooth handoff. 10 | (2) In its ResponseGeneratorResult, have the responding RG set smooth_handoff to the appropriate identifier in 11 | this enumeration. If the response is chosen, current_state.smooth_handoff will be set to the identifier. 12 | (3) In the prompting RG's get_prompt function, check whether current_state.smooth_handoff equals your 13 | identifier. If so, give the scripted prompt with PromptType=FORCE_START. 
class TestArgs:
    """
    These arguments are provided to the lambda handler and help in overriding certain probabilistic parts
    """
    def __init__(self, selected_prompt_rg: Optional[str] = None, experiment_values: Optional[dict] = None,
                 neural_chat_args: Optional[dict] = None):
        """

        @param selected_prompt_rg: if supplied, the name of the RG whose prompt should be deterministically chosen
            (if a prompt from this RG is available)
        @param experiment_values: dict stored unchanged on the instance; omitted or None means a fresh empty dict
        @param neural_chat_args: dict stored unchanged on the instance; omitted or None means a fresh empty dict
        """
        # Build the default dicts per call: a `{}` default in the signature is created once and
        # would be shared (and mutated) across every instance constructed without the argument.
        if experiment_values is None:
            experiment_values = {}
        if neural_chat_args is None:
            neural_chat_args = {}
        assert selected_prompt_rg is None or isinstance(selected_prompt_rg, str)
        assert isinstance(experiment_values, dict)
        self.selected_prompt_rg = selected_prompt_rg
        self.experiment_values = experiment_values
        self.neural_chat_args = neural_chat_args
chirpy.core.response_generator.state import BaseState, BaseConditionalState, NO_UPDATE 4 | 5 | ### 6 | # Define the state that will be returned by all treelets 7 | ### 8 | @dataclass 9 | class State(BaseState): 10 | acknowledged_entities: List[str] = field(default_factory=list) 11 | 12 | @dataclass 13 | class ConditionalState(BaseConditionalState): 14 | acknowledged_entities: List[str] = NO_UPDATE 15 | -------------------------------------------------------------------------------- /chirpy/response_generators/aliens/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanfordnlp/chirpycardinal/a43f0cf3cc21c406a4acd60428d53c44c15f5947/chirpy/response_generators/aliens/__init__.py -------------------------------------------------------------------------------- /chirpy/response_generators/aliens/aliens_helpers.py: -------------------------------------------------------------------------------- 1 | from chirpy.response_generators.aliens.regex_templates import * 2 | from chirpy.core.response_generator.response_type import add_response_types, ResponseType 3 | import logging 4 | logger = logging.getLogger('chirpylogger') 5 | 6 | ADDITIONAL_RESPONSE_TYPES = ['OPINION'] 7 | 8 | ResponseType = add_response_types(ResponseType, ADDITIONAL_RESPONSE_TYPES) 9 | 10 | def is_opinion(rg, utterance): 11 | top_da = rg.state_manager.current_state.dialogact['top_1'] 12 | return len(utterance.split()) >= 10 or top_da == 'opinion' 13 | -------------------------------------------------------------------------------- /chirpy/response_generators/aliens/regex_templates/__init__.py: -------------------------------------------------------------------------------- 1 | # from .first_person_template import FirstPersonRegexTemplate 2 | from .yes_template import YesTemplate 3 | from .disinterested_template import DisinterestedTemplate, ChangeTopicTemplate 4 | # from .no_template import NoTemplate 5 | # from 
.personal_pronoun_template import PersonalPronounRegexTemplate 6 | -------------------------------------------------------------------------------- /chirpy/response_generators/aliens/regex_templates/disinterested_template.py: -------------------------------------------------------------------------------- 1 | from chirpy.core.regex.regex_template import RegexTemplate 2 | from chirpy.core.regex.util import OPTIONAL_TEXT_POST, OPTIONAL_TEXT_PRE, OPTIONAL_TEXT_MID 3 | 4 | class DisinterestedTemplate(RegexTemplate): 5 | slots = {} 6 | 7 | templates = [ 8 | OPTIONAL_TEXT_PRE + "don't" + OPTIONAL_TEXT_MID + "care" + OPTIONAL_TEXT_POST, 9 | OPTIONAL_TEXT_PRE + 'not' + OPTIONAL_TEXT_MID + 'interested' + OPTIONAL_TEXT_POST, 10 | OPTIONAL_TEXT_PRE + 'do not' + OPTIONAL_TEXT_MID + 'care' + OPTIONAL_TEXT_POST, 11 | OPTIONAL_TEXT_PRE + "don't like" + OPTIONAL_TEXT_POST, 12 | OPTIONAL_TEXT_PRE + "don't wanna" + OPTIONAL_TEXT_POST 13 | 14 | ] 15 | 16 | positive_examples = [ 17 | ("i don't really care", {}), 18 | ] 19 | 20 | negative_examples = [ 21 | ] 22 | 23 | CHANGE_TOPIC_PHRASE = [ 24 | "talk about", 25 | "tell me about", 26 | "switch" 27 | ] 28 | 29 | 30 | class ChangeTopicTemplate(RegexTemplate): 31 | slots = { 32 | 'change_topic': CHANGE_TOPIC_PHRASE 33 | } 34 | templates = [ 35 | OPTIONAL_TEXT_PRE + "{change_topic}" + OPTIONAL_TEXT_POST, 36 | ] 37 | positive_examples = [ 38 | ("let's talk about grand theft auto", {'change_topic': 'talk about'}), 39 | ("i don't want to talk about it", {'change_topic': 'talk about'}), 40 | ('can we talk about food', {'change_topic': 'talk about'}), 41 | ('can you tell me about wolves', {'change_topic': 'tell me about'}), 42 | ("can we switch the topic", {'change_topic': "switch"}) 43 | ] 44 | negative_examples = [ 45 | "No, there isn't a problem" 46 | ] -------------------------------------------------------------------------------- /chirpy/response_generators/aliens/regex_templates/no_template.py: 
# --------------------------------------------------------------------------------
# CC: Not being used for now.
# from chirpy.core.regex.regex_template import RegexTemplate
# from chirpy.core.regex.util import OPTIONAL_TEXT, OPTIONAL_TEXT_POST, OPTIONAL_TEXT_PRE, OPTIONAL_TEXT_MID
#
# NO_WORDS = [
#     "no",
#     "don't",
#     'neither',
#     "i don't know",
#     'else',
#     'nothing',
#     'nope',
#     "haven't",
#     "absolutely not",
#     "most certainly not",
#     "of course not",
#     "under no circumstances",
#     "by no means",
#     "not at all",
#     "negative",
#     "never",
#     "not really",
#     "nope",
#     "uh-uh",
#     "nah",
#     "not on your life",
#     "no way",
#     "no way Jose",
#     "ixnay",
#     "nay",
#     "not"]
#
# class NoTemplate(RegexTemplate):
#     slots = {
#         'no_word': NO_WORDS,
#     }
#     templates = [
#         OPTIONAL_TEXT_PRE + "{no_word}" + OPTIONAL_TEXT_POST
#     ]
#     positive_examples = [
#         "no",
#         "no i don't want to talk about that",
#         "please don't talk about that",
#         "don't talk about that anymore",
#         "i do not want to hear more"
#     ]
#     negative_examples = [
#         "ok",
#         "sure",
#         "ok please tell me more",
#         "i would really like to hear more"
#
#     ]
# --------------------------------------------------------------------------------
# /chirpy/response_generators/aliens/regex_templates/yes_template.py:
# --------------------------------------------------------------------------------
"""
Adapted from movies RG's YesTemplate
"""

from chirpy.core.regex.regex_template import RegexTemplate
from chirpy.core.regex.util import OPTIONAL_TEXT, OPTIONAL_TEXT_POST, OPTIONAL_TEXT_PRE, OPTIONAL_TEXT_MID

YES_WORDS = [
    "yes",
    "all right",
    "alright",
    "very well",
    "of course",
    "by all means",
    "sure",
    "certainly",
    "absolutely",
    "indeed",
    "right",
    "affirmative",
    "in the affirmative",
    "agreed",
    "roger",
    "aye aye",
    "yeah",
    "yep",
    "yup",
    "ya",
    "uh-huh",
    "okay",
    "ok",
    "okey-dokey",
    "okey-doke",
    "yea",
    "aye",
    "course",
    "duh"
]

class YesTemplate(RegexTemplate):
    """Template that captures one of YES_WORDS anywhere in the utterance."""
    slots = {
        'yes_word': YES_WORDS,
    }
    templates = [
        OPTIONAL_TEXT_PRE + "{yes_word}" + OPTIONAL_TEXT_POST
    ]
    positive_examples = [
        ("yes let's keep talking", {'yes_word': 'yes'}),
        ("alright i will keep talking", {'yes_word': 'alright'})
    ]

    negative_examples = [
        "i don't want to talk about this any more",
        "can we talk about something else"
    ]



# --------------------------------------------------------------------------------
# /chirpy/response_generators/aliens/state.py:
# --------------------------------------------------------------------------------
from dataclasses import dataclass
from typing import List, Tuple, Set  # NOQA
from chirpy.core.response_generator.state import *
###
# Define the state that will be returned by all treelets
###

@dataclass
class State(BaseState):
    # set to True through the conditional state returned with the aliens prompt
    have_prompted: bool = False

@dataclass
class ConditionalState(BaseConditionalState):
    have_prompted: bool = NO_UPDATE


# --------------------------------------------------------------------------------
# /chirpy/response_generators/aliens/treelets/__init__.py:
# --------------------------------------------------------------------------------
from .introductory_treelet import IntroductoryTreelet
from .turn_first_treelet import FirstTurnTreelet
from .turn_second_treelet import SecondTurnTreelet
from .turn_third_treelet import ThirdTurnTreelet
from .turn_fourth_treelet import FourthTurnTreelet
from .turn_fifth_treelet import FifthTurnTreelet
from .question_treelet import QuestionTreelet

# --------------------------------------------------------------------------------
# /chirpy/response_generators/aliens/treelets/introductory_treelet.py:
# --------------------------------------------------------------------------------
import logging

# RG IMPORTS
from chirpy.core.response_generator import Treelet
from chirpy.response_generators.aliens.aliens_responses import PROMPT
from chirpy.core.response_generator_datatypes import ResponseGeneratorResult, PromptResult, PromptType

logger = logging.getLogger('chirpylogger')


class IntroductoryTreelet(Treelet):
    """Treelet that offers the aliens PROMPT as a FORCE_START prompt."""
    name = 'aliens_introductory'

    def get_prompt(self, **kwargs):
        """
        Return the aliens prompt, or None when it should not be offered.

        @param kwargs: accepted and ignored
        @return: None if the prompt was already given, if the conversation has had 30 or fewer turns,
            or if the 'discussed_aliens' user attribute is truthy; otherwise a FORCE_START PromptResult
            whose conditional state sets have_prompted and routes to the 'transition' treelet
        """
        discussed_aliens_in_prev_convo = self.rg.get_user_attribute('discussed_aliens', False)
        state = self.rg.state
        # the history length is halved to get the turn count
        # NOTE(review): presumably one user and one bot utterance per turn -- confirm
        num_convo_turns = len(self.rg.get_conversation_history()) // 2
        # prompt at most once, only after more than 30 turns, and never if aliens were already discussed
        if state.have_prompted or num_convo_turns <= 30 or discussed_aliens_in_prev_convo:
            return None

        return PromptResult(
            PROMPT,
            PromptType.FORCE_START,
            state,
            cur_entity=None,
            conditional_state=self.rg.ConditionalState(
                have_prompted=True,
                prev_treelet_str=self.name,
                next_treelet_str='transition'
            )
        )


# --------------------------------------------------------------------------------
# /chirpy/response_generators/aliens/treelets/question_treelet.py:
# --------------------------------------------------------------------------------
import logging

from chirpy.core.response_generator import *
from chirpy.response_generators.aliens.aliens_responses import QUESTION_RESPONSE
from chirpy.core.response_priority import ResponsePriority, PromptType

from chirpy.core.response_generator_datatypes import ResponseGeneratorResult, AnswerType
logger = logging.getLogger('chirpylogger')


class QuestionTreelet(Treelet):
    """Treelet that answers with the fixed QUESTION_RESPONSE text."""
    name = "aliens_question"

    def get_response(self, priority=ResponsePriority.STRONG_CONTINUE, **kwargs):
        """
        Return QUESTION_RESPONSE as a statement and route next to the 'transition' treelet.

        @param priority: priority attached to the result
        @param kwargs: accepted and ignored
        @return: ResponseGeneratorResult carrying the current state unchanged
        """
        # only the state is needed; the utterance and response types are not used here
        state, _, _ = self.get_state_utterance_response_types()
        conditional_state = self.rg.ConditionalState(prev_treelet_str=self.name,
                                                     next_treelet_str='transition')
        return ResponseGeneratorResult(text=QUESTION_RESPONSE, priority=priority, needs_prompt=False, state=state,
                                       cur_entity=None, conditional_state=conditional_state,
                                       answer_type=AnswerType.STATEMENT)

    def get_question_response(self):
        """Return get_response() at STRONG_CONTINUE priority."""
        return self.get_response(ResponsePriority.STRONG_CONTINUE)

# --------------------------------------------------------------------------------
# /chirpy/response_generators/aliens/treelets/turn_fifth_treelet.py:
# --------------------------------------------------------------------------------
import logging

from chirpy.core.response_generator import *
from chirpy.response_generators.aliens.state import State, ConditionalState
from chirpy.response_generators.aliens.aliens_responses import MONOLOGUES, ACKNOWLEDGMENTS
from chirpy.response_generators.aliens.aliens_helpers import ResponseType

from chirpy.core.response_generator_datatypes import ResponseGeneratorResult, AnswerType

from random import choice
logger = logging.getLogger('chirpylogger')


class FifthTurnTreelet(Treelet):
    """Treelet that delivers MONOLOGUES[5]; it schedules no follow-up treelet."""
    name = "aliens_fifth_turn"

    def get_response(self, priority, **kwargs):
        """
        Return MONOLOGUES[5] with a short lead-in.

        @param priority: priority attached to the result
        @param kwargs: accepted and ignored, so this treelet can be called with the same keyword
            arguments as the first/third/question treelets
        @return: ResponseGeneratorResult whose conditional state has next_treelet_str=None
        """
        state, _, response_types = self.get_state_utterance_response_types()
        conditional_state = ConditionalState(prev_treelet_str=self.name,
                                             next_treelet_str=None)
        # acknowledge an opinion with a random acknowledgment, otherwise use a fixed filler
        if ResponseType.OPINION in response_types:
            prefix = choice(ACKNOWLEDGMENTS) + ' '
        else:
            prefix = 'Yeah, '
        return ResponseGeneratorResult(text=prefix + MONOLOGUES[5], priority=priority, needs_prompt=False,
                                       state=state,
                                       cur_entity=None, conditional_state=conditional_state,
                                       answer_type=AnswerType.STATEMENT)

# --------------------------------------------------------------------------------
# /chirpy/response_generators/aliens/treelets/turn_first_treelet.py:
# --------------------------------------------------------------------------------
import logging

from chirpy.core.response_generator import *
from chirpy.response_generators.aliens.state import State, ConditionalState
from chirpy.response_generators.aliens.aliens_responses import MONOLOGUES
from chirpy.core.response_generator_datatypes import ResponsePriority

from chirpy.core.response_generator_datatypes import ResponseGeneratorResult, AnswerType
logger = logging.getLogger('chirpylogger')


class FirstTurnTreelet(Treelet):
    """Treelet that delivers MONOLOGUES[1] and hands over to the second-turn treelet."""
    name = "aliens_first_turn"

    def get_response(self, priority=ResponsePriority.STRONG_CONTINUE, **kwargs):
        """
        Return MONOLOGUES[1] as a statement.

        @param priority: priority attached to the result
        @param kwargs: accepted and ignored
        @return: ResponseGeneratorResult routing next to self.rg.second_turn_treelet
        """
        state, _, _ = self.get_state_utterance_response_types()
        conditional_state = ConditionalState(prev_treelet_str=self.name,
                                             next_treelet_str=self.rg.second_turn_treelet.name)
        return ResponseGeneratorResult(text=MONOLOGUES[1], priority=priority, needs_prompt=False, state=state,
                                       cur_entity=None, conditional_state=conditional_state,
                                       answer_type=AnswerType.STATEMENT)

# --------------------------------------------------------------------------------
# /chirpy/response_generators/aliens/treelets/turn_fourth_treelet.py:
# --------------------------------------------------------------------------------
import logging

from chirpy.core.response_generator import *
from chirpy.response_generators.aliens.state import State, ConditionalState
from chirpy.response_generators.aliens.aliens_responses import MONOLOGUES, ACKNOWLEDGMENTS
from chirpy.response_generators.aliens.aliens_helpers import ResponseType

from chirpy.core.response_generator_datatypes import ResponseGeneratorResult, AnswerType
from random import choice
logger = logging.getLogger('chirpylogger')


class FourthTurnTreelet(Treelet):
    """Treelet that delivers MONOLOGUES[4] and hands over to the fifth-turn treelet."""
    name = "aliens_fourth_turn"

    def get_response(self, priority, **kwargs):
        """
        Return MONOLOGUES[4] with a short lead-in.

        @param priority: priority attached to the result
        @param kwargs: accepted and ignored, so this treelet can be called with the same keyword
            arguments as the first/third treelets
        @return: ResponseGeneratorResult routing next to self.rg.fifth_turn_treelet
        """
        state, _, response_types = self.get_state_utterance_response_types()
        conditional_state = ConditionalState(prev_treelet_str=self.name,
                                             next_treelet_str=self.rg.fifth_turn_treelet.name)
        # acknowledge an opinion with a random acknowledgment, otherwise use a fixed filler
        if ResponseType.OPINION in response_types:
            prefix = choice(ACKNOWLEDGMENTS) + ' '
        else:
            prefix = 'Well, '
        return ResponseGeneratorResult(text=prefix + MONOLOGUES[4], priority=priority, needs_prompt=False,
                                       state=state,
                                       cur_entity=None, conditional_state=conditional_state,
                                       answer_type=AnswerType.STATEMENT)

# --------------------------------------------------------------------------------
# /chirpy/response_generators/aliens/treelets/turn_second_treelet.py:
# --------------------------------------------------------------------------------
import logging

from chirpy.core.response_generator import *
# The aliens RG's own state classes: every other aliens treelet imports them from aliens.state,
# whereas this file previously pulled them from personal_issues.state.
from chirpy.response_generators.aliens.state import State, ConditionalState
from chirpy.response_generators.aliens.aliens_responses import MONOLOGUES
from chirpy.response_generators.aliens.aliens_helpers import ResponseType

from chirpy.core.response_generator_datatypes import ResponseGeneratorResult, AnswerType
logger = logging.getLogger('chirpylogger')


class SecondTurnTreelet(Treelet):
    """Treelet that delivers MONOLOGUES[2] and hands over to the third-turn treelet."""
    name = "aliens_second_turn"

    def get_response(self, priority, **kwargs):
        """
        Return MONOLOGUES[2] with a lead-in that depends on whether the user said yes.

        @param priority: priority attached to the result
        @param kwargs: accepted and ignored, so this treelet can be called with the same keyword
            arguments as the first/third treelets
        @return: ResponseGeneratorResult routing next to self.rg.third_turn_treelet
        """
        state, _, response_types = self.get_state_utterance_response_types()
        if ResponseType.YES in response_types:
            prefix = "Exactly! "
        else:
            prefix = "Well, please let me know if this is boring to you, but "
        conditional_state = ConditionalState(prev_treelet_str=self.name,
                                             next_treelet_str=self.rg.third_turn_treelet.name)
        return ResponseGeneratorResult(text=prefix + MONOLOGUES[2], priority=priority, needs_prompt=False,
                                       state=state,
                                       cur_entity=None, conditional_state=conditional_state,
                                       answer_type=AnswerType.STATEMENT)

# --------------------------------------------------------------------------------
# /chirpy/response_generators/aliens/treelets/turn_third_treelet.py:
# --------------------------------------------------------------------------------
import logging

from chirpy.core.response_generator import *
from chirpy.response_generators.aliens.state import State, ConditionalState
from chirpy.response_generators.aliens.aliens_responses import MONOLOGUES, ACKNOWLEDGMENTS
from chirpy.response_generators.aliens.aliens_helpers import ResponseType

from chirpy.core.response_generator_datatypes import ResponseGeneratorResult, AnswerType, ResponsePriority

from random import choice
logger = logging.getLogger('chirpylogger')


class ThirdTurnTreelet(Treelet):
    """Treelet that delivers MONOLOGUES[3] and hands over to the fourth-turn treelet."""
    name = "aliens_third_turn"

    def get_response(self, priority=ResponsePriority.STRONG_CONTINUE, **kwargs):
        """
        Return MONOLOGUES[3] with a short lead-in.

        @param priority: priority attached to the result
        @param kwargs: accepted and ignored
        @return: ResponseGeneratorResult routing next to self.rg.fourth_turn_treelet
        """
        state, _, response_types = self.get_state_utterance_response_types()
        # acknowledge an opinion with a random acknowledgment, otherwise use a fixed filler
        if ResponseType.OPINION in response_types:
            prefix = choice(ACKNOWLEDGMENTS) + ' '
        else:
            prefix = 'Right, '
        conditional_state = ConditionalState(prev_treelet_str=self.name,
                                             next_treelet_str=self.rg.fourth_turn_treelet.name)
        return ResponseGeneratorResult(text=prefix + MONOLOGUES[3], priority=priority, needs_prompt=False,
                                       state=state,
                                       cur_entity=None, conditional_state=conditional_state,
                                       answer_type=AnswerType.STATEMENT)

# --------------------------------------------------------------------------------
# /chirpy/response_generators/categories/regex_templates.py:
# --------------------------------------------------------------------------------
from chirpy.core.regex.regex_template import RegexTemplate
from chirpy.core.regex.util import NONEMPTY_TEXT, OPTIONAL_TEXT_PRE_GREEDY, OPTIONAL_TEXT_POST
from chirpy.core.regex.word_lists import REQUEST_ACTION, CONTINUER


class CategoriesTemplate(RegexTemplate):
    """Template with optional continuer / request-action words followed by a free-text keyword."""
    # TODO-Kathleen: Can we avoid using continuer and just have "{keyword}" and "{request_action} {keyword}"?
    slots = {
        'continuer': CONTINUER,
        'request_action': REQUEST_ACTION,

        # TODO-Kathleen: Might be more natural to rename this slot to 'category'.
        # Why not have this point to categories.keys(), your list of supported categories?
        # Then this template would only match when the user is asking for a category. Currently it matches every nonempty string!
        'keyword': NONEMPTY_TEXT,
    }
    # the last entry is the bare keyword
    templates = [
        "{continuer} {request_action} {keyword}",
        "{request_action} {continuer} {keyword}",
        "{continuer} {keyword}",
        "{request_action} {keyword}",
        "{keyword}",
    ]
    # TODO-Kathleen: write tests
    positive_examples = []
    negative_examples = []


NEGATIVE_WORDS = [
    "boring",
    "else",
    "move on",
    "ask",
    "stupid",
    "bad",
    "dumb",
    "don't watch",
]


class NegativeResponseTemplate(RegexTemplate):
    """Template that captures one of NEGATIVE_WORDS anywhere in the utterance."""
    slots = {
        'negative_word': NEGATIVE_WORDS,
    }
    templates = [
        OPTIONAL_TEXT_PRE_GREEDY + "{negative_word}" + OPTIONAL_TEXT_POST,
    ]
    positive_examples = [
        ("that was boring", {'negative_word': "boring"}),
        ("you are stupid", {'negative_word': "stupid"}),
        ("that was in such a bad taste", {'negative_word': "bad"}),
        ("i don't watch movies", {'negative_word': "don't watch"}),
        ("i don't watch tv", {'negative_word': "don't watch"}),
    ]
    negative_examples = [
        "that is so hamburger",
        "what do you mean",
        "i don't understand"
    ]

# --------------------------------------------------------------------------------
# /chirpy/response_generators/closing_confirmation/__init__.py:
# --------------------------------------------------------------------------------
# https://raw.githubusercontent.com/stanfordnlp/chirpycardinal/a43f0cf3cc21c406a4acd60428d53c44c15f5947/chirpy/response_generators/closing_confirmation/__init__.py

# --------------------------------------------------------------------------------
# /chirpy/response_generators/closing_confirmation/state.py:
# --------------------------------------------------------------------------------
from dataclasses import dataclass
from chirpy.core.response_generator.state import BaseState, BaseConditionalState, NO_UPDATE

###
# Define the state that will be returned by all treelets
###
@dataclass
class State(BaseState):
    has_just_asked_to_exit: bool = False

@dataclass
class ConditionalState(BaseConditionalState):
    # NOTE(review): defaults to False rather than NO_UPDATE (which is imported but unused here);
    # presumably so the flag is cleared unless explicitly set -- confirm this is intended
    has_just_asked_to_exit: bool = False

# --------------------------------------------------------------------------------
# /chirpy/response_generators/complaint/__init__.py:
# --------------------------------------------------------------------------------
# https://raw.githubusercontent.com/stanfordnlp/chirpycardinal/a43f0cf3cc21c406a4acd60428d53c44c15f5947/chirpy/response_generators/complaint/__init__.py

# --------------------------------------------------------------------------------
# /chirpy/response_generators/complaint/complaint_helpers.py:
# --------------------------------------------------------------------------------

# Canned reply texts, grouped by the kind of complaint named in each constant.

GENERIC_COMPLAINT_RESPONSE = [
    "Oops, it sounds like I didn't get that quite right! Let's talk about something else."
]

MISHEARD_COMPLAINT_RESPONSE = [
    "Elephants can pick up sound through their feet, ears, and trunks. But I just have a microphone! Sorry for the misunderstanding. Let's talk about something else.",
    "Dolphins can hear sounds from up to 15 miles away, but I can’t even hear you when we're this close. Sorry for the misunderstanding. Let's talk about something else."
]

CLARIFICATION_COMPLAINT_RESPONSE = [
    "Oh no, I think I wasn't clear. Let's talk about something else",
    "It sounds like I wasn't clear! Can we move onto something else?"
]

REPETITION_COMPLAINT_RESPONSE = [
    "I might be a chatbot, but right now I sound like a broken record! Let's talk about something new.",
    "Oops I said it again! Sorry for the repetition. Why don't we talk about something else?"
]

PRIVACY_COMPLAINT_RESPONSE = [
    "No worries, we don't have to talk about that. Let's move on to something else",
    "That's alright, we can talk about something else."
]
# --------------------------------------------------------------------------------
# /chirpy/response_generators/fallback/__init__.py:
# --------------------------------------------------------------------------------
# https://raw.githubusercontent.com/stanfordnlp/chirpycardinal/a43f0cf3cc21c406a4acd60428d53c44c15f5947/chirpy/response_generators/fallback/__init__.py

# --------------------------------------------------------------------------------
# /chirpy/response_generators/fallback/response_templates.py:
# --------------------------------------------------------------------------------
FALLBACK_RESPONSE = "Sorry, I'm not sure how to answer that."
FALLBACK_POSNAV_RESPONSES = [
    "I'm sorry, I don't really know much about that, so I can't really talk about it.",
    "Unfortunately, I can't really discuss that topic very well."
]

# every prompt in this list ends with a question to the user
FALLBACK_PROMPTS = [
    "I just wanted you to know that I'm really enjoying talking with you so far. I'd love to get to know you better. What are you interested in?",
    "By the way, there's so much information up here in the cloud that I can share with you. What's something you'd like to know more about?",
    "Anyway, it's great getting to know you more. If you don't mind me asking, what have you been interested in lately?",
    "Since quarantine started, I've been using my free time to learn new things. I'd be happy to share them with you. What would you like to learn more about?"
]
# statement-only variants: none of these asks a question
FALLBACK_PROMPT_NO_QUESTIONS = [
    "By the way, I'm glad to get to talk with you.",
    "It's lovely talking to you.",
    "It's great getting to know you better.",
]

# --------------------------------------------------------------------------------
# /chirpy/response_generators/fallback/state.py:
# --------------------------------------------------------------------------------
from chirpy.core.response_generator.state import BaseState, BaseConditionalState, NO_UPDATE, dataclass


@dataclass
class State(BaseState):
    # integer fields, both starting at 0
    used_fallback_response: int = 0
    used_fallback_prompt: int = 0


@dataclass
class ConditionalState(BaseConditionalState):
    used_fallback_response: int = NO_UPDATE
    used_fallback_prompt: int = NO_UPDATE

# --------------------------------------------------------------------------------
# /chirpy/response_generators/food/__init__.py:
# --------------------------------------------------------------------------------
# https://raw.githubusercontent.com/stanfordnlp/chirpycardinal/a43f0cf3cc21c406a4acd60428d53c44c15f5947/chirpy/response_generators/food/__init__.py

# --------------------------------------------------------------------------------
# /chirpy/response_generators/food/regex_templates/__init__.py:
# --------------------------------------------------------------------------------
from .regex_templates import *
from .word_lists import *

# --------------------------------------------------------------------------------
/chirpy/response_generators/food/state.py: -------------------------------------------------------------------------------- 1 | from typing import Optional 2 | 3 | from chirpy.core.response_generator.state import * 4 | 5 | @dataclass 6 | class State(BaseState): 7 | cur_food: Optional['WikiEntity'] = None 8 | 9 | @dataclass 10 | class ConditionalState(BaseConditionalState): 11 | cur_food: Optional['WikiEntity'] = NO_UPDATE 12 | prompt_treelet: Optional[str] = NO_UPDATE 13 | -------------------------------------------------------------------------------- /chirpy/response_generators/food/treelets/ask_favorite_food_treelet.py: -------------------------------------------------------------------------------- 1 | import logging 2 | from chirpy.core.regex.regex_template import RegexTemplate 3 | from chirpy.core.regex.util import OPTIONAL_TEXT, NONEMPTY_TEXT, OPTIONAL_TEXT_PRE, OPTIONAL_TEXT_POST, \ 4 | OPTIONAL_TEXT_MID 5 | from chirpy.core.response_generator_datatypes import PromptType, ResponseGeneratorResult, PromptResult, emptyResult, AnswerType 6 | from chirpy.core.response_priority import ResponsePriority, PromptType 7 | from chirpy.core.entity_linker.entity_groups import ENTITY_GROUPS_FOR_EXPECTED_TYPE 8 | from chirpy.core.response_generator import Treelet 9 | from chirpy.response_generators.food.food_helpers import * 10 | from chirpy.response_generators.food.state import State, ConditionalState 11 | 12 | import inflect 13 | engine = inflect.engine() 14 | 15 | logger = logging.getLogger('chirpylogger') 16 | 17 | 18 | class AskFavoriteFoodTreelet(Treelet): 19 | name = "ask_favorite_food_treelet" 20 | 21 | def get_response(self, priority=ResponsePriority.STRONG_CONTINUE, **kwargs): 22 | """ Returns the response. 23 | :param **kwargs: 24 | """ 25 | return ResponseGeneratorResult(text="Sure! 
What's your favorite food?", priority=ResponsePriority.CAN_START, 26 | needs_prompt=False, state=State(), 27 | cur_entity=None, 28 | answer_type=AnswerType.QUESTION_SELFHANDLING, 29 | conditional_state=ConditionalState( 30 | next_treelet_str="food_introductory_treelet", 31 | cur_food=None), 32 | expected_type=ENTITY_GROUPS_FOR_EXPECTED_TYPE.food_related 33 | ) 34 | -------------------------------------------------------------------------------- /chirpy/response_generators/food/treelets/data.json: -------------------------------------------------------------------------------- 1 | { 2 | "reuben": { 3 | "ingredients": [ 4 | "swiss cheese", 5 | "beef", 6 | "bread", 7 | "sauerkraut" 8 | ], 9 | "types": [] 10 | }, 11 | "beef": { 12 | "ingredients": [], 13 | "types": [] 14 | }, 15 | "bread": { 16 | "ingredients": [], 17 | "types": [] 18 | }, 19 | "sauerkraut": { 20 | "ingredients": [], 21 | "types": [] 22 | }, 23 | "cheese": { 24 | "ingredients": [ 25 | "milk" 26 | ], 27 | "types": [ 28 | "swiss cheese", 29 | "cheddar", 30 | "mozzarella", 31 | "brie", 32 | "american" 33 | ] 34 | }, 35 | "milk": { 36 | "ingredients": [], 37 | "types": [] 38 | }, 39 | "swiss cheese": { 40 | "ingredients": [], 41 | "types": [] 42 | }, 43 | "cheddar": { 44 | "ingredients": [], 45 | "types": [] 46 | }, 47 | "mozzarella": { 48 | "ingredients": [], 49 | "types": [] 50 | }, 51 | "brie": { 52 | "ingredients": [], 53 | "types": [] 54 | }, 55 | "american": { 56 | "ingredients": [], 57 | "types": [] 58 | }, 59 | "tomato": { 60 | "ingredients": [], 61 | "types": [] 62 | }, 63 | "lettuce": { 64 | "ingredients": [], 65 | "types": [] 66 | }, 67 | "vegetable": { 68 | "ingredients": [], 69 | "types": [ 70 | "tomato", 71 | "lettuce" 72 | ] 73 | }, 74 | "sandwich": { 75 | "ingredients": [ 76 | "cheese", 77 | "bread", 78 | "tomato", 79 | "lettuce" 80 | ], 81 | "types": [ 82 | "reuben", 83 | "club sandwich" 84 | ] 85 | }, 86 | "club sandwich": { 87 | "ingredients": [ 88 | "cheddar", 89 | "tomato", 90 | 
"lettuce", 91 | "ham" 92 | ], 93 | "types": [] 94 | }, 95 | "ham": { 96 | "ingredients": [], 97 | "types": [] 98 | } 99 | } 100 | -------------------------------------------------------------------------------- /chirpy/response_generators/launch/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanfordnlp/chirpycardinal/a43f0cf3cc21c406a4acd60428d53c44c15f5947/chirpy/response_generators/launch/__init__.py -------------------------------------------------------------------------------- /chirpy/response_generators/launch/state.py: -------------------------------------------------------------------------------- 1 | from dataclasses import dataclass 2 | from typing import Optional 3 | from chirpy.core.response_generator.state import BaseState, BaseConditionalState, NO_UPDATE 4 | from chirpy.response_generators.launch.launch_helpers import UserIntent 5 | 6 | @dataclass 7 | class State(BaseState): 8 | asked_name_counter: int = 0 # how many times we've asked the user's name 9 | user_intent = None 10 | 11 | @dataclass 12 | class ConditionalState(BaseConditionalState): 13 | user_intent: Optional[UserIntent] = None # determines if the user wants to give name or not 14 | 15 | -------------------------------------------------------------------------------- /chirpy/response_generators/launch/treelets/__init__.py: -------------------------------------------------------------------------------- 1 | from .handle_name_treelet import HandleNameTreelet 2 | from .first_turn_treelet import FirstTurnTreelet 3 | from .recognized_name_treelet import RecognizedNameTreelet 4 | -------------------------------------------------------------------------------- /chirpy/response_generators/launch/treelets/first_turn_treelet.py: -------------------------------------------------------------------------------- 1 | from chirpy.core.response_generator_datatypes import ResponseGeneratorResult, PromptResult, emptyPrompt, 
class FirstTurnTreelet(Treelet):
    """First treelet of the launch RG: greets the user and opens the name exchange."""
    name = "launch_first_turn_treelet"

    def get_response(self, priority=ResponsePriority.STRONG_CONTINUE, **kwargs):
        """Build the opening utterance of the conversation.

        When a name is already stored in the user attributes, the bot asks the
        user to confirm it and hands over to ``recognized_name_treelet``.
        Otherwise it asks permission to learn the name (the wording depends on
        the ``PIPELINE`` environment variable) and hands over to
        ``handle_name_treelet``.

        :param priority: priority attached to the returned response
        :return: a ResponseGeneratorResult that never needs a prompt
        """
        state, _utterance, _response_types = self.get_state_utterance_response_types()
        pipeline = os.environ.get('PIPELINE')
        user_name = self.rg.get_user_attribute('name', None)

        if user_name is None:
            # Mainline and dev greetings differ by one word so dev conversations are easy to spot.
            text = LAUNCH_PHRASE_MAINLINE if pipeline == 'MAINLINE' else LAUNCH_PHRASE_DEV
            next_treelet = self.rg.handle_name_treelet
        else:
            text = f"Hi, this is an Alexa Prize Socialbot. I believe we may have met before. Are you {user_name}?"
            next_treelet = self.rg.recognized_name_treelet

        transition = ConditionalState(prev_treelet_str=self.name,
                                      next_treelet_str=next_treelet.name)
        return ResponseGeneratorResult(text=text, priority=priority, needs_prompt=False,
                                       state=state, cur_entity=None,
                                       conditional_state=transition)
def til(til):
    """Wrap a "today I learned" fact in one of four randomly chosen conversational framings.

    :param til: the fact, phrased so that it reads correctly after the word "that"
    :return: a sentence (or short group of sentences) embedding the fact
    """
    fact = til
    framings = [
        f"I found out that {fact}. Isn't that interesting?",
        f"I learned that {fact}. What do you think about that?",
        f"Did you know that {fact}?",
        f"I just found out the other day that {fact}. Isn't that fascinating? What do you think?",
    ]
    return random.choice(framings)
@dataclass
class State(BaseState):
    """State kept by the music response generator."""
    have_prompted: bool = False
    # Current song / singer, each tracked both as an entity object and as a raw string.
    cur_song_ent: Optional = None
    cur_singer_ent: Optional = None
    cur_song_str: Optional = None
    cur_singer_str: Optional = None
    # default_factory gives every State instance its own list instead of a shared one.
    discussed_entities: List = field(default_factory=list)
    just_used_til: bool = False

@dataclass
class ConditionalState(BaseConditionalState):
    """Conditional state produced by the music treelets."""
    # NOTE(review): NO_UPDATE presumably means "leave the matching State value as is";
    # confirm against BaseConditionalState.
    have_prompted: bool = NO_UPDATE
    cur_song_ent: Optional = NO_UPDATE
    cur_singer_ent: Optional = NO_UPDATE
    cur_song_str: Optional = NO_UPDATE
    cur_singer_str: Optional = NO_UPDATE
    # Unlike the fields above this one defaults to False, not NO_UPDATE.
    just_used_til: bool = False
-------------------------------------------------------------------------------- /chirpy/response_generators/music/treelets/__init__.py: -------------------------------------------------------------------------------- 1 | from .introductory_treelet import IntroductoryTreelet 2 | from .handle_opinion_treelet import HandleOpinionTreelet 3 | from .get_singer_treelet import GetSingerTreelet 4 | from .get_song_treelet import GetSongTreelet 5 | from .ask_singer_treelet import AskSingerTreelet 6 | from .ask_song_treelet import AskSongTreelet 7 | from .get_instrument_treelet import GetInstrumentTreelet 8 | from .handoff_treelet import HandoffTreelet -------------------------------------------------------------------------------- /chirpy/response_generators/neural_chat/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanfordnlp/chirpycardinal/a43f0cf3cc21c406a4acd60428d53c44c15f5947/chirpy/response_generators/neural_chat/__init__.py -------------------------------------------------------------------------------- /chirpy/response_generators/neural_chat/treelets/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanfordnlp/chirpycardinal/a43f0cf3cc21c406a4acd60428d53c44c15f5947/chirpy/response_generators/neural_chat/treelets/__init__.py -------------------------------------------------------------------------------- /chirpy/response_generators/neural_chat/treelets/emotions/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanfordnlp/chirpycardinal/a43f0cf3cc21c406a4acd60428d53c44c15f5947/chirpy/response_generators/neural_chat/treelets/emotions/__init__.py -------------------------------------------------------------------------------- /chirpy/response_generators/neural_chat/treelets/familymember_treelets/__init__.py: 
class GeneralActivitiesTreelet(Treelet):
    """Talks about general activities and interests"""

    _launch_appropriate = False
    fallback_response = "Thanks for telling me about that, maybe I'll give it a try."

    def get_starter_question_and_labels(self, state: State, for_response: bool = False, for_launch: bool = False) -> Tuple[Optional[str], List[str], Optional[PromptType]]:
        """
        Inputs:
            state: the neural chat state (not read by this implementation).
            for_response: if True, the provided starter question will be used to make a response. Otherwise, used to make a prompt.
            for_launch: not read by this implementation.

        Returns a tuple of:
            - A starter question (str), or None (if it's not appropriate for this treelet to ask a starter question at this time).
            - Labels for the starter question, that should go in the state.
            - priority: PromptType.GENERIC for a prompt, or None when no starter question is offered.
        """
        # This treelet only offers its starter question as a prompt, never as a response.
        if for_response:
            return None, [], None
        return self.choose(STARTER_QUESTIONS), [], PromptType.GENERIC

    @property
    def return_question_answer(self) -> str:
        """Gives a response to the user if they ask the "return question" to our starter question

        DEPRECATED -- No need w/ blenderbot"""
        return "I like knitting. It keeps my mind and fingers occupied but it's also super relaxing."
15 | 16 | def get_starter_question_and_labels(self, state: State, for_response: bool = False, for_launch: bool = False) -> Tuple[Optional[str], List[str]]: 17 | """ 18 | Inputs: 19 | response: if True, the provided starter question will be used to make a response. Otherwise, used to make a prompt. 20 | 21 | Returns a tuple of: 22 | - A starter question (str), or None (if it's not appropriate for this treelet to ask a starter question at this time). 23 | - Labels for the starter question, that should go in the state. 24 | - priority: ResponsePriority or PromptType 25 | """ 26 | if for_response: 27 | return None, [], None 28 | return "Oh hey, on another topic, I just remembered something I've been wanting to ask you. It seems that a lot of people found the quarantine lonely, and " \ 29 | "other people can't get enough space away from their families or roommates. Now that we're over that hill and things are opening up, what's it been like for you?", [], PromptType.GENERIC 30 | 31 | @property 32 | def return_question_answer(self) -> str: 33 | """Gives a response to the user if they ask the "return question" to our starter question 34 | 35 | DEPRECATED -- No need w/ blenderbot""" 36 | return "I live by myself, but luckily I got to talk to people all day, so it's not too lonely." 
def is_two_part(response: str) -> bool:
    """Returns True iff response has at least two parts as indicated by punctuation marks.

    Every '.', ',', '!' and '?' counts as one mark; two or more marks qualify.
    """
    # Count directly instead of building a throwaway list just to take its length.
    num_puncs = sum(1 for char in response if char in ('.', ',', '!', '?'))
    return num_puncs >= 2


def is_short(response: str) -> bool:
    """Returns True iff response has fewer than 7 whitespace-separated words."""
    return len(response.split()) < 7


def is_short_set(sentences) -> bool:
    """Returns True iff the sentences, joined by single spaces, are short (see is_short)."""
    return is_short(" ".join(sentences))


def question_part(response: str) -> Optional[str]:
    """Returns the question part of the utterance, if there is one. Otherwise returns None

    Only the first '?' is considered. The result is the text between the last
    '.', ',' or '!' preceding that '?' and the '?' itself, stripped of
    surrounding whitespace; the '?' is not included.

    >>> question_part("that's so cool! what did you do?")
    'what did you do'
    """
    before, mark, _after = response.partition('?')
    if not mark:
        return None
    before = before.strip()
    # rfind gives -1 when a mark is absent, so with no marks at all the slice
    # below starts at 0 and keeps the whole (already stripped) text.
    last_break = max(before.rfind(punc) for punc in ('.', ',', '!'))
    return before[last_break + 1:].strip()
TYPES_OF_OFFENSES_PATH = "type_of_offenses.csv"
RESPONSE_TO_OFFENSES_PATH = "noncontextual_categorical_responses.csv"
TYPE_OF_OFFENSES = {1: 'sexual', 2:'criticism', 3:'curse', 4:'inappropriate topic', 5:'bodily harm', 6:'error'}
STRATEGIES = ["AskWhy", "Empathetic", "Avoidance", "PointingOut", "De-anonymize"]


def write_to_csv(path, responses):
    """Overwrite the CSV file at ``path`` with ``responses`` (an iterable of rows)."""
    # newline='' is what the csv module asks for, so it controls line endings itself.
    with open(path, 'w', newline='') as f:
        csv.writer(f).writerows(responses)


def gen_responses(annotator=None):
    """Interactively collect responses until each (offense type, strategy) has 3 from the annotator.

    Reads the existing rows of RESPONSE_TO_OFFENSES_PATH
    (type, strategy, response, annotator) and the labelled offenses in
    TYPES_OF_OFFENSES_PATH (header row skipped; the third column is the user
    utterance and the fourth the offense type key). For every labelled offense
    whose type is not 'error', and for every strategy, the annotator is
    prompted on stdin until three responses exist. Typing ``exit`` stops early.

    :param annotator: annotator name; when omitted, the module-level ANNOTATOR
        set by the ``__main__`` block is used
    :return: all rows, old and new, as a list of 4-item lists
    """
    if annotator is None:
        annotator = ANNOTATOR  # set by the __main__ block when run as a script
    with open(RESPONSE_TO_OFFENSES_PATH, 'r', newline='') as f:
        responses = list(csv.reader(f))
    counter = Counter((t, strategy, who) for t, strategy, _response, who in responses)
    with open(TYPES_OF_OFFENSES_PATH, 'r', newline='') as f:
        types_of_responses = list(csv.reader(f))
    for _, _, user_offense, t, _ in types_of_responses[1:]:
        # csv yields strings, so the key has to be compared as text: the former
        # `t == 6` was never true and 'error' rows were not skipped.
        if t.strip() == '6':
            continue
        for strategy in STRATEGIES:
            key = (t, strategy, annotator)
            while counter[key] < 3:
                print(f">> How would you use strategy \033[92m{strategy}\033[00m to respond to " + \
                      f"\033[91m{TYPE_OF_OFFENSES[int(t)]}\033[00m offenses (e.g. \033[91m{user_offense}\033[00m) ? " + \
                      f"(already has \033[92m{counter[key]}\033[00m)")
                response = input(f"? ")
                if response == 'exit':
                    return responses
                responses.append([t, strategy, response, annotator])
                counter[key] += 1
    return responses


if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument('annotator', type=str, help='The name of the annotator')
    ANNOTATOR = parser.parse_args().annotator

    responses = gen_responses(ANNOTATOR)
    write_to_csv(RESPONSE_TO_OFFENSES_PATH, responses)
offensive_responses if response == offensive_user_response]) 29 | context_rg = random.choice([rg for _, rg, _ in contexts_for_offense]) 30 | if offensive_user_response in labeled_type_set: 31 | continue 32 | offense = random.choice([offense for offense in contexts_for_offense if offense[1] == context_rg]) 33 | response = '' 34 | while not response.isdigit(): 35 | print(f'>> [{context_rg}] {offense[0]} \033[91m{offense[-1]}\033[00m') 36 | type_offense_str = ' '.join([f'[{i}] {t};' for i, t in TYPE_OF_OFFENSES.items()]) 37 | print(f'Is this {type_offense_str} type "exit" to exit') 38 | response = input('? ') 39 | if response == 'exit': 40 | return responses 41 | responses.append(offense + (response, ANNOTATOR)) 42 | print(">>> ALL DONE") 43 | return responses 44 | 45 | if __name__ == "__main__": 46 | parser = argparse.ArgumentParser() 47 | parser.add_argument('annotator', type=str, help='The name of the annotator') 48 | ANNOTATOR = parser.parse_args().annotator 49 | responses = label1() 50 | write_to_csv(os.path.join(os.path.dirname(os.path.realpath(__file__)), 'type_of_offenses.csv'), responses) 51 | -------------------------------------------------------------------------------- /chirpy/response_generators/offensive_user/offensive_user_helpers.py: -------------------------------------------------------------------------------- 1 | import csv 2 | from chirpy.response_generators.offensive_user.data.templates import InappropOffensesTemplate, SexualOffensesTemplate 3 | from chirpy.core.regex.templates import CriticismTemplate 4 | from chirpy.core.util import contains_phrase 5 | import os 6 | 7 | OFFENSIVE_USER_RESPONSE_LEVEL1 = [ 8 | "I'd rather not talk about that.", 9 | "I'd prefer not to discuss that.", 10 | "That's something I'd rather not discuss.", 11 | ] 12 | 13 | WHY_RESPONSE = [ 14 | "What makes you say that", 15 | "Why did you say that", 16 | "What made you say that" 17 | ] 18 | 19 | # Map offense keys to their types. 
def categorize_offense(utterance) -> str:
    """Return the offense category of ``utterance``, or 'unknown' if none applies.

    The regex templates are tried first, in the order criticism, sexual,
    inappropriate topic. If none matches, the labelled examples are consulted:
    an utterance counts as a 'curse' when it contains one of the curse examples
    as a phrase, and as any category (including 'curse') when it is exactly one
    of that category's examples.
    """
    regex_categories = (
        (CriticismTemplate, 'criticism'),
        (SexualOffensesTemplate, 'sexual'),
        (InappropOffensesTemplate, 'inappropriate topic'),
    )
    for template_cls, label in regex_categories:
        if template_cls().execute(utterance) is not None:
            return label

    for offense_type, examples in EXAMPLES_OF_OFFENSES.items():
        phrase_hit = offense_type == 'curse' and contains_phrase(utterance, examples)
        if phrase_hit or utterance in examples:
            return offense_type
    return 'unknown'
###
# Define the state that will be returned by all treelets
###
@dataclass
class State(BaseState):
    """State kept by the offensive-user response generator."""
    used_offensiveuser_response_count: int = 0
    used_criticaluser_response_count: int = 0
    used_offensiveuser_response: bool = False
    used_criticaluser_response: bool = False
    # NOTE(review): attributes without a type annotation (this one, followup,
    # offense_type) are plain class attributes, not dataclass fields.
    experiment_configuration = None
    handle_response: bool = False
    followup = None
    offense_type = None
    handled_response: bool = False

    def __init__(self):
        # Because __init__ is written by hand, @dataclass keeps it rather than
        # generating one, so the annotated defaults above are only reachable as
        # class-level defaults.
        # NOTE(review): BaseState.__init__ is not called here -- confirm intended.
        # One counter per offense type name, all starting at zero.
        self.offense_type_counts = {t: 0 for t in OFFENSE_KEY_TO_TYPE.values()}

@dataclass
class ConditionalState(BaseConditionalState):
    """Conditional state produced by the offensive-user treelets."""
    # NOTE(review): NO_UPDATE presumably means "leave the matching State value as is";
    # confirm against BaseConditionalState. The plain-False fields below do not use it.
    used_offensiveuser_response: bool = False
    used_criticaluser_response: bool = False
    experiment_configuration = NO_UPDATE
    handle_response: bool = False
    followup = NO_UPDATE
    offense_type = NO_UPDATE
    handled_response: bool = NO_UPDATE
def is_game_or_music_request(rg, utterance):
    """
    Check if the user is requesting that we play a game with them or play a song for them

    The utterance counts as such a request when all of the following hold:
      - the previous entry in the conversation history does not contain
        "what do you like to do" (so "play ..." is not just an answer to that question),
      - RequestPlayTemplate matches the utterance,
      - and either the current entity was initiated by the user this turn and
        belongs to a musical entity group, or NotRequestPlayTemplate does not match.

    :param rg: response generator; only ``rg.state_manager.current_state`` is read
    :param utterance: the user utterance to classify
    :return: True if the utterance looks like a game/music request, else False
    """
    request_play_slots = RequestPlayTemplate().execute(utterance)
    not_request_play_slots = NotRequestPlayTemplate().execute(utterance)
    current_state = rg.state_manager.current_state
    cur_entity = current_state.entity_tracker.cur_entity
    history = current_state.history
    prev_bot_utt = history[-1] if len(history) >= 1 else ''
    did_not_ask_user_activity = "what do you like to do" not in prev_bot_utt.lower()

    found_musical_entity = False
    if current_state.entity_tracker.cur_entity_initiated_by_user_this_turn(current_state):
        musical_groups = (ENTITY_GROUPS_FOR_CLASSIFICATION.musician,
                          ENTITY_GROUPS_FOR_CLASSIFICATION.musical_group,
                          ENTITY_GROUPS_FOR_CLASSIFICATION.musical_work)
        found_musical_entity = any(group.matches(cur_entity) for group in musical_groups)

    # "play" must be requested in both original alternatives, so it is factored out.
    is_play_request = request_play_slots is not None
    return did_not_ask_user_activity and is_play_request and (
        found_musical_entity or not_request_play_slots is None)
class ComplimentTemplate(RegexTemplate):
    """Regex template for utterances in which the user compliments or thanks the bot.

    Covers "you ... <compliment>", "love you", "I ... enjoy/like ... talking",
    and "thank you ... (you ...) talking" style utterances.
    """
    slots = {
        "target": ["you", "you re", "your", "you're"],
        "compliment": ["amazing", "funny", "wonderful", "great", "cool", "nice", "awesome", "fantastic"],
        "pleasure": ["enjoy", "like", "enjoying", "liking"],
        "talk": ["talk", "talking", "conversation"],
        "i": ['i am', "i'm", "i"],
        "thank": ["thank you", "thanks"],
    }
    # The "{target} ... {compliment}" template used to be listed twice verbatim;
    # the redundant copy is removed, the remaining order is unchanged.
    templates = [
        OPTIONAL_TEXT_PRE + "{target}" + OPTIONAL_TEXT_MID + "{compliment}" + OPTIONAL_TEXT_POST,
        OPTIONAL_TEXT_PRE + "love you" + OPTIONAL_TEXT_POST,
        OPTIONAL_TEXT_PRE + "{i}" + OPTIONAL_TEXT_MID + "{pleasure}" + OPTIONAL_TEXT_MID + "{talk}" + OPTIONAL_TEXT_POST,
        OPTIONAL_TEXT_PRE + "{thank}" + OPTIONAL_TEXT_MID + "{talk}" + OPTIONAL_TEXT_POST,
        OPTIONAL_TEXT_PRE + "{thank}" + OPTIONAL_TEXT_MID + "{target}" + OPTIONAL_TEXT_MID + "{talk}" + OPTIONAL_TEXT_POST,
    ]
    positive_examples = [
        ("you're the most amazing person ai ever", {"target": "you're", "compliment": "amazing"}),
        ("i love you alexa", {}),
        ("i like our conversation", {"i": "i", "pleasure": "like", "talk": "conversation"}),
        ("i like talking to you too", {"i": "i", "pleasure": "like", "talk": "talking"}),
        ("i enjoy my conversation with you", {"i": "i", "pleasure": "enjoy", "talk": "conversation"}),
        ("thank you for talking to me alexa", {"thank": "thank you", "talk": "talking"}),
        ("thanks for your conversation you made my day", {"thank": "thanks", "target": "your", "talk": "conversation"}),
    ]
    negative_examples = [
        "that wasn't funny"
    ]
OPTIONAL_TEXT_PRE + "how old" + OPTIONAL_TEXT_MID + "are you" + OPTIONAL_TEXT_POST 15 | ] 16 | positive_examples = [ 17 | ("well how old are you", {}), 18 | ("what's your age alexa", {"request": "what's"}), 19 | ("tell me your age", {"request": "tell"}), 20 | ("tell me how old you are", {}), 21 | ("what's your birthday", {"request": "what's"}), 22 | ("do you know how old you are", {}) 23 | ] 24 | negative_examples = [ 25 | "how old do you think the earth is" 26 | ] 27 | -------------------------------------------------------------------------------- /chirpy/response_generators/one_turn_hack/regex_templates/request_conversation_template.py: -------------------------------------------------------------------------------- 1 | # from chirpy.core.regex.regex_template import RegexTemplate 2 | # from chirpy.core.regex.util import OPTIONAL_TEXT, NONEMPTY_TEXT, \ 3 | # OPTIONAL_TEXT_PRE, OPTIONAL_TEXT_POST, OPTIONAL_TEXT_MID 4 | # from chirpy.response_generators.sports.sports_utils import SPORTS 5 | # 6 | # class RequestConversationTemplate(RegexTemplate): 7 | # slots = { 8 | # "begin": ["alexa", "can you", "could you", "can we", "could we", "please", "let's", "wanna", "want to"], 9 | # "subject": ["we", "i", ] 10 | # "request": ["have a conversation", "talk", "talk with me", "talk to me", "chat with me", "chat to me"] 11 | # } 12 | # templates = [ 13 | # OPTIONAL_TEXT_PRE + "{begin} {request}" 14 | # ] 15 | # positive_examples = [ 16 | # ("i wanna have a conversation", {'begin': 'wanna', 'request': 'have a conversation'}), 17 | # ("i want to have a conversation", {'begin': 'want to', "request": "have a conversation"}), 18 | # ("alexa play baby", {"request": "alexa"}), 19 | # ("can you play you belong with me", {"request": "can you"}), 20 | # ("can we play mad libs", {"request": "can we"}), 21 | # ("play bon jovi", {}), 22 | # ("let's play a game", {"request": "let's"}) 23 | # ] 24 | # negative_examples = [ 25 | # "can we talk about food", 26 | # "i want to talk about 
class RequestNameTemplate(RegexTemplate):
    """Regex template for utterances that ask the bot for the user's own name.

    Matches a request word followed, anywhere later in the utterance, by
    the literal phrase "my name".
    """

    slots = dict(request=["tell", "what's", "say", "what", "know", "repeat"])

    templates = [
        OPTIONAL_TEXT_PRE + "{request}" + OPTIONAL_TEXT_MID + "my name" + OPTIONAL_TEXT_POST,
    ]

    # Each matching utterance is paired with the request word expected in the slot.
    positive_examples = [
        (utterance, {"request": request_word})
        for utterance, request_word in (
            ("hey alexa what's my name", "what's"),
            ("say my name", "say"),
            ("what's my name", "what's"),
            ("can you tell me my name", "tell"),
            ("do you even know my name alexa", "know"),
            ("what is my name alexa", "what"),
            ("what is my name", "what"),
            ("repeat my name", "repeat"),
        )
    ]

    # Utterances that must not match: the name asked about is not the user's.
    negative_examples = [
        "what's the name of the song",
        "what's your name",
    ]
/chirpy/response_generators/one_turn_hack/regex_templates/request_play_template.py: -------------------------------------------------------------------------------- 1 | from chirpy.core.regex.regex_template import RegexTemplate 2 | from chirpy.core.regex.util import OPTIONAL_TEXT, NONEMPTY_TEXT, \ 3 | OPTIONAL_TEXT_PRE, OPTIONAL_TEXT_POST, OPTIONAL_TEXT_MID 4 | from chirpy.response_generators.sports.sports_helpers import SPORTS 5 | 6 | class RequestPlayTemplate(RegexTemplate): 7 | slots = { 8 | "request": ["alexa", "can you", "could you", "can we", "could we", "please", "let's"], 9 | } 10 | templates = [ 11 | "{request}" + OPTIONAL_TEXT_MID + "play" + NONEMPTY_TEXT, 12 | "play " + NONEMPTY_TEXT 13 | ] 14 | positive_examples = [ 15 | ("play drivers license", {}), 16 | ("play some music", {}), 17 | ("alexa play baby", {"request": "alexa"}), 18 | ("can you play you belong with me", {"request": "can you"}), 19 | ("can we play mad libs", {"request": "can we"}), 20 | ("play bon jovi", {}), 21 | ("let's play a game", {"request": "let's"}) 22 | ] 23 | negative_examples = [ 24 | "i like to play basketball", 25 | "playing video games", # what's your favorite thing to do? 
class RequestStoryTemplate(RegexTemplate):
    """Regex template for utterances that ask the bot to tell a story.

    Matches a request verb followed, anywhere later in the utterance, by
    "story" or "stories". Both slots are filled on every match.
    """

    slots = {
        "request": ["tell", "know", "narrate", "say"],
        "story": ["story", "stories"],
    }

    # One pattern is enough: listing the identical pattern a second time
    # cannot match anything the first occurrence does not.
    templates = [
        OPTIONAL_TEXT_PRE + "{request}" + OPTIONAL_TEXT_MID + "{story}" + OPTIONAL_TEXT_POST,
    ]

    # (utterance, expected slot values) pairs that must match.
    positive_examples = [
        ("can you tell me a story", {"request": "tell", "story": "story"}),
        ("do you know any stories", {"request": "know", "story": "stories"}),
        ("i like you to tell me a story", {"request": "tell", "story": "story"}),
        ("tell me a story", {'request': 'tell', 'story': 'story'}),
    ]

    negative_examples = [
    ]
class SayThatAgainTemplate(RegexTemplate):
    """Regex template for utterances that ask the bot to repeat itself.

    The recognised phrasings come from ``word_lists.SAY_THAT_AGAIN``; the
    matched phrase is exposed through the ``say_that_again`` slot.
    """
    slots = {
        "say_that_again": word_lists.SAY_THAT_AGAIN
    }
    # Three shapes: the bare phrase, the phrase prefixed with "alexa", and
    # the phrase embedded anywhere in a longer utterance.
    # NOTE(review): the order presumably decides which text the slot
    # captures when several patterns could match -- confirm before reordering.
    templates = [
        "{say_that_again}",
        "alexa {say_that_again}",
        OPTIONAL_TEXT_PRE + "{say_that_again}" + OPTIONAL_TEXT_POST,
    ]
    # (utterance, expected slot values) pairs that must match.
    positive_examples = [
        ("what did you just say", {"say_that_again": "what did you just say"}),
        ("could you please repeat yourself", {"say_that_again": "could you please repeat yourself"}),
        ("alexa can you ask me that again", {"say_that_again": "can you ask me that again"}),
        ("repeat what you just said", {"say_that_again": "repeat what you just said"}),
        ("say that again", {"say_that_again": "say that again"}),
        ("alexa say that again please", {"say_that_again": "say that again please"}),
        ("what can you say that again please i didn't catch you", {"say_that_again": "can you say that again please"}),
    ]
    # No negative examples are defined for this template.
    negative_examples = []
class Policy:
    """Base class for opinion-conversation policies.

    Subclasses must override ``__repr__`` and ``get_action``;
    ``update_policy`` is an optional hook that does nothing by default.
    """

    def __repr__(self) -> str:
        """Return the policy's name; must be overridden."""
        raise NotImplementedError

    def get_action(self, state : State, action_space : List[Action], additional_features : AdditionalFeatures) -> Action:
        """Choose the next action for ``state``; must be overridden.

        :param state: current conversation state
        :param action_space: actions the policy may choose from
        :param additional_features: extra signals detected for this turn
        :raises NotImplementedError: always, in this base class
        """
        raise NotImplementedError

    def update_policy(self, episode : List[State], episode_features : List[Dict[str, int]], rewards : List[int]) -> None:
        """Hook for learning from a finished episode; a no-op in the base class."""
        return None
def _run_sql(sql_statement, fetch):
    """Run one statement on a fresh connection, commit, and always release it.

    Connection settings come from the module-level ``host_stream``, ``port``,
    ``database``, ``user`` and ``password`` values.

    :param sql_statement: SQL text, executed verbatim -- callers must not
        interpolate untrusted values into it
    :param fetch: when true, return ``cursor.fetchall()``; otherwise return None
    :return: the fetched rows, or None when ``fetch`` is false
    """
    conn = psycopg2.connect(host=host_stream, port=port, database=database, user=user, password=password)
    try:
        cur = conn.cursor()
        try:
            cur.execute(sql_statement)
            result = cur.fetchall() if fetch else None
            conn.commit()
        finally:
            # Release the cursor even when execute/fetch/commit raises.
            cur.close()
    finally:
        # Closing here prevents a leaked connection per call.
        conn.close()
    return result


def fetch_sql(sql_statement):
    """Execute a query and return all of its rows.

    :param sql_statement: SQL text, executed verbatim
    :return: the rows returned by ``cursor.fetchall()``
    """
    return _run_sql(sql_statement, fetch=True)


def execute_sql(sql_statement):
    """Execute a statement for its side effects and commit it.

    :param sql_statement: SQL text, executed verbatim
    :return: None
    """
    _run_sql(sql_statement, fetch=False)
class TwoTurnAgreePolicy(Policy):
    """Policy that asks for the user's opinion and reason, then agrees with them."""

    def __repr__(self) -> str:
        return "TwoTurnAgreePolicy"

    def agree_reason_agree(self, state : State, action_space : List[Action]):
        """Build the "agree, give a reason, ask for agreement" action.

        The action carries the user's own current sentiment, i.e. the bot
        sides with the user. Falls back to an exit action when that action
        is not part of ``action_space``.

        :param state: current conversation state (``cur_sentiment`` is read)
        :param action_space: actions that are currently allowed
        :return: the agreeing action, or ``Action(exit=True)``
        """
        action = Action(sentiment=state.cur_sentiment, give_agree=True, give_reason=True, solicit_agree=True)
        if action not in action_space:
            action = Action(exit=True)
        return action

    def get_action(self, state : State, action_space : List[Action], additional_features : AdditionalFeatures) -> Action:
        """Choose the next action; this policy always agrees with the user.

        Decision order:

        * No previous action: solicit an opinion when the current phrase has
          no entry in the user's sentiment history, otherwise solicit a reason.
        * Previous action solicited an opinion or suggested an alternative:
          exit when ``cur_sentiment == 2`` (presumably "neutral" -- confirm
          against the sentiment scale); agree when the user already gave a
          reason; otherwise solicit a reason.
        * Previous action solicited a reason: agree unless the user showed
          disinterest.
        * Anything else: exit.

        :param state: current conversation state
        :type state: State
        :param action_space: actions that are currently allowed
        :type action_space: List[Action]
        :param additional_features: signals detected for this turn
        :type additional_features: AdditionalFeatures
        :return: the chosen action
        :rtype: Action
        """
        if not state.action_history:
            # dict() so that membership is tested against phrases (keys) only.
            user_sentiment_history = dict(state.user_sentiment_history)
            if state.cur_phrase not in user_sentiment_history:
                return Action(solicit_opinion=True)
            return Action(solicit_reason=True)

        prev_action = state.action_history[-1]
        if prev_action.solicit_opinion or prev_action.suggest_alternative:
            if state.cur_sentiment == 2:
                return Action(exit=True)
            if additional_features.detected_user_gave_reason:
                return self.agree_reason_agree(state, action_space)
            return Action(solicit_reason=True)

        if prev_action.solicit_reason and not additional_features.detected_user_disinterest:
            return self.agree_reason_agree(state, action_space)

        return Action(exit=True)
| maybe,36 20 | oh yeah,36 21 | no no,36 22 | definitely,35 23 | okay,32 24 | what,32 25 | no not really,31 26 | right,30 27 | yeah i do,28 28 | yeah sure,27 29 | yeah you're right,27 30 | uh-huh,27 31 | oh yes,26 32 | totally,26 33 | yes you're right,23 34 | yeah yeah,22 35 | of course,22 36 | you're right,21 37 | yeah i agree,21 38 | repeat,17 39 | yeah kind of,17 40 | yes you are right,17 41 | i guess so,16 42 | you are right,15 43 | yeah i guess,15 44 | no yes,14 45 | i disagree,13 46 | exactly,13 47 | yes we can,13 48 | sort of,13 49 | no i don't,13 50 | yeah yes,12 51 | -------------------------------------------------------------------------------- /chirpy/response_generators/opinion2/user_interest/common_solicit_opinion_responses.csv: -------------------------------------------------------------------------------- 1 | yes,5697 2 | no,3019 3 | yeah,1862 4 | yes i do,285 5 | not really,270 6 | sometimes,239 7 | kind of,175 8 | i do,134 9 | yup,105 10 | sure,102 11 | yes yes,86 12 | no not really,71 13 | what,68 14 | i don't know,68 15 | change topic,51 16 | of course,50 17 | it's okay,49 18 | oh yeah,43 19 | i love dogs,43 20 | no no,40 21 | nope,40 22 | i love animals,39 23 | repeat,38 24 | i love them,38 25 | i love it,37 26 | no i don't,36 27 | oh yes,35 28 | yeah i do,34 29 | okay,32 30 | yes i love it,31 31 | a little bit,31 32 | yes do you,27 33 | sort of,26 34 | i guess,25 35 | yes i am,25 36 | what did you say,24 37 | i love cats,22 38 | i am a kid,22 39 | yes i love them,22 40 | do i like what,21 41 | not that much,21 42 | uh yeah,21 43 | no yes,21 44 | of course i do,20 45 | no i hate it,20 46 | a lot,20 47 | yes no,19 48 | yes a lot,19 49 | huh,18 50 | yes very much,17 51 | -------------------------------------------------------------------------------- /chirpy/response_generators/opinion2/user_interest/common_solicit_reason_responses.csv: -------------------------------------------------------------------------------- 1 | yes,1002 2 | yeah,713 3 | 
no,650 4 | i don't know,434 5 | same,285 6 | me too,188 7 | i agree,147 8 | because,133 9 | sure,92 10 | yup,92 11 | okay,81 12 | it's fun,66 13 | because it's fun,58 14 | it's boring,56 15 | well,51 16 | i don't,47 17 | yeah me too,45 18 | good,38 19 | the same,35 20 | i feel the same way,35 21 | because it's boring,35 22 | what,33 23 | not really,32 24 | repeat,31 25 | they're cute,27 26 | i like dogs,27 27 | same here,26 28 | i don't like it,25 29 | nothing,25 30 | same thing,25 31 | i like,23 32 | what did you say,22 33 | uh,22 34 | i like it,22 35 | they're fun,22 36 | yeah same,21 37 | me,20 38 | yes i do,20 39 | cool,19 40 | i do,18 41 | boring,18 42 | because they're fun,18 43 | because they're cute,18 44 | i,18 45 | i just don't,18 46 | cause,17 47 | 2,17 48 | i like animals,16 49 | i guess,16 50 | yes i am,15 51 | -------------------------------------------------------------------------------- /chirpy/response_generators/opinion2/user_interest/common_solicit_reason_responses_labeled.csv: -------------------------------------------------------------------------------- 1 | yes,continue,haojun 2 | yeah,continue,haojun 3 | no,exit,haojun 4 | i don't know,exit,haojun 5 | same,continue,haojun 6 | me too,continue,haojun 7 | i agree,continue,haojun 8 | because,exit,haojun 9 | sure,continue,haojun 10 | yup,continue,haojun 11 | okay,continue,haojun 12 | it's fun,continue,haojun 13 | because it's fun,continue,haojun 14 | it's boring,exit,haojun 15 | well,exit,haojun 16 | i don't,exit,haojun 17 | yeah me too,continue,haojun 18 | good,continue,haojun 19 | the same,continue,haojun 20 | i feel the same way,continue,haojun 21 | because it's boring,continue,haojun 22 | what,exit,haojun 23 | not really,exit,haojun 24 | repeat,exit,haojun 25 | they're cute,continue,haojun 26 | i like dogs,continue,haojun 27 | same here,continue,haojun 28 | i don't like it,exit,haojun 29 | nothing,exit,haojun 30 | same thing,continue,haojun 31 | i like,exit,haojun 32 | what did you 
def write_to_csv(path, responses):
    """Append rows to the CSV file at ``path``, creating it if needed.

    :param path: destination file path
    :param responses: iterable of rows, each an iterable of field values
    """
    # The csv module requires files to be opened with newline='' so that it
    # controls line endings itself; without it, platforms that translate
    # "\n" end up with a blank row after every record.
    with open(path, 'a', newline='') as f:
        csv.writer(f).writerows(responses)
def inter_annotator_agreement(path):
    """Measure how often annotators gave an utterance the same label.

    Reads ``(utterance, label, annotator)`` rows from the CSV at ``path``.
    When one annotator labelled the same utterance more than once, the last
    row wins. An utterance counts as agreed when every annotator who
    labelled it chose the same label; annotators who never saw an utterance
    are simply ignored for it.

    :param path: path of the labelled CSV file
    :return: ``(agree_lst, disagree_lst, ratio)`` where ``agree_lst`` holds
        agreed utterances, ``disagree_lst`` holds ``(utterance, labels)``
        pairs with the set of conflicting labels, and ``ratio`` is the share
        of agreed utterances (``0.0`` when the file has no rows). Both lists
        follow the order of first appearance in the file.
    """
    grouped_by_annotator = defaultdict(dict)
    utterances = {}  # dict used as an ordered set: keeps file order, deterministic output
    for row in read_csv(path):
        if not row:
            # csv.reader yields [] for blank lines; they carry no label.
            continue
        utterance, label, annotator = row
        grouped_by_annotator[annotator][utterance] = label
        utterances.setdefault(utterance, None)

    agree_lst = []
    disagree_lst = []
    for utterance in utterances:
        # Only consult annotators that actually labelled this utterance.
        labels = {
            labeled[utterance]
            for labeled in grouped_by_annotator.values()
            if utterance in labeled
        }
        if len(labels) == 1:
            agree_lst.append(utterance)
        else:
            disagree_lst.append((utterance, labels))

    total = len(agree_lst) + len(disagree_lst)
    ratio = len(agree_lst) / total if total else 0.0
    return agree_lst, disagree_lst, ratio
# Phrases that signal the user wants to move to (or names) another topic.
CHANGE_TOPIC_PHRASE = ["talk about", "tell me about"]


class ChangeTopicTemplate(RegexTemplate):
    """Regex template that matches any utterance containing a change-topic phrase.

    The matched phrase is exposed through the ``change_topic`` slot.
    """

    slots = dict(change_topic=CHANGE_TOPIC_PHRASE)

    templates = [
        OPTIONAL_TEXT_PRE + "{change_topic}" + OPTIONAL_TEXT_POST,
    ]

    # Each matching utterance is paired with the phrase expected in the slot.
    positive_examples = [
        (utterance, {'change_topic': phrase})
        for utterance, phrase in (
            ("let's talk about grand theft auto", 'talk about'),
            ("i don't want to talk about it", 'talk about'),
            ('can we talk about food', 'talk about'),
            ('can you tell me about wolves', 'tell me about'),
        )
    ]

    # Utterances that must not match.
    negative_examples = [
        "No, there isn't a problem",
    ]
class NegatedGratitudeTemplate(RegexTemplate):
    """Regex template for a gratitude word that is preceded by a negation.

    Matches a word from ``NEGATING_WORDS`` followed, anywhere later in the
    utterance, by a word from ``GRATITUDE_WORDS``.
    """

    slots = dict(
        gratitude_word=GRATITUDE_WORDS,
        negator=NEGATING_WORDS,
    )

    templates = [
        OPTIONAL_TEXT_PRE + "{negator}" + OPTIONAL_TEXT_MID + "{gratitude_word}" + OPTIONAL_TEXT_POST,
    ]

    # (utterance, expected slot values) pairs that must match.
    # Disabled upstream, kept for reference: "i don't think that's nice".
    positive_examples = [
        ("no thanks", {'negator': 'no', 'gratitude_word': 'thanks'}),
        ("that's not helpful", {'negator': 'not', 'gratitude_word': 'helpful'}),
    ]

    # Plain gratitude without a negation must not match.
    negative_examples = [
        "thank you, that was helpful",
    ]
templates = [ 24 | OPTIONAL_TEXT_PRE + "{negative_emotion}" + OPTIONAL_TEXT_POST, 25 | OPTIONAL_TEXT_PRE + "{negator}" + OPTIONAL_TEXT_MID + "{positive_emotion}" + OPTIONAL_TEXT_POST, 26 | ] 27 | positive_examples = [ 28 | ("i'm feeling pretty sad", {'negative_emotion': "sad"}), 29 | ("all of this doesn't make me very happy", {'positive_emotion': "happy", 'negator': "doesn't"}), 30 | ("it's lonely", {'negative_emotion': 'lonely'}) 31 | ] 32 | negative_examples = [ 33 | "i'm pretty happy about how this turned out", 34 | 'did you want to talk about something?', 35 | ] -------------------------------------------------------------------------------- /chirpy/response_generators/personal_issues/regex_templates/personal_pronoun_template.py: -------------------------------------------------------------------------------- 1 | from chirpy.core.regex.regex_template import RegexTemplate 2 | from chirpy.core.regex.util import OPTIONAL_TEXT_POST, OPTIONAL_TEXT_PRE 3 | 4 | # https://www.thefreedictionary.com/List-of-pronouns.htm 5 | PRONOUN_WORDS = [ 6 | "i", "i'd", "i've", "i'll", "i'm", 7 | "we", "we'd", "we're", "we've", "we'll", "us", "ours", 8 | "he", "he'd", "he'll", "he's", "him", "his", 9 | "she", "she'd", "she'll", "she's", "her", "hers", 10 | "they", "they'd", "they'll", "they've", "they're", "them", "theirs", 11 | "me", "my", "myself", "mine" 12 | ] # specifically, personal/object/possessive pronouns + contractions, but you/yours, etc excluded 13 | 14 | class PersonalPronounRegexTemplate(RegexTemplate): 15 | slots = { 16 | 'pronoun_word': PRONOUN_WORDS 17 | } 18 | templates = [ 19 | OPTIONAL_TEXT_PRE + "{pronoun_word}" + OPTIONAL_TEXT_POST, 20 | ] 21 | positive_examples = [ 22 | ("the doctors aren't sure if she will walk again", {'pronoun_word': "she"}), 23 | ("he'd call us names since we were young.", {'pronoun_word': "us"}), 24 | ("she'll be unhappy if I disappoint her", {'pronoun_word': "her"}), 25 | ("all by myself", {'pronoun_word': "myself"}), 26 | ("why would they 
do this to me", {'pronoun_word': "they"}) 27 | ] 28 | negative_examples = [ 29 | "No, there isn't a problem", 30 | 'Did you want to talk about something?', 31 | ] -------------------------------------------------------------------------------- /chirpy/response_generators/personal_issues/regex_templates/yes_template.py: -------------------------------------------------------------------------------- 1 | """ 2 | Adapted from movies RG's YesTemplate 3 | """ 4 | 5 | from chirpy.core.regex.regex_template import RegexTemplate 6 | from chirpy.core.regex.util import OPTIONAL_TEXT, OPTIONAL_TEXT_POST, OPTIONAL_TEXT_PRE, OPTIONAL_TEXT_MID 7 | 8 | YES_WORDS = [ 9 | "yes", 10 | "all right", 11 | "alright", 12 | "very well", 13 | "of course", 14 | "by all means", 15 | "sure", 16 | "certainly", 17 | "absolutely", 18 | "indeed", 19 | "right", 20 | "affirmative", 21 | "in the affirmative", 22 | "agreed", 23 | "roger", 24 | "aye aye", 25 | "yeah", 26 | "yep", 27 | "yup", 28 | "ya", 29 | "uh-huh", 30 | "okay", 31 | "ok", 32 | "okey-dokey", 33 | "okey-doke", 34 | "yea", 35 | "aye", 36 | "course", 37 | "duh" 38 | ] 39 | 40 | class YesTemplate(RegexTemplate): 41 | slots = { 42 | 'yes_word': YES_WORDS, 43 | } 44 | templates = [ 45 | OPTIONAL_TEXT_PRE + "{yes_word}" + OPTIONAL_TEXT_POST 46 | ] 47 | positive_examples = [ 48 | ("yes let's keep talking", {'yes_word': 'yes'}), 49 | ("alright i will keep talking", {'yes_word': 'alright'}) 50 | ] 51 | 52 | negative_examples = [ 53 | "i don't want to talk about this any more", 54 | "can we talk about something else" 55 | ] 56 | 57 | 58 | -------------------------------------------------------------------------------- /chirpy/response_generators/personal_issues/response_templates/__init__.py: -------------------------------------------------------------------------------- 1 | from .first_turn_response_template import FirstTurnResponseTemplate 2 | from .ending_response_template import EndingResponseTemplate 3 | from 
.subsequent_turn_response_template import * 4 | from .possible_continue_response_templates import * 5 | from .gpt_prefix_response_template import GPTPrefixResponseTemplate 6 | # from .backchannel_response_template import BackchannelResponseTemplate -------------------------------------------------------------------------------- /chirpy/response_generators/personal_issues/response_templates/ending_response_template.py: -------------------------------------------------------------------------------- 1 | from chirpy.core.response_generator.response_template import ResponseTemplateFormatter 2 | from chirpy.response_generators.personal_issues.response_templates.response_components import STATEMENTS_THANKING, \ 3 | STATEMENTS_EXPRESS_OPINION, STATEMENTS_OFFER_LISTEN, STATEMENTS_CHANGE_SUBJECT, STATEMENTS_REASSURANCE 4 | 5 | STATEMENTS_LISTEN_NEXT_TIME = [ 6 | "I'm always here to listen if you need it.", 7 | "I'm always happy to listen if you'd like to talk about this again.", 8 | "I'll be here with a listening ear if you'd like to talk about this again." 9 | "" 10 | ] 11 | 12 | class EndingResponseTemplate(ResponseTemplateFormatter): 13 | slots = { 14 | "statement_thanking": STATEMENTS_THANKING, 15 | "express_opinion": STATEMENTS_EXPRESS_OPINION, 16 | "reassurance": STATEMENTS_REASSURANCE, 17 | "offer": STATEMENTS_LISTEN_NEXT_TIME, # note: a bit awkward to have this after express_opinion 18 | # example: Thanks for sharing this with me. I'm happy that we got to talk about this. I'm here to listen to you. Do you want to talk about something else? 
19 | "change_subject": STATEMENTS_CHANGE_SUBJECT 20 | } 21 | 22 | templates = [ 23 | "{statement_thanking} {express_opinion} and {reassurance} {change_subject}", 24 | "{statement_thanking} {express_opinion} {change_subject}", 25 | "{statement_thanking} {reassurance} and {offer} {change_subject}" 26 | ] -------------------------------------------------------------------------------- /chirpy/response_generators/personal_issues/response_templates/first_turn_response_template.py: -------------------------------------------------------------------------------- 1 | from chirpy.response_generators.personal_issues.response_templates.response_components import STATEMENTS_OFFER_LISTEN, \ 2 | BEGIN_LISTEN, STATEMENTS_VALIDATE, FIRST_TURN_VALIDATE 3 | from chirpy.core.regex.regex_template import RegexTemplate 4 | from chirpy.core.regex.util import OPTIONAL_TEXT, OPTIONAL_TEXT_PRE, OPTIONAL_TEXT_POST 5 | from chirpy.core.response_generator.response_template import ResponseTemplateFormatter 6 | 7 | import logging 8 | 9 | logger = logging.getLogger('chirpylogger') 10 | 11 | 12 | class FirstTurnResponseTemplate(ResponseTemplateFormatter): 13 | slots = { 14 | "begin_listen": BEGIN_LISTEN, 15 | "validate": FIRST_TURN_VALIDATE, 16 | "encourage_sharing": STATEMENTS_OFFER_LISTEN 17 | } 18 | 19 | templates = [ 20 | "{begin_listen} {validate} {encourage_sharing}" 21 | ] 22 | 23 | -------------------------------------------------------------------------------- /chirpy/response_generators/personal_issues/response_templates/gpt_prefix_response_template.py: -------------------------------------------------------------------------------- 1 | from chirpy.response_generators.personal_issues.response_templates.response_components import * 2 | from chirpy.core.regex.regex_template import RegexTemplate 3 | from chirpy.core.regex.util import OPTIONAL_TEXT, OPTIONAL_TEXT_PRE, OPTIONAL_TEXT_POST 4 | from chirpy.core.response_generator.response_template import ResponseTemplateFormatter 5 | 6 | import 
logging 7 | 8 | logger = logging.getLogger('chirpylogger') 9 | 10 | PRIMER = [ 11 | "hopefully", 12 | "I hope" 13 | ] 14 | 15 | class GPTPrefixResponseTemplate(ResponseTemplateFormatter): 16 | slots = { 17 | "validate": STATEMENTS_VALIDATE, 18 | "primer": PRIMER 19 | } 20 | 21 | templates = [ 22 | "{validate} {primer}" 23 | ] 24 | -------------------------------------------------------------------------------- /chirpy/response_generators/personal_issues/response_templates/possible_continue_response_templates.py: -------------------------------------------------------------------------------- 1 | from chirpy.core.response_generator.response_template import ResponseTemplateFormatter 2 | from chirpy.response_generators.personal_issues.response_templates.response_components import SUB_ACKNOWLEDGEMENT, \ 3 | QUESTIONS_ANYTHING_ELSE, QUESTIONS_ENCOURAGE_SHARING, \ 4 | QUESTIONS_CHANGE_SUBJECT, STATEMENTS_EXPRESS_CONFUSION, STATEMENTS_OFFER_LISTEN, SUB_YES 5 | 6 | class PossibleContinueResponseTemplate(ResponseTemplateFormatter): 7 | slots = { 8 | "acknowledge": SUB_ACKNOWLEDGEMENT, 9 | "continue_questions": QUESTIONS_ANYTHING_ELSE + QUESTIONS_ENCOURAGE_SHARING #+ QUESTIONS_CHANGE_SUBJECT, 10 | } 11 | 12 | templates = [ 13 | "{acknowledge} {continue_questions}" 14 | ] 15 | 16 | 17 | class ConfusedPossibleContinueResponseTemplate(ResponseTemplateFormatter): 18 | slots = { 19 | "confusion": STATEMENTS_EXPRESS_CONFUSION, 20 | "continue_questions": QUESTIONS_ANYTHING_ELSE + QUESTIONS_CHANGE_SUBJECT, 21 | } 22 | 23 | templates = [ 24 | "{confusion} {continue_questions}" 25 | ] 26 | 27 | 28 | class PossibleContinueAcceptedResponseTemplate(ResponseTemplateFormatter): 29 | slots = { 30 | "ok": SUB_YES, 31 | "here_to_listen": STATEMENTS_OFFER_LISTEN, 32 | # "anything_else": QUESTIONS_ANYTHING_ELSE // avoid repeating from PossibleContinue 33 | } 34 | 35 | templates = [ 36 | "{ok} {here_to_listen}", 37 | # "{ok} {anything_else}" 38 | ] 
-------------------------------------------------------------------------------- /chirpy/response_generators/personal_issues/response_templates/subsequent_turn_response_template.py: -------------------------------------------------------------------------------- 1 | 2 | from chirpy.annotators.corenlp import Sentiment 3 | from chirpy.response_generators.personal_issues.response_templates.response_components import * 4 | from chirpy.core.response_generator.response_template import ResponseTemplateFormatter 5 | 6 | import logging 7 | 8 | logger = logging.getLogger('chirpylogger') 9 | 10 | 11 | class SubsequentTurnResponseTemplate(ResponseTemplateFormatter): 12 | slots = { 13 | "validate": FIRST_TURN_VALIDATE + STATEMENTS_VALIDATE, 14 | "question": QUESTIONS_REFLECTIVE + QUESTIONS_SOLUTION, 15 | "sharing": STATEMENTS_OFFER_LISTEN 16 | } 17 | 18 | templates = [ 19 | "{validate} {question}", 20 | "{validate} {sharing}" 21 | ] 22 | 23 | 24 | """ 25 | For use in conjunction with GPT2's initial response 26 | """ 27 | class PartialSubsequentTurnResponseTemplate(ResponseTemplateFormatter): 28 | slots = { 29 | "question": QUESTIONS_REFLECTIVE + QUESTIONS_SOLUTION, 30 | "sharing": STATEMENTS_OFFER_LISTEN 31 | } 32 | 33 | templates = [ 34 | "{question}", 35 | "{sharing}" 36 | ] 37 | 38 | 39 | class ValidationResponseTemplate(ResponseTemplateFormatter): 40 | slots = { 41 | 'acknowledgment': SUB_ACKNOWLEDGEMENT, 42 | 'validate': STATEMENTS_VALIDATE 43 | } 44 | 45 | templates = [ 46 | "{acknowledgment} {validate}" 47 | ] 48 | 49 | 50 | class BackchannelResponseTemplate(ResponseTemplateFormatter): 51 | slots = { 52 | "backchannel": SUB_ACKNOWLEDGEMENT 53 | } 54 | 55 | templates = [ 56 | "{backchannel}" 57 | ] 58 | -------------------------------------------------------------------------------- /chirpy/response_generators/personal_issues/state.py: -------------------------------------------------------------------------------- 1 | from chirpy.core.response_generator.state import * 2 
| 3 | @dataclass 4 | class State(BaseState): 5 | personal_issue_score: int = 0 6 | question_last_turn: bool = False 7 | neural_last_turn: bool = False 8 | 9 | @dataclass 10 | class ConditionalState(BaseConditionalState): 11 | personal_issue_score: int = NO_UPDATE 12 | question_last_turn: bool = False 13 | neural_last_turn: bool = False 14 | -------------------------------------------------------------------------------- /chirpy/response_generators/personal_issues/treelets/__init__.py: -------------------------------------------------------------------------------- 1 | from .first_turn_treelet import FirstTurnTreelet 2 | from .subsequent_turn_treelet import SubsequentTurnTreelet 3 | from .possible_continue_treelet import PossibleContinueTreelet 4 | from .ending_treelet import EndingTreelet 5 | from .possible_continue_accepted_treelet import PossibleContinueAcceptedTreelet 6 | # from .backchannel_treelet import BackchannelTreelet 7 | -------------------------------------------------------------------------------- /chirpy/response_generators/personal_issues/treelets/ending_treelet.py: -------------------------------------------------------------------------------- 1 | """ 2 | Trenton Chang, Caleb Chiam - Nov. 2020 3 | ending_treelet.py 4 | 5 | The following treelet is a core component in the personal issues response generator. This treelet is intended to provide a natural 6 | way for Chirpy to end this phase of the conversation with the user, and should be used when the conversation has reached its end 7 | NOT due to negative navigational intent. Subsequent turns may change the subject, or continue talking about the user's personal issue. 
8 | 9 | 10 | """ 11 | 12 | from chirpy.core.response_generator import Treelet 13 | from chirpy.response_generators.personal_issues.state import State, ConditionalState 14 | from chirpy.response_generators.personal_issues.response_templates.ending_response_template import \ 15 | EndingResponseTemplate 16 | from chirpy.core.response_generator_datatypes import ResponseGeneratorResult 17 | from chirpy.response_generators.personal_issues.personal_issues_helpers import ResponseType 18 | 19 | import logging 20 | from typing import Set # NOQA 21 | logger = logging.getLogger('chirpylogger') 22 | 23 | class EndingTreelet(Treelet): 24 | """ 25 | Provides an option to naturally end the conversation. 26 | """ 27 | name = "personal_issues_ending" 28 | 29 | def get_response(self, priority): 30 | """Generates a ResponseGeneratorResult containing that treelet's response to the user. 31 | 32 | Args: 33 | state (State): representation of the RG's internal state; see state.py for definition. 34 | utterance (str): what the user just said 35 | response_types (Set[ResponseType]): type of response given by the user; see ...personal_issues/personal_issues_utils.py 36 | for a full definition of the ResponseTypes enum. 37 | priority (ResponsePriority): five-level response priority tier. 38 | 39 | Returns: 40 | ResponseGeneratorResult: an object encapsulating the textual response given by the RG, and some metadata. 
41 | """ 42 | state, utterance, response_types = self.get_state_utterance_response_types() 43 | template = EndingResponseTemplate() 44 | response = template.sample() 45 | conditional_state = ConditionalState(prev_treelet_str=self.name, 46 | next_treelet_str='transition') 47 | return ResponseGeneratorResult(text=response, priority=priority, needs_prompt=True, state=state, 48 | cur_entity=None, conditional_state=conditional_state) 49 | -------------------------------------------------------------------------------- /chirpy/response_generators/personal_issues/treelets/first_turn_treelet.py: -------------------------------------------------------------------------------- 1 | """ 2 | Trenton Chang, Caleb Chiam - Nov. 2020 3 | first_turn_treelet.py 4 | 5 | The following treelet initiates the conversation about personal issues when prompted by the user (i.e. a negative 6 | personal disclosure was detected). 7 | """ 8 | import logging 9 | 10 | # RG IMPORTS 11 | from chirpy.response_generators.personal_issues.response_templates.first_turn_response_template import FirstTurnResponseTemplate 12 | from chirpy.core.response_generator import * 13 | from chirpy.response_generators.personal_issues.state import State, ConditionalState 14 | 15 | # CORE MODULE IMPORTS 16 | from chirpy.core.response_generator_datatypes import ResponseGeneratorResult 17 | logger = logging.getLogger('chirpylogger') 18 | 19 | 20 | class FirstTurnTreelet(Treelet): 21 | name = "personal_issues_first_turn" 22 | 23 | def get_response(self, priority=ResponsePriority.STRONG_CONTINUE, **kwargs): 24 | """Generates a ResponseGeneratorResult containing that treelet's response to the user. 25 | 26 | Args: 27 | state (State): representation of the RG's internal state; see state.py for definition. 28 | utterance (str): what the user just said 29 | response_types (ResponseTypes): type of response given by the user; see ...personal_issues/personal_issues_utils.py 30 | for a full definition of the ResponseTypes enum. 
31 | priority (ResponsePriority): five-level response priority tier. 32 | 33 | Returns: 34 | ResponseGeneratorResult: an object encapsulating the textual response given by the RG, and some metadata. 35 | """ 36 | state, utterance, response_types = self.get_state_utterance_response_types() 37 | template = FirstTurnResponseTemplate() 38 | response = template.sample() 39 | conditional_state = ConditionalState(prev_treelet_str=self.name, 40 | next_treelet_str='transition') 41 | return ResponseGeneratorResult(text=response, priority=priority, 42 | needs_prompt=False, state=state, 43 | cur_entity=None, conditional_state=conditional_state) 44 | 45 | 46 | 47 | -------------------------------------------------------------------------------- /chirpy/response_generators/personal_issues/treelets/possible_continue_treelet.py: -------------------------------------------------------------------------------- 1 | """ 2 | Trenton Chang, Caleb Chiam - Nov. 2020 3 | possible_continue_treelet.py 4 | 5 | This treelet triggers when the user expresses noncommittal/disinterested feelings, and asks whether the user would 6 | like to continue talking to this RG. 
7 | 8 | """ 9 | from chirpy.core.response_generator_datatypes import ResponseGeneratorResult, ResponsePriority, emptyResult 10 | from chirpy.core.response_generator import Treelet 11 | from chirpy.response_generators.personal_issues.state import State, ConditionalState 12 | from chirpy.response_generators.personal_issues.personal_issues_helpers import ResponseType 13 | from chirpy.response_generators.personal_issues.response_templates import PossibleContinueResponseTemplate, \ 14 | ConfusedPossibleContinueResponseTemplate 15 | 16 | import logging 17 | logger = logging.getLogger('chirpylogger') 18 | 19 | 20 | class PossibleContinueTreelet(Treelet): 21 | """ 22 | Checks if the user wants to continue talking about his/her personal issue; 23 | specifically, when the user has already made some personal disclosures in previous turns, 24 | but is now giving noncommittal or possibly disinterested responses. 25 | 26 | """ 27 | name = "personal_issues_possible_continue" 28 | 29 | def get_response(self, priority=ResponsePriority.STRONG_CONTINUE, **kwargs): 30 | """Generates a ResponseGeneratorResult containing that treelet's response to the user. 31 | 32 | Args: 33 | state (State): representation of the RG's internal state; see state.py for definition. 34 | utterance (str): what the user just said 35 | response_types (Set[ResponseTypes]): type of response given by the user; see ...personal_issues/personal_issues_utils.py 36 | for a full definition of the ResponseTypes enum. 37 | priority (ResponsePriority): five-level response priority tier. 38 | 39 | Returns: 40 | ResponseGeneratorResult: an object encapsulating the textual response given by the RG, and some metadata. 
41 | """ 42 | state, utterance, response_types = self.get_state_utterance_response_types() 43 | template = PossibleContinueResponseTemplate() 44 | response = template.sample() 45 | conditional_state = ConditionalState(prev_treelet_str=self.name, 46 | next_treelet_str='transition') 47 | return ResponseGeneratorResult(text=response, priority=priority, needs_prompt=False, state=state, 48 | cur_entity=None, conditional_state=conditional_state) 49 | -------------------------------------------------------------------------------- /chirpy/response_generators/red_question/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanfordnlp/chirpycardinal/a43f0cf3cc21c406a4acd60428d53c44c15f5947/chirpy/response_generators/red_question/__init__.py -------------------------------------------------------------------------------- /chirpy/response_generators/red_question/regex_templates/__init__.py: -------------------------------------------------------------------------------- 1 | from .are_you_recording_template import AreYouRecordingTemplate 2 | -------------------------------------------------------------------------------- /chirpy/response_generators/red_question/regex_templates/are_you_recording_template.py: -------------------------------------------------------------------------------- 1 | from chirpy.core.regex.regex_template import RegexTemplate 2 | from chirpy.core.regex.util import OPTIONAL_TEXT_POST, OPTIONAL_TEXT_PRE, OPTIONAL_TEXT_MID 3 | 4 | class AreYouRecordingTemplate(RegexTemplate): 5 | slots = { 6 | 'RECORD': ['record', 'recorded', 'records', 'recording'], 7 | 'modifier': ['have', 'will be', 'will', 'are', 'like', 'like to', 'want to'] 8 | } 9 | 10 | templates = [ 11 | OPTIONAL_TEXT_PRE + "you {RECORD}" + OPTIONAL_TEXT_POST, 12 | OPTIONAL_TEXT_PRE + "you {modifier} {RECORD}" + OPTIONAL_TEXT_POST 13 | ] 14 | 15 | positive_examples = [ 16 | ("do you record conversations", {'RECORD': 
'record'}), 17 | ("are you recording this", {'RECORD': 'recording'}), 18 | ("are you recording this without my consent", {'RECORD': 'recording'}), 19 | ("alexa are you recording this conversation", {'RECORD': 'recording'}), 20 | ("echo are you recording this conversation", {'RECORD': 'recording'}), 21 | ("alexa have you recorded our conversations", {'RECORD': 'recorded'}), 22 | ("i bet you have recorded our conversations", {"RECORD": "recorded", "modifier": "have"}), 23 | ("do you like recording conversations", {"modifier": "like", "RECORD": "recording"}) 24 | 25 | ] 26 | 27 | negative_examples = [ 28 | "the government might be recording this interaction", 29 | "i don't like people listening in on my conversations", 30 | "you know people like to record conversations" 31 | ] 32 | -------------------------------------------------------------------------------- /chirpy/response_generators/transition/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanfordnlp/chirpycardinal/a43f0cf3cc21c406a4acd60428d53c44c15f5947/chirpy/response_generators/transition/__init__.py -------------------------------------------------------------------------------- /chirpy/response_generators/transition/state.py: -------------------------------------------------------------------------------- 1 | from dataclasses import dataclass, field 2 | from typing import Set 3 | from chirpy.core.response_generator.state import * 4 | ### 5 | # Define the state that will be returned by all treelets 6 | ### 7 | 8 | @dataclass 9 | class State(BaseState): 10 | entities_prompted: Set[str] = field(default_factory=set) 11 | 12 | @dataclass 13 | class ConditionalState(BaseConditionalState): 14 | entities_prompted: Set[str] = NO_UPDATE 15 | from_entity: Optional['WikiEntity'] = NO_UPDATE 16 | -------------------------------------------------------------------------------- /chirpy/response_generators/wiki2/__init__.py: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanfordnlp/chirpycardinal/a43f0cf3cc21c406a4acd60428d53c44c15f5947/chirpy/response_generators/wiki2/__init__.py -------------------------------------------------------------------------------- /chirpy/response_generators/wiki2/category_associations.txt: -------------------------------------------------------------------------------- 1 | ('artist', 'human', 'politician'): politician 2 | ('app_or_website', 'general_technology'): app_or_website 3 | ('app_or_website', 'company', 'general_technology'): company 4 | ('academic_subject', 'group_of_people', 'human'): group_of_people 5 | ('fictional_character', 'toy'): toy 6 | ('human', 'politician'): politician 7 | ('actor', 'artist', 'athlete', 'human'): actor 8 | ('athlete', 'human'): athlete 9 | ('general_technology', 'toy'): general_technology 10 | ('artist', 'athlete', 'human'): athlete 11 | ('actor', 'artist', 'dancer', 'human', 'musician'): musician 12 | ('food', 'taxon'): food 13 | ('film', 'musical_work'): musical_work 14 | ('artist', 'human', 'musician'): musician 15 | ('animal', 'food'): food 16 | ('film', 'media_franchise'): media_franchise 17 | ('actor', 'artist', 'human', 'musician'): musician 18 | ('animal', 'food', 'taxon'): animal 19 | ('group_of_people', 'human'): group_of_people 20 | ('artist', 'human'): artist 21 | ('animal', 'taxon'): animal 22 | ('book', 'musical_work'): musical_work 23 | ('actor', 'artist', 'athlete', 'musician'): musician 24 | ('media_franchise', 'toy'): toy 25 | ('app_or_website', 'tv_channel'): app_or_website 26 | ('game', 'media_franchise'): game 27 | ('fictional_character', 'human'): fictional_character 28 | ('human', 'location', 'politician'): location 29 | ('app_or_website', 'general_technology', 'tv_channel'): app_or_website 30 | ('anime', 'tv_show'): tv_show 31 | ('actor', 'artist', 'comedian', 'human', 'musician'): actor 32 | ('actor', 'artist', 'dancer', 
'fashion_designer', 'human', 'musician'): musician 33 | ('artist', 'dancer', 'human', 'musician'): musician 34 | ('actor', 'artist', 'human'): actor 35 | ('artist', 'athlete', 'human', 'politician', 'sport'): actor 36 | ('company', 'restaurant_chain'): restaurant_chain 37 | ('artist', 'human', 'musical_group'): musical_group 38 | ('athlete', 'fictional_character', 'human'): fictional_character 39 | ('actor', 'artist', 'comedian', 'human'): actor 40 | ('artist', 'fictional_character', 'human'): fictional_character -------------------------------------------------------------------------------- /chirpy/response_generators/wiki2/category_ranking.txt: -------------------------------------------------------------------------------- 1 | sport 2 | food 3 | actor 4 | animal 5 | company 6 | app_or_website 7 | tv_channel 8 | musical_instrument 9 | media_franchise 10 | athlete 11 | musician 12 | artist 13 | general_technology 14 | mythical_creature 15 | game 16 | restaurant_chain 17 | location 18 | film 19 | politician 20 | dancer 21 | tv_show 22 | fictional_character 23 | sports_team 24 | tourist_attraction 25 | comedian 26 | musical_group 27 | toy 28 | book 29 | musical_work 30 | academic_subject 31 | anime 32 | dance 33 | group_of_people 34 | mode_of_transport 35 | fashion_designer 36 | taxon 37 | human -------------------------------------------------------------------------------- /chirpy/response_generators/wiki2/doctest.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | for file in ./chirpy/response_generators/wiki/* 4 | do 5 | echo "$file" 6 | if [ -f $file ] 7 | then 8 | if [ ${file: -3} == ".py" ] 9 | then 10 | python -m doctest "$file" 11 | fi 12 | fi 13 | done 14 | 15 | for file in ./chirpy/response_generators/wiki/*/* 16 | do 17 | echo "$file" 18 | if [ -f $file ] 19 | then 20 | if [ ${file: -3} == ".py" ] 21 | then 22 | python -m doctest "$file" 23 | fi 24 | fi 25 | done 
-------------------------------------------------------------------------------- /chirpy/response_generators/wiki2/first_turn_templates_notes.md: -------------------------------------------------------------------------------- 1 | First turn templates are specifically designed such that the bot expresses a personal opinion or observation 2 | about the entity. They should not include TILs ("did you know...") or purely factual statements ("[entity] was born in ..."). 3 | Ideally, they should be two-parters, vaguely meaning: two sentences, where one sentence sets up the point, and the second one delivers it. 4 | -------------------------------------------------------------------------------- /chirpy/response_generators/wiki2/pronouns.py: -------------------------------------------------------------------------------- 1 | ANIMATE_ENTITY_GROUPS = [ 2 | 'musician', 3 | 'artist', 4 | 'fashion_designer', 5 | 'politician', 6 | 'comedian', 7 | 'actor' 8 | ] 9 | 10 | MASC_PRONOUNS = ['he', 'him', 'his'] 11 | FEM_PRONOUNS = ['she', 'her'] 12 | 13 | 14 | def guess_pronoun(sentences): 15 | pronouns = {'m': 0, 'f': 0} 16 | for sentence in sentences: 17 | for word in sentence.split(): 18 | word = word.lower() 19 | if word in MASC_PRONOUNS: pronouns['m'] += 1 20 | elif word in FEM_PRONOUNS: pronouns['f'] += 1 21 | key = max(pronouns, key=lambda p: pronouns[p]) 22 | key_to_pronoun = { 23 | 'm': ('he', 'him', 'his'), 24 | 'f': ('she', 'her', 'her') 25 | } 26 | return key_to_pronoun[key] 27 | 28 | 29 | def is_animate(ent_group): 30 | return ent_group in ANIMATE_ENTITY_GROUPS 31 | 32 | 33 | def get_pronoun(ent_group, sentences): 34 | if is_animate(ent_group): 35 | return guess_pronoun(sentences) 36 | else: 37 | return 'it', 'it', 'its' 38 | -------------------------------------------------------------------------------- /chirpy/response_generators/wiki2/regex_templates/__init__.py: -------------------------------------------------------------------------------- 1 | from 
.did_you_know_template import DidYouKnowQuestionTemplate 2 | from .confused_template import ClarificationQuestionTemplate, DoubtfulTemplate 3 | from .acknowledgment_template import PositiveAcknowledgementTemplate 4 | from .receptive_template import AppreciativeTemplate, KnowMoreTemplate, AgreementTemplate, DisagreementTemplate 5 | -------------------------------------------------------------------------------- /chirpy/response_generators/wiki2/regex_templates/acknowledgment_template.py: -------------------------------------------------------------------------------- 1 | from chirpy.response_generators.wiki2.response_templates.response_components import POSITIVE_ACKNOWLEDGEMENTS 2 | from chirpy.core.regex.regex_template import RegexTemplate 3 | from chirpy.core.regex.util import * 4 | 5 | class PositiveAcknowledgementTemplate(RegexTemplate): 6 | slots = { 7 | 'acknowledgement': POSITIVE_ACKNOWLEDGEMENTS, 8 | } 9 | templates = [ 10 | OPTIONAL_TEXT_PRE + "{acknowledgement}" + OPTIONAL_TEXT_POST, 11 | ] 12 | positive_examples = [ 13 | ("that\'s cool", {'acknowledgement': 'cool'}), 14 | ] 15 | negative_examples = [ 16 | 'i don\'t understand', 17 | ] -------------------------------------------------------------------------------- /chirpy/response_generators/wiki2/regex_templates/did_you_know_template.py: -------------------------------------------------------------------------------- 1 | from chirpy.core.regex.regex_template import RegexTemplate 2 | from chirpy.core.regex.util import * 3 | 4 | class DidYouKnowQuestionTemplate(RegexTemplate): 5 | slots = { 6 | 'q_word': ['did', 'do', 'have'], 7 | 'verb': ['know', 'learn', 'heard', 'like', 'try', 'tried', 'hear', 'love', 'loved'], 8 | } 9 | templates = [ 10 | OPTIONAL_TEXT_PRE_GREEDY + "{q_word} you (ever )?{verb}" + OPTIONAL_TEXT_POST 11 | ] 12 | positive_examples = [ 13 | ("oh yeah, yeah, I'll have to check those out. Have you heard about Google's geospatial data visualization company? 
It's called keyhole, and it's used in google Earth!".lower(), 14 | {'q_word': 'have', 'verb': 'heard'}), 15 | 16 | ("I love Hawaii and have been to Hawaii! Do you know about that island where they united by the great king Kamehameha?".lower(), 17 | {'q_word': 'do', 'verb': 'know'}), 18 | ("have you ever tried Blue, the banana?", 19 | {'q_word': 'have', 'verb': 'tried'}) 20 | ] 21 | negative_examples = [ 22 | "i guess he suffers from a form of the depression that has happened with people before", 23 | "I heard that Anton Salonen caused an international incident after his Finnish father, with the help of Finnish diplomats, kidnapped his son back after the boys Russian mother kidnapped the boy in the first place. I wonder if he's a Finnish citizen?", 24 | ] -------------------------------------------------------------------------------- /chirpy/response_generators/wiki2/response_templates/AcknowledgeUserKnowledgeTemplate.py: -------------------------------------------------------------------------------- 1 | from chirpy.core.response_generator.response_template import ResponseTemplateFormatter 2 | from chirpy.response_generators.wiki2.response_templates.response_components import POS_ONE_WORD_ACKNOWLEDGEMENTS 3 | 4 | QUESTION_USER_INTEREST = [ 5 | "When did you start having an interest in {}?", 6 | "What got you interested in {}?", 7 | "How did you get interested in {}?", 8 | "Why are you interested in {}?", 9 | "When did you start being interested in {}?", 10 | "What made you interested in {}?" 
11 | ] 12 | 13 | 14 | class AcknowledgeUserKnowledgeTemplate(ResponseTemplateFormatter): 15 | slots = { 16 | "ack": POS_ONE_WORD_ACKNOWLEDGEMENTS, 17 | "qn": QUESTION_USER_INTEREST 18 | } 19 | 20 | templates = [ 21 | "{ack}, {qn}" 22 | ] -------------------------------------------------------------------------------- /chirpy/response_generators/wiki2/response_templates/CheckUserKnowledgeTemplate.py: -------------------------------------------------------------------------------- 1 | from chirpy.core.response_generator.response_template import ResponseTemplateFormatter 2 | from chirpy.response_generators.wiki2.response_templates.response_components import GENERAL_BOT_ACKNOWLEDGEMENTS 3 | 4 | STATEMENTS_LISTEN_NEXT_TIME = [ 5 | "I'm always here to listen if you need it.", 6 | "I'm always happy to listen if you'd like to talk about this again.", 7 | "I'll be here with a listening ear if you'd like to talk about this again." 8 | "" 9 | ] 10 | 11 | QUESTION_USER_KNOWLEDGE = [ 12 | "Do you know a lot about {}?", 13 | "Do you happen to know a lot about {}?", 14 | "Are you pretty familiar with {}?", 15 | "Are you especially knowledgeable about {}?", 16 | "{is_are} {} {some_one_thing} you know a lot about?", 17 | "Would you say that {} {is_are} {some_one_thing} you know a lot about?", 18 | "Do you know a great deal about {}?", 19 | "Would I be correct in guessing that you know a lot about {}?" 
20 | ] 21 | 22 | 23 | class CheckUserKnowledgeTemplate(ResponseTemplateFormatter): 24 | slots = { 25 | "ack": GENERAL_BOT_ACKNOWLEDGEMENTS, 26 | "qn": QUESTION_USER_KNOWLEDGE 27 | } 28 | 29 | templates = [ 30 | "{qn}" 31 | ] 32 | -------------------------------------------------------------------------------- /chirpy/response_generators/wiki2/response_templates/__init__.py: -------------------------------------------------------------------------------- 1 | from .CheckUserKnowledgeTemplate import CheckUserKnowledgeTemplate 2 | from .AcknowledgeUserKnowledgeTemplate import AcknowledgeUserKnowledgeTemplate 3 | -------------------------------------------------------------------------------- /chirpy/response_generators/wiki2/treelets/__init__.py: -------------------------------------------------------------------------------- 1 | from .acknowledge_user_knowledge_treelet import AcknowledgeUserKnowledgeTreelet 2 | from .check_user_knowledge_treelet import CheckUserKnowledgeTreelet 3 | # from .open_question_treelet import OpenQuestionTreelet 4 | from .factoid_treelet import FactoidTreelet 5 | from .intro_entity_treelet import IntroEntityTreelet 6 | from .TILtreelet import TILTreelet 7 | from .discuss_article_treelet import DiscussArticleTreelet 8 | from .discuss_section_treelet import DiscussSectionTreelet 9 | from .discuss_section_further_treelet import DiscussSectionFurtherTreelet 10 | from .get_opinion_treelet import GetOpinionTreelet 11 | from .recheck_interest_treelet import RecheckInterestTreelet 12 | -------------------------------------------------------------------------------- /chirpy/response_generators/wiki2/treelets/acknowledge_user_knowledge_treelet.py: -------------------------------------------------------------------------------- 1 | from chirpy.core.response_generator.treelet import Treelet 2 | from chirpy.core.response_generator_datatypes import ResponsePriority, ResponseGeneratorResult 3 | from chirpy.response_generators.wiki2.state import 
ConditionalState 4 | from chirpy.response_generators.wiki2.response_templates import AcknowledgeUserKnowledgeTemplate 5 | 6 | 7 | class AcknowledgeUserKnowledgeTreelet(Treelet): 8 | name = "wiki_acknowledge_user_knowledge_treelet" 9 | 10 | def get_response(self, priority=ResponsePriority.STRONG_CONTINUE, **kwargs): 11 | entity = self.rg.state.cur_entity 12 | return ResponseGeneratorResult( 13 | text=AcknowledgeUserKnowledgeTemplate().sample().format(entity.talkable_name), 14 | priority=ResponsePriority.STRONG_CONTINUE, 15 | state=self.rg.state, needs_prompt=False, cur_entity=entity, 16 | conditional_state=ConditionalState(prev_treelet_str=self.name, 17 | next_treelet_str=self.rg.get_opinion_treelet.name) 18 | ) 19 | -------------------------------------------------------------------------------- /chirpy/response_generators/wiki2/treelets/check_user_knowledge_treelet.py: -------------------------------------------------------------------------------- 1 | from chirpy.core.response_generator.treelet import Treelet 2 | from chirpy.core.response_generator_datatypes import ResponsePriority, ResponseGeneratorResult 3 | from chirpy.response_generators.wiki2.state import ConditionalState 4 | from chirpy.response_generators.wiki2.response_templates import CheckUserKnowledgeTemplate 5 | 6 | import logging 7 | logger = logging.getLogger('chirpylogger') 8 | 9 | class CheckUserKnowledgeTreelet(Treelet): 10 | name = "wiki_check_user_knowledge_treelet" 11 | 12 | def get_response(self, priority=ResponsePriority.STRONG_CONTINUE, **kwargs): 13 | entity = self.rg.state.cur_entity 14 | ack = self.rg.get_acknowledgement(entity, allow_neural=True) 15 | 16 | is_person = any([cat.endswith('person') for cat in entity.wikidata_categories]) and \ 17 | all([not cat.endswith('company') for cat in entity.wikidata_categories]) 18 | text = CheckUserKnowledgeTemplate().sample().format(entity.talkable_name, 19 | is_are=['is', 'are'][int(entity.is_plural)], 20 | some_one_thing=['something', 
'someone'][int(is_person)] 21 | ) 22 | 23 | if ack is not None: 24 | text = ack + " " + text 25 | return ResponseGeneratorResult( 26 | text=text, 27 | priority=priority, 28 | state=self.rg.state, needs_prompt=False, cur_entity=entity, 29 | conditional_state=ConditionalState(prev_treelet_str=self.name, 30 | next_treelet_str='transition') 31 | ) 32 | -------------------------------------------------------------------------------- /chirpy/response_generators/wiki2/treelets/factoid_treelet.py: -------------------------------------------------------------------------------- 1 | from chirpy.core.response_generator.treelet import Treelet 2 | from chirpy.core.response_generator_datatypes import ResponsePriority, ResponseGeneratorResult 3 | from chirpy.response_generators.wiki2.state import ConditionalState 4 | import logging 5 | 6 | logger = logging.getLogger('chirpylogger') 7 | 8 | 9 | class FactoidTreelet(Treelet): 10 | """ 11 | Get a factoid about the entity 12 | """ 13 | name = "wiki_factoid_treelet" 14 | 15 | def get_response(self, priority=ResponsePriority.STRONG_CONTINUE, **kwargs): 16 | entity = self.rg.state.cur_entity 17 | state, utterance, response_types = self.get_state_utterance_response_types() 18 | top_res, top_ack = self.rg.get_infilling_statement(entity) 19 | logger.info(f"Top res is: {top_res}") 20 | logger.info(f"Top ack is: {top_ack}") 21 | if top_res is not None: 22 | return ResponseGeneratorResult( 23 | text=f"Cool, {top_res}. 
What are your thoughts on {entity.talkable_name}?", 24 | priority=priority, 25 | state=self.rg.state, needs_prompt=False, cur_entity=entity, 26 | conditional_state=ConditionalState(prev_treelet_str=self.name, 27 | next_treelet_str='transition') 28 | ) 29 | else: 30 | if kwargs.get('redirect', False): 31 | return self.rg.check_user_knowledge_treelet.get_response(priority=priority) 32 | else: 33 | return self.rg.combined_til_treelet.get_response(priority=priority, redirect=True) 34 | -------------------------------------------------------------------------------- /chirpy/response_generators/wiki2/treelets/recheck_interest_treelet.py: -------------------------------------------------------------------------------- 1 | from chirpy.core.response_generator.treelet import Treelet 2 | from chirpy.core.response_generator_datatypes import ResponsePriority, ResponseGeneratorResult 3 | from chirpy.response_generators.wiki2.state import ConditionalState 4 | from chirpy.response_generators.wiki2.response_templates.response_components import DISCUSS_FURTHER_QUESTION 5 | import logging 6 | import random 7 | 8 | logger = logging.getLogger('chirpylogger') 9 | 10 | 11 | class RecheckInterestTreelet(Treelet): 12 | """ 13 | Activates after user says "No" to Factoid / Infiller / AcknowledgeUserKnowledge 14 | """ 15 | name = "recheck_interest_treelet" 16 | 17 | def get_response(self, priority=ResponsePriority.STRONG_CONTINUE, **kwargs): 18 | entity = self.rg.state.cur_entity 19 | text = random.choice([ 20 | "Oh, I hope I didn't make a mistake in explaining that.", 21 | "Hmm, sometimes I get things wrong so I might have made a mistake!" 
22 | ]) 23 | qn = random.choice(DISCUSS_FURTHER_QUESTION) 24 | return ResponseGeneratorResult( 25 | text=f"{text} {qn.format(entity.name)}", 26 | priority=priority, 27 | state=self.rg.state, needs_prompt=False, cur_entity=entity, 28 | conditional_state=ConditionalState(prev_treelet_str=self.name, 29 | next_treelet_str='transition') 30 | ) 31 | -------------------------------------------------------------------------------- /exclude_files.txt: -------------------------------------------------------------------------------- 1 | .git/* 2 | convpara/* 3 | doc/* 4 | notebooks/* 5 | cruft/* 6 | entity_linker_eval/* 7 | idf/* 8 | docker/* 9 | -------------------------------------------------------------------------------- /find-oldest-line.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | num_files=`git ls-tree -r --name-only HEAD | wc -l` 3 | i=0 4 | for f in `git ls-tree -r --name-only HEAD`; do \ 5 | i=$((i+1)) 6 | printf "%d/%d %s\r" $i $num_files $f > /dev/tty 7 | echo "BEGIN_RECORD $f"; \ 8 | git blame -l -t -M -C -n -w -p $f; \ 9 | echo "END_RECORD $f"; \ 10 | done | ./find-old-lines.pl 11 | -------------------------------------------------------------------------------- /local_test_integ.sh: -------------------------------------------------------------------------------- 1 | # This command runs all integration tests, and saves the output to integ_test_results.txt. 2 | 3 | # Nosetests prints to stderr. 2>&1 reroutes stderr to stdout. 4 | # "| tee" writes stdout to file, while also showing stdout in terminal. 5 | 6 | # See the "integration testing" internal documentation to see how to change 7 | # the nosetests command to just run particular tests. 8 | 9 | nosetests -v test/integration_tests/*.py 2>&1 | tee integ_test_results.txt 10 | 11 | # Print out list of failed tests for convenience 12 | echo '\nList of failed tests (might be empty):' 13 | grep '... FAIL' integ_test_results.txt 14 | grep '... 
ERROR' integ_test_results.txt 15 | 16 | echo '\nSee integ_test_results.txt for full report' 17 | -------------------------------------------------------------------------------- /local_test_regex.sh: -------------------------------------------------------------------------------- 1 | # This command searches recursively through the chirpy/ directory, 2 | # and runs all unittests that can be discovered in any *.py file. 3 | # Currently, this means it runs all the RegexTemplate tests. 4 | # Note it does not run the integration tests, because those are in test/ not chirpy/ 5 | 6 | # IMPORTANT: python -m unittest discover says: 7 | # "For test discovery all test modules must be importable from the top level 8 | # directory of the project." 9 | # If your test isn't showing up, it may be because it isn't in an importable 10 | # module. Try adding an __init__.py file. 11 | 12 | python -m unittest discover -s chirpy -p '*.py' -v 13 | 14 | # There is a known problem that tests run multiple times; it seems that there 15 | # are several possible reasons why: 16 | # https://www.google.com/search?q=unittest+tests+run+twice&rlz=1C5CHFA_enUS878US878&oq=unittest+tests+run+twice&aqs=chrome..69i57j33l3.5474j1j7&sourceid=chrome&ie=UTF-8 17 | # I (Abi) am not sure how to fix it, and it seems that all our tests are being 18 | # run, so I'm leaving it as-is. 
19 | -------------------------------------------------------------------------------- /precompiled/psycopg2/_psycopg.cpython-37m-x86_64-linux-gnu.so: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanfordnlp/chirpycardinal/a43f0cf3cc21c406a4acd60428d53c44c15f5947/precompiled/psycopg2/_psycopg.cpython-37m-x86_64-linux-gnu.so -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | # This file lists requirements for running various parts of the code locally (including dashboard, notebooks, unit tests, etc) 2 | 3 | boto3 4 | pyyaml 5 | requests 6 | injector==0.12.1 7 | pytz 8 | awscli 9 | colorama 10 | shyaml 11 | jinja2 12 | six 13 | nltk 14 | 15 | jsonpickle==1.3 # this needs to match the version in lambda_requirements.txt 16 | elasticsearch==7.8.0 17 | requests_aws4auth 18 | 19 | # needed for dashboard 20 | SQLAlchemy 21 | psycopg2-binary 22 | dash 23 | pandas 24 | plotly 25 | seaborn 26 | 27 | # needed for jupyter notebooks 28 | nbdime 29 | ipython-sql 30 | 31 | # needed for unittests 32 | textblob 33 | flask_restful 34 | vaderSentiment 35 | rasa_core 36 | nose 37 | parameterized 38 | 39 | # haojun need editdisance for fuzzy search 40 | editdistance 41 | 42 | # needed for aws cluster setup 43 | aws-parallelcluster >= 2.4 44 | 45 | # entity linker 46 | text2digits 47 | tabulate 48 | 49 | 50 | # movies - checking string similarity 51 | python-Levenshtein 52 | 53 | # entity linker - needed for asr robustness 54 | g2p_en 55 | metaphone==0.6 56 | pyxDamerauLevenshtein==1.6 57 | 58 | textstat 59 | -------------------------------------------------------------------------------- /scrapers/twitter/init_es.py: -------------------------------------------------------------------------------- 1 | import boto3 2 | import os 3 | from requests_aws4auth import AWS4Auth 4 | from 
elasticsearch import Elasticsearch, RequestsHttpConnection 5 | from chirpy.core.util import get_es_host 6 | 7 | host = get_es_host("opinion_twitter") # the Amazon ES domain, with https:// 8 | region = os.environ.get('ES_REGION') 9 | service = 'es' 10 | credentials = boto3.Session().get_credentials() 11 | awsauth = AWS4Auth(credentials.access_key, credentials.secret_key, region, service, session_token=credentials.token) 12 | es = Elasticsearch( 13 | hosts = [{'host': host, 'port': 443}], 14 | http_auth = awsauth, 15 | use_ssl = True, 16 | verify_certs = True, 17 | connection_class = RequestsHttpConnection, 18 | ) 19 | 20 | mappings = { 21 | "properties": { 22 | "entity" : {"type": "text"}, 23 | "entity_keyword": {"type": "keyword"}, 24 | "reason": {"type": "text"}, 25 | "attitude": {"type": "keyword"}, 26 | "sentiment": {"type": "keyword"}, 27 | "tweet_id": {"type": "text"}, 28 | "original_text": {"type": "text"}, 29 | } 30 | } 31 | 32 | es.indices.create(index='opinion', body={'mappings': mappings}) 33 | es.indices.delete(index='opinion') 34 | result = es.search(index='opinion', body={ 35 | 'query': {'term': {'sentiment': 'positive'}} 36 | }) 37 | result = es.search(index='opinion', body={ 38 | 'query': {'term': {'entity_keyword': 'minecraft'}} 39 | }) 40 | print([hit['_source']['original_text'] for hit in result['hits']['hits']]) -------------------------------------------------------------------------------- /scrapers/twitter/init_postgres.py: -------------------------------------------------------------------------------- 1 | import psycopg2 2 | import os 3 | from chirpy.core.util import get_es_host 4 | 5 | host = 'localhost' 6 | host_stream = 'localhost' 7 | port = 5432 8 | database = 'twitter_opinions' 9 | user = os.environ.get('POSTGRES_USER') 10 | password = os.environ.get('POSTGRES_PASSWORD') 11 | conn = psycopg2.connect(host=host_stream, port=port, database=database, user=user, password=password) 12 | 13 | def fetch_sql(sql_statement): 14 | 
print(sql_statement) 15 | cur = conn.cursor() 16 | cur.execute(sql_statement) 17 | result = cur.fetchall() 18 | conn.commit() 19 | cur.close() 20 | return result 21 | 22 | def execute_sql(sql_statement): 23 | print(sql_statement) 24 | cur = conn.cursor() 25 | cur.execute(sql_statement) 26 | conn.commit() 27 | cur.close() 28 | return 29 | 30 | CREATE_TABLE = """ 31 | create table opinions ( 32 | id serial primary key, 33 | entity varchar(64), 34 | reason varchar(256), 35 | attitude varchar(16), 36 | sentiment varchar(16), 37 | creation_date_time timestamp, 38 | status jsonb 39 | ); 40 | """ 41 | execute_sql(CREATE_TABLE) 42 | 43 | DROP_TABLE = """ 44 | drop table labeled_opinions; 45 | """ 46 | execute_sql(DROP_TABLE) 47 | 48 | CREATE_TABLE = """ 49 | create table labeled_opinions ( 50 | id serial primary key, 51 | entity varchar(64), 52 | reason varchar(256), 53 | attitude varchar(16), 54 | sentiment varchar(16), 55 | reason_appropriateness numeric, 56 | tweet_id numeric, 57 | annotator varchar(16), 58 | creation_date_time timestamp 59 | ); 60 | """ 61 | execute_sql(CREATE_TABLE) 62 | 63 | DROP_TABLE = """ 64 | drop table annotator_opinions; 65 | """ 66 | execute_sql(DROP_TABLE) 67 | 68 | 69 | CREATE_TABLE = """ 70 | create table annotator_opinions ( 71 | id serial primary key, 72 | annotator varchar(64), 73 | entity varchar(64), 74 | entity_appropriate bool, 75 | sentiment varchar(16), 76 | creation_date_time timestamp 77 | ); 78 | """ 79 | execute_sql(CREATE_TABLE) 80 | 81 | DROP_TABLE = """ 82 | drop table labeled_phrases; 83 | """ 84 | execute_sql(DROP_TABLE) 85 | 86 | CREATE_TABLE = """ 87 | create table labeled_phrases ( 88 | id serial primary key, 89 | phrase varchar(64), 90 | category varchar(256), 91 | wiki_entity_name varchar(64), 92 | good_for_wiki bool, 93 | creation_date_time timestamp 94 | ); 95 | """ 96 | execute_sql(CREATE_TABLE) -------------------------------------------------------------------------------- /scrapers/twitter/requirements.txt: 
-------------------------------------------------------------------------------- 1 | boto3 2 | tweepy 3 | requests_aws4auth 4 | elasticsearch 5 | psycopg2 -------------------------------------------------------------------------------- /scrapers/twitter/upload_entity_clusters_linked.py: -------------------------------------------------------------------------------- 1 | import os 2 | import csv 3 | import psycopg2 4 | from typing import List, Tuple 5 | from chirpy.util import get_es_host 6 | ENTITY_CLUSTERS_LINKED = os.path.join(os.path.dirname(os.path.realpath(__file__)), 'entity_clusters_linked.csv') 7 | 8 | host = get_es_host("postgres") 9 | host_stream = get_es_host("chirpy_stream") 10 | port = os.environ.get('POSTGRES_PORT') 11 | database = 'twitter_opinions' 12 | user = os.environ.get('POSTGRES_USER') 13 | password = os.environ.get('POSTGRES_PASSWORD') 14 | 15 | def insert(data : List[Tuple[str, str, str, bool]], host : str): 16 | conn = psycopg2.connect(host=host, port=port, database=database, user=user, password=password) # type: ignore 17 | cur = conn.cursor() 18 | cleaned_data = [[elem if elem != '' else None for elem in row] for row in data] 19 | args_str = b','.join(cur.mogrify("(%s, %s, %s, %s, CURRENT_TIMESTAMP)", row) for row in cleaned_data) 20 | cur.execute(b'insert into labeled_phrases (phrase, category, wiki_entity_name, good_for_wiki, creation_date_time) values ' + args_str) 21 | conn.commit() 22 | cur.close() 23 | conn.close() 24 | return 25 | 26 | 27 | def get_local_entity_clusters_linked() -> List[Tuple[str, str, str, bool]]: 28 | with open(ENTITY_CLUSTERS_LINKED, 'r') as f: 29 | rows = list(csv.reader(f))[1:] 30 | rows = [tuple(row[:-1]) + (row[-1] == 'True',) for row in rows] 31 | return rows 32 | 33 | rows = get_local_entity_clusters_linked() 34 | insert(rows, host_stream) 35 | -------------------------------------------------------------------------------- /servers/__init__.py: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanfordnlp/chirpycardinal/a43f0cf3cc21c406a4acd60428d53c44c15f5947/servers/__init__.py -------------------------------------------------------------------------------- /servers/local/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanfordnlp/chirpycardinal/a43f0cf3cc21c406a4acd60428d53c44c15f5947/servers/local/__init__.py -------------------------------------------------------------------------------- /servers/local/local_callable_manager.py: -------------------------------------------------------------------------------- 1 | import subprocess 2 | 3 | 4 | class LocalCallableManager: 5 | def __init__(self, config): 6 | self.config = config 7 | self.module_to_container_id = {} 8 | 9 | def start_containers(self): 10 | for module_name in self.config: 11 | if self.config[module_name]['url'] != 'none': 12 | self.start_container(module_name) 13 | return 14 | 15 | def start_container(self, module_name): 16 | port = self.config[module_name]['port'] 17 | docker_file_dir = self.config[module_name]['docker_file_dir'] 18 | 19 | # check if container is already running 20 | running = subprocess.run(['docker', 'container', 'ls', '-q', '-f', 'name={}'.format(module_name)], 21 | stdout=subprocess.PIPE) 22 | if running.stdout: return 23 | 24 | # build docker image 25 | print(f"Building {module_name} using the docker file at {docker_file_dir}.") 26 | building = subprocess.run(['docker', 'build', '-t', module_name, docker_file_dir], check=True) 27 | print(f"Finished building {module_name}.") 28 | print(building) 29 | 30 | print(f"Running the container {module_name}.") 31 | # run the docker container 32 | container_process = subprocess.run(['docker', 'run', '-d', '--rm', '-p', 33 | '{}:80'.format(port), 34 | '--name', 35 | '{}'.format(module_name), 36 | '{}:latest'.format(module_name)], 37 | 
check=True, stdout=subprocess.PIPE) 38 | print(f"Finished running {module_name}.") 39 | print(container_process) 40 | 41 | container_id = container_process.stdout.decode('utf-8').strip() 42 | self.module_to_container_id[module_name] = container_id 43 | 44 | def stop_containers(self): 45 | for container_id in self.module_to_container_id.values(): 46 | subprocess.run(['docker', 'stop', container_id]) -------------------------------------------------------------------------------- /servers/remote/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM python:3.7-slim-buster 2 | 3 | COPY ./servers/remote/requirements.txt /deploy/servers/remote/requirements.txt 4 | WORKDIR /deploy 5 | 6 | #RUN apt-get update -y 7 | #RUN apt-get install -y curl 8 | 9 | # update pip 10 | RUN pip install pip --upgrade 11 | RUN pip install -r /deploy/servers/remote/requirements.txt 12 | 13 | # Setup flask application 14 | EXPOSE 5001 5432 4080 4081 4082 4083 4084 4085 15 | 16 | COPY ./ /deploy/ 17 | CMD ["gunicorn", "-w", "4", "-b", "0.0.0.0:5001", "servers.remote.chat_api:app"] 18 | #CMD ["python", "-m" ,"remote.chat_api"] 19 | #RUN mkdir -p /deploy/app 20 | -------------------------------------------------------------------------------- /servers/remote/README.md: -------------------------------------------------------------------------------- 1 | It expects the following environment variables to be set 2 | 3 | **Elasticsearch** 4 | 5 | * `ES_SCHEME` - `https` 6 | * `ES_HOST` - `localhost` or url (eg ....us-west-2.es.amazonaws.com) 7 | * `ES_PORT` - `443` 8 | * `ES_USER` 9 | * `ES_PASSWORD` 10 | 11 | **Postgres** 12 | * `POSTGRES_HOST` - `localhost` or `host.docker.internal` (when running on docker for Mac/Windows and the db is on localhost) or the url 13 | * `POSTGRES_USER_` 14 | * `POSTGRES_PASSWORD` 15 | 16 | **Callable urls** 17 | * `corenlp_URL` - `localhost:port` or remote url 18 | * `dialogact_URL` 19 | * `g2p_URL` 20 | * `gpt2ed_URL` 21 
| * `question_URL` 22 | * `stanfordnlp_URL` 23 | 24 | The recommended method is to store them in a file such as `local_env.list` 25 | in the format 26 | ``` 27 | ES_SCHEME=https 28 | ES_HOST=localhost 29 | ... 30 | ``` 31 | 32 | ## Running locally 33 | To run the server locally, run from project directory 34 | ``` 35 | source servers/remote/local_env.list 36 | python -m servers.remote.chat_api 37 | ``` 38 | 39 | ## Running via Docker 40 | To build (from the project directory) 41 | ``` 42 | docker build --file servers/remote/Dockerfile . 43 | ``` 44 | This adds the entire project as context and builds the docker container terminating with an output like `Successfully built d2b0029ce2da` 45 | 46 | To run the container, you will need to create another list of environment variables, say `docker_env.list` 47 | Here `localhost` should be replaced by `host.docker.internal` when running using Docker for Mac/Windows 48 | ``` 49 | docker run -p 5001:5001 --env-file docker_env.list d2b0029ce2da 50 | ``` 51 | -------------------------------------------------------------------------------- /servers/remote/requirements.txt: -------------------------------------------------------------------------------- 1 | 2 | # This file lists requirements for AWS Lambda to run the bot. 
3 | # requirements.txt is the longer list of requirements for running all different parts of the code locally (including dashboard, notebooks, unit tests, etc) 4 | 5 | boto3==1.17.21 6 | requests==2.14.2 7 | pytz==2018.5 8 | requests_aws4auth==0.9 9 | elasticsearch==7.5.1 10 | certifi 11 | jsonpickle==1.3 12 | colorama==0.4.3 13 | nltk==3.4.5 14 | text2digits==0.0.9 15 | tabulate==0.8.7 16 | inflect==5.0.2 17 | metaphone==0.6 18 | textstat==0.7.1 19 | 20 | editdistance==0.5.3 21 | 22 | psycopg2-binary==2.8.6 23 | 24 | flask==1.1.2 25 | jinja2==3.0.3 26 | itsdangerous==2.0.1 27 | werkzeug==2.0.2 28 | gunicorn==20.0.4 29 | flask-cors==3.0.8 30 | -------------------------------------------------------------------------------- /servers/remote/static/chirpy.js: -------------------------------------------------------------------------------- 1 | 2 | const ChatBox = { 3 | data() { 4 | return { 5 | session_uuid: null, 6 | user_uuid: null, 7 | payload: null, 8 | utterances: [ 9 | ], 10 | newUtterance: '', 11 | } 12 | }, 13 | methods: { 14 | pushUtterance: function(speaker, text) { 15 | if(speaker==='bot'){ 16 | text = text.replace('Hi, this is an Alexa Prize Socialbot.', 'Hi, I\'m Chirpy Cardinal.'); 17 | } 18 | this.utterances.push({speaker: speaker, text: text}); 19 | this.$nextTick(function(){ 20 | realign_transcript() 21 | }); 22 | }, 23 | submit: function(){ 24 | document.newUtteranceForm.newUtterance.focus(); 25 | var that = this; 26 | if(!this.newUtterance=='') { 27 | this.pushUtterance('user', this.newUtterance); 28 | } 29 | axios.post('/conversation', { 30 | payload: this.payload, 31 | session_uuid: this.session_uuid, 32 | user_uuid: this.user_uuid, 33 | user_utterance: this.newUtterance 34 | }) 35 | .then(function(response) { 36 | var data = response.data; 37 | that.pushUtterance('bot', data.bot_utterance); 38 | that.payload = data.payload; 39 | that.session_uuid= data.session_uuid; 40 | that.user_uuid = data.user_uuid; 41 | console.log(response); 42 | }) 43 | 
.catch(function(error){ 44 | console.log(error); 45 | }); 46 | this.newUtterance= ''; 47 | } 48 | 49 | } 50 | } 51 | function realign_transcript(){ 52 | var element = document.getElementsByClassName("transcript"); 53 | element[0].scrollTop = element[0].scrollHeight; 54 | } 55 | window.addEventListener('resize', realign_transcript); 56 | const app = Vue.createApp(ChatBox); 57 | const vm = app.mount("#app"); 58 | vm.submit(); -------------------------------------------------------------------------------- /servers/remote/static/index.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | Title 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 |
16 |
17 |
18 |
19 |
{{utterance.text}}
20 |
21 |
22 |
23 | 24 | 25 | 31 |
32 | 33 |
34 |
35 | 36 | 37 | -------------------------------------------------------------------------------- /servers/remote/static/send.svg: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /test/Dockerfile_regex: -------------------------------------------------------------------------------- 1 | FROM python:3.7-slim-buster 2 | 3 | RUN apt-get update -y && apt-get install -y gcc g++ curl make 4 | RUN curl -sL https://deb.nodesource.com/setup_10.x | bash && apt-get install -y nodejs 5 | RUN npm -g config set user root 6 | RUN npm install -g ask-cli 7 | RUN pip install pip --upgrade 8 | RUN pip install awscli 9 | 10 | RUN mkdir -p /root/app 11 | COPY requirements.txt /root/app/requirements.txt 12 | RUN pip install -r /root/app/requirements.txt 13 | 14 | COPY . /root/app 15 | RUN mv /root/app/.aws ~/ 16 | RUN ls /root/app 17 | RUN cd /root/app && python -m unittest discover -s chirpy -p '*.py' -v -------------------------------------------------------------------------------- /test/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanfordnlp/chirpycardinal/a43f0cf3cc21c406a4acd60428d53c44c15f5947/test/__init__.py -------------------------------------------------------------------------------- /test/integration_tests/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanfordnlp/chirpycardinal/a43f0cf3cc21c406a4acd60428d53c44c15f5947/test/integration_tests/__init__.py -------------------------------------------------------------------------------- /test/integration_tests/commands.py: -------------------------------------------------------------------------------- 1 | from .integration_base import BaseIntegrationTest 2 | 3 | class TestCommandsResponseGenerator(BaseIntegrationTest): 4 | launch_sequence = ['let\'s 
chat', 'my name is jamie'] 5 | 6 | def test_command_after_firstutt(self): 7 | _, current_state, response_text = self.init_and_first_turn() 8 | _, current_state, response_text = self.process_utterance('play music') 9 | self.assertIn('This is an Alexa Prize Socialbot. I\'m happy to chat with you', response_text) 10 | 11 | def test_command_after_introseq(self): 12 | _, current_state, response_text = self.process_utterance('play music') 13 | self.assertIn('This is an Alexa Prize Socialbot. I\'m happy to chat with you', response_text) -------------------------------------------------------------------------------- /test/integration_tests/corenlp.py: -------------------------------------------------------------------------------- 1 | from .integration_base import BaseIntegrationTest 2 | from parameterized import parameterized 3 | from chirpy.annotators.corenlp import Sentiment 4 | 5 | 6 | class TestCoreNLP(BaseIntegrationTest): 7 | 8 | def test_corenlp(self): 9 | """Check that the corenlp output matches what we expect for an example utterance""" 10 | _, current_state, response_text = self.init_and_first_turn('serena williams is the best tennis player ever') 11 | self.assertSetEqual(set(current_state['corenlp']['ner_mentions']), {('serena williams', 'PERSON'), ('tennis player', 'TITLE')}) 12 | self.assertEqual(current_state['corenlp']['sentiment'], Sentiment.POSITIVE) 13 | self.assertSetEqual(set(current_state['corenlp']['nounphrases']), {'serena williams', 'the best tennis player'}) 14 | self.assertSetEqual(set(current_state['corenlp']['verbphrases']), {'is the best tennis player ever'}) 15 | self.assertSetEqual(set(current_state['corenlp']['proper_nouns']), {'serena williams'}) 16 | self.assertSetEqual(set(current_state['corenlp']['nouns']), {'tennis', 'player'}) 17 | -------------------------------------------------------------------------------- /test/integration_tests/flags.py: -------------------------------------------------------------------------------- 1 | from 
class TestOffensiveClassifier(BaseIntegrationTest):
    """Checks for ``contains_offensive`` on known offensive and inoffensive inputs."""

    @parameterized.expand([
        'fuck', 'fuck you', 'you fuck', 'alexa fuck you', 'FuCk', 'fucks', "fuck's", "fuck'll",
        'fuck-day', 'i said...fuck!', 'f**k', 'f#@k',
    ])
    def test_offensive(self, user_utterance):
        """
        Each variant (different case, plural, punctuation, position in the text, masked letters)
        must be flagged as offensive.
        """
        flagged = contains_offensive(user_utterance)
        self.assertTrue(flagged)

    def test_stopwords_inoffensive(self):
        """
        A sentence made of every stopword must not be flagged.
        Not a full false-positive check, only a guard for the most common inoffensive words.
        """
        stopword_sentence = ' '.join(STOPWORDS)
        flagged = contains_offensive(stopword_sentence)
        self.assertFalse(flagged)

    def test_added_phrases(self):
        """A phrase that was manually added to the offensive list must be flagged."""
        flagged = contains_offensive("i'm watching pornhub")
        self.assertTrue(flagged)

    def test_removed_phrases(self):
        """A phrase that was manually removed from the offensive list must not be flagged."""
        flagged = contains_offensive("i love ginger cake")
        self.assertFalse(flagged)

    def test_whitelist(self):
        """
        A whitelisted phrase must not be flagged on its own, while an offensive word elsewhere
        in the same text must still be caught.
        """
        whitelisted_only = contains_offensive("have you seen kill bill")
        self.assertFalse(whitelisted_only)

        whitelisted_plus_offensive = contains_offensive("fuck have you seen kill bill")
        self.assertTrue(whitelisted_plus_offensive)
class OneTurnHackResponseGenerator(BaseIntegrationTest):
    """Integration test for the canned replies listed in ``one_turn_responses``."""

    launch_sequence = ['let\'s chat', 'my name is jamie', 'pretty good and you', 'books', 'supernova']

    @parameterized.expand(one_turn_responses.items())
    def test_several_phrases_affirmative_response(self, utterance, response):
        """Send the utterance and check that the reply contains the expected response text."""
        # About half of the runs prepend the wake word to the utterance.
        spoken = "alexa " + utterance if uniform(0, 1) < 0.5 else utterance
        _, _, response_text = self.process_utterance(spoken)

        # Each entry is an (expected text, needs_prompt) pair; only the text is asserted on.
        expected_response_text, _ = response
        self.assertIn(expected_response_text, response_text)
class TestQuestion(BaseIntegrationTest):
    """Integration tests for the ``question`` annotator's ``is_question`` flag."""

    launch_sequence = ['let\'s chat']

    def _check_is_question(self, utterance, expected):
        """Reset to just after the launch sequence, send ``utterance`` and check the flag."""
        self.reset_ask_to_post_launch_sequence()
        _, current_state, response_text = self.process_utterance(utterance)
        self.assertEqual(current_state['question']['is_question'], expected)
        # The apology string in the reply would mean the turn ended in a fatal error.
        self.assertNotIn(apology_string, response_text)

    def test_is_question(self):
        """Check that the question classifier predicts True for a question"""
        self._check_is_question("my day was good how was yours", True)

    def test_not_question(self):
        """Check that the question classifier predicts False for an utterance that is not a question"""
        self._check_is_question("I like dogs", False)
class TestStoppingWords(BaseIntegrationTest):
    """Integration test checking that stop phrases end the session."""

    launch_sequence = ['let\'s chat', 'my name is leland']

    @parameterized.expand(['shut off', 'cancel', 'off', 'alexa off', 'be quiet', 'end chat',
                           'can you please stop', 'leave me alone', 'pause'])
    def test_stop(self, phrase):
        """Send a stop phrase and check that the response asks to end the session."""
        alexa_response, _, _ = self.process_utterance(phrase)
        should_end_session = alexa_response['response']['shouldEndSession']
        self.assertTrue(should_end_session)
# The Spark context is created once, at module level; the __main__ section below reuses it.
conf = SparkConf().setAppName('wiki-upload').set('spark.driver.maxResultSize', 0)
sc = SparkContext(conf=conf)


def upload_partition(partition):
    """Bulk-index one Spark partition of ``(index, article)`` pairs into Elasticsearch.

    Connection settings are read from the module-level names HOST, PORT, USERNAME, PASSWORD
    and SCHEME, which the ``__main__`` section assigns from the command line.

    Args:
        partition: iterable of ``(index_name, document)`` tuples; each document becomes the
            ``_source`` of one bulk action.

    A ``BulkIndexError`` is reported on stdout and not re-raised, so one failing partition
    does not abort the whole upload.
    """
    # Local import: the file only imports ``bulk`` and ``Elasticsearch``, so the previous
    # ``except elasticsearch.helpers.errors.BulkIndexError`` raised NameError whenever
    # ``bulk`` failed, instead of handling the error.
    from elasticsearch.helpers import BulkIndexError

    es = Elasticsearch(
        [{'host': HOST, 'port': PORT}],
        http_auth=(USERNAME, PASSWORD),
        scheme=SCHEME,
        timeout=99999,
    )
    # Generator, so the partition is streamed to ``bulk`` rather than materialized.
    actions = (
        {'_index': index, '_type': '_doc', '_source': article}
        for index, article in partition
    )
    try:
        bulk(es, actions)
    except BulkIndexError as error:
        print('Bulk Index Error in partition: ' + str(error))


if __name__ == "__main__":
    # Reuse the module-level ``sc``. Building a second SparkContext here, as before, fails
    # with "Cannot run multiple SparkContexts at once" because the first is still active.
    parser = argparse.ArgumentParser(description='Fully process a Wikipedia Dump')
    parser.add_argument('sections_path', type=str, help='Fully Qualified path of the proessed *-sections.json.bz2 file')
    parser.add_argument('articles_path', type=str, help='Fully Qualified path of the *-integrated.json.bz2 file')
    parser.add_argument('-d', '--domain', type=str, help='The host domain name of the ES index')
    parser.add_argument('-p', '--port', type=str, help='The port of the ES index')
    parser.add_argument('-U', '--username', type=str, help='The username of the ES index')
    parser.add_argument('-P', '--password', type=str, help='The password of the ES index')
    parser.add_argument('-s', '--scheme', type=str, default='http', help='The scheme to use for the connection')
    args = parser.parse_args()

    # Module-level globals read by upload_partition.
    HOST, PORT, USERNAME, PASSWORD, SCHEME = args.domain, args.port, args.username, args.password, args.scheme

    # Each line of the sections file is a Python literal holding one section document.
    sc.textFile(args.sections_path)\
        .map(lambda s: ('enwiki-20201201-sections', literal_eval(s)))\
        .foreachPartition(upload_partition)
    # Each line of the articles file is a tuple literal; element 1 is used as the document.
    sc.textFile(args.articles_path)\
        .map(lambda s: literal_eval(s))\
        .map(lambda tup: ('enwiki-20201201-articles', tup[1]))\
        .foreachPartition(upload_partition)
Next, start an interactive shell and use the contents of define_es.py to define the indices with the correct 40 | mapping. Alternatively, you can run `python define_es.py --help` for usage information. 41 | 42 | 3. Run `python upload.py --help` to see a description of the upload script's usage, and use `spark-submit` similarly 43 | to step 1 to upload your processed files into Elasticsearch. --------------------------------------------------------------------------------