├── .dockerignore ├── .eslintrc.js ├── .gitignore ├── .prettierrc ├── LICENSE ├── Makefile ├── README.md ├── azure-pipelines.yml ├── binder └── requirements.txt ├── chapters ├── LICENSE ├── de │ ├── chapter1.md │ ├── chapter2.md │ ├── chapter3.md │ ├── chapter4.md │ └── slides │ │ ├── chapter1_01_introduction-to-spacy.md │ │ ├── chapter1_02_statistical-models.md │ │ ├── chapter1_03_rule-based-matching.md │ │ ├── chapter2_01_data-structures-1.md │ │ ├── chapter2_02_data-structures-2.md │ │ ├── chapter2_03_word-vectors-similarity.md │ │ ├── chapter2_04_models-rules.md │ │ ├── chapter3_01_processing-pipelines.md │ │ ├── chapter3_02_custom-pipeline-components.md │ │ ├── chapter3_03_extension-attributes.md │ │ ├── chapter3_04_scaling-performance.md │ │ ├── chapter4_01_training-updating-models.md │ │ ├── chapter4_02_running-training.md │ │ ├── chapter4_03_training-best-practices.md │ │ └── chapter4_04_wrapping-up.md ├── en │ ├── chapter1.md │ ├── chapter2.md │ ├── chapter3.md │ ├── chapter4.md │ └── slides │ │ ├── chapter1_01_introduction-to-spacy.md │ │ ├── chapter1_02_statistical-models.md │ │ ├── chapter1_03_rule-based-matching.md │ │ ├── chapter2_01_data-structures-1.md │ │ ├── chapter2_02_data-structures-2.md │ │ ├── chapter2_03_word-vectors-similarity.md │ │ ├── chapter2_04_models-rules.md │ │ ├── chapter3_01_processing-pipelines.md │ │ ├── chapter3_02_custom-pipeline-components.md │ │ ├── chapter3_03_extension-attributes.md │ │ ├── chapter3_04_scaling-performance.md │ │ ├── chapter4_01_training-updating-models.md │ │ ├── chapter4_02_running-training.md │ │ ├── chapter4_03_training-best-practices.md │ │ └── chapter4_04_wrapping-up.md ├── es │ ├── chapter1.md │ ├── chapter2.md │ ├── chapter3.md │ ├── chapter4.md │ └── slides │ │ ├── chapter1_01_introduction-to-spacy.md │ │ ├── chapter1_02_statistical-models.md │ │ ├── chapter1_03_rule-based-matching.md │ │ ├── chapter2_01_data-structures-1.md │ │ ├── chapter2_02_data-structures-2.md │ │ ├── chapter2_03_word-vectors-similarity.md │ │ ├── chapter2_04_models-rules.md │ │ ├── chapter3_01_processing-pipelines.md │ │ ├── chapter3_02_custom-pipeline-components.md │ │ ├── chapter3_03_extension-attributes.md │ │ ├── chapter3_04_scaling-performance.md │ │ ├── chapter4_01_training-updating-models.md │ │ ├── chapter4_02_running-training.md │ │ ├── chapter4_03_training-best-practices.md │ │ └── chapter4_04_wrapping-up.md ├── fr │ ├── chapter1.md │ ├── chapter2.md │ ├── chapter3.md │ ├── chapter4.md │ └── slides │ │ ├── chapter1_01_introduction-to-spacy.md │ │ ├── chapter1_02_statistical-models.md │ │ ├── chapter1_03_rule-based-matching.md │ │ ├── chapter2_01_data-structures-1.md │ │ ├── chapter2_02_data-structures-2.md │ │ ├── chapter2_03_word-vectors-similarity.md │ │ ├── chapter2_04_models-rules.md │ │ ├── chapter3_01_processing-pipelines.md │ │ ├── chapter3_02_custom-pipeline-components.md │ │ ├── chapter3_03_extension-attributes.md │ │ ├── chapter3_04_scaling-performance.md │ │ ├── chapter4_01_training-updating-models.md │ │ ├── chapter4_02_running-training.md │ │ ├── chapter4_03_training-best-practices.md │ │ └── chapter4_04_wrapping-up.md ├── ja │ ├── README.txt │ ├── chapter1.md │ ├── chapter2.md │ ├── chapter3.md │ ├── chapter4.md │ └── slides │ │ ├── chapter1_01_introduction-to-spacy.md │ │ ├── chapter1_02_statistical-models.md │ │ ├── chapter1_03_rule-based-matching.md │ │ ├── chapter2_01_data-structures-1.md │ │ ├── chapter2_02_data-structures-2.md │ │ ├── chapter2_03_word-vectors-similarity.md │ │ ├── chapter2_04_models-rules.md │ │ ├── 
chapter3_01_processing-pipelines.md │ │ ├── chapter3_02_custom-pipeline-components.md │ │ ├── chapter3_03_extension-attributes.md │ │ ├── chapter3_04_scaling-performance.md │ │ ├── chapter4_01_training-updating-models.md │ │ ├── chapter4_02_running-training.md │ │ ├── chapter4_03_training-best-practices.md │ │ └── chapter4_04_wrapping-up.md ├── pt │ ├── chapter1.md │ ├── chapter2.md │ ├── chapter3.md │ ├── chapter4.md │ └── slides │ │ ├── chapter1_01_introduction-to-spacy.md │ │ ├── chapter1_02_statistical-models.md │ │ ├── chapter1_03_rule-based-matching.md │ │ ├── chapter2_01_data-structures-1.md │ │ ├── chapter2_02_data-structures-2.md │ │ ├── chapter2_03_word-vectors-similarity.md │ │ ├── chapter2_04_models-rules.md │ │ ├── chapter3_01_processing-pipelines.md │ │ ├── chapter3_02_custom-pipeline-components.md │ │ ├── chapter3_03_extension-attributes.md │ │ ├── chapter3_04_scaling-performance.md │ │ ├── chapter4_01_training-updating-models.md │ │ ├── chapter4_02_running-training.md │ │ ├── chapter4_02_training-loop.md │ │ ├── chapter4_03_training-best-practices.md │ │ └── chapter4_04_wrapping-up.md └── zh │ ├── chapter1.md │ ├── chapter2.md │ ├── chapter3.md │ ├── chapter4.md │ └── slides │ ├── chapter1_01_introduction-to-spacy.md │ ├── chapter1_02_statistical-models.md │ ├── chapter1_03_rule-based-matching.md │ ├── chapter2_01_data-structures-1.md │ ├── chapter2_02_data-structures-2.md │ ├── chapter2_03_word-vectors-similarity.md │ ├── chapter2_04_models-rules.md │ ├── chapter3_01_processing-pipelines.md │ ├── chapter3_02_custom-pipeline-components.md │ ├── chapter3_03_extension-attributes.md │ ├── chapter3_04_scaling-performance.md │ ├── chapter4_01_training-updating-models.md │ ├── chapter4_02_running-training.md │ ├── chapter4_02_training-loop.md │ ├── chapter4_03_training-best-practices.md │ └── chapter4_04_wrapping-up.md ├── conftest.py ├── docker └── Dockerfile ├── exercises ├── de │ ├── bookquotes.json │ ├── capitals.json │ ├── config_gadget.cfg │ ├── countries.json │ ├── country_text.txt │ ├── dev_gadget.spacy │ ├── exc_01_02_01.py │ ├── exc_01_02_02.py │ ├── exc_01_02_03.py │ ├── exc_01_03_01.py │ ├── exc_01_03_02.py │ ├── exc_01_04.py │ ├── exc_01_07.py │ ├── exc_01_08_01.py │ ├── exc_01_08_02.py │ ├── exc_01_09.py │ ├── exc_01_11.py │ ├── exc_01_12_01.py │ ├── exc_01_12_02.py │ ├── exc_01_12_03.py │ ├── exc_02_02_01.py │ ├── exc_02_02_02.py │ ├── exc_02_05_01.py │ ├── exc_02_05_02.py │ ├── exc_02_05_03.py │ ├── exc_02_06.py │ ├── exc_02_07.py │ ├── exc_02_09.py │ ├── exc_02_10_01.py │ ├── exc_02_10_02.py │ ├── exc_02_10_03.py │ ├── exc_02_13.py │ ├── exc_02_14.py │ ├── exc_02_15.py │ ├── exc_03_03.py │ ├── exc_03_06.py │ ├── exc_03_07.py │ ├── exc_03_09_01.py │ ├── exc_03_09_02.py │ ├── exc_03_10_01.py │ ├── exc_03_10_02.py │ ├── exc_03_11.py │ ├── exc_03_12.py │ ├── exc_03_14_01.py │ ├── exc_03_14_02.py │ ├── exc_03_14_03.py │ ├── exc_03_15.py │ ├── exc_03_16_01.py │ ├── exc_03_16_02.py │ ├── exc_04_03.py │ ├── exc_04_04.py │ ├── exc_04_07_01.sh │ ├── exc_04_07_02.sh │ ├── exc_04_08.sh │ ├── exc_04_11.py │ ├── exc_04_12_01.py │ ├── exc_04_12_02.py │ ├── gadgets.json │ ├── iphone.json │ ├── solution_01_02_01.py │ ├── solution_01_02_02.py │ ├── solution_01_02_03.py │ ├── solution_01_03_01.py │ ├── solution_01_03_02.py │ ├── solution_01_04.py │ ├── solution_01_07.py │ ├── solution_01_08_01.py │ ├── solution_01_08_02.py │ ├── solution_01_09.py │ ├── solution_01_11.py │ ├── solution_01_12_01.py │ ├── solution_01_12_02.py │ ├── solution_01_12_03.py │ ├── solution_02_02_01.py │ ├── 
solution_02_02_02.py │ ├── solution_02_05_01.py │ ├── solution_02_05_02.py │ ├── solution_02_05_03.py │ ├── solution_02_06.py │ ├── solution_02_07.py │ ├── solution_02_09.py │ ├── solution_02_10_01.py │ ├── solution_02_10_02.py │ ├── solution_02_10_03.py │ ├── solution_02_13.py │ ├── solution_02_14.py │ ├── solution_02_15.py │ ├── solution_03_03.py │ ├── solution_03_06.py │ ├── solution_03_07.py │ ├── solution_03_09_01.py │ ├── solution_03_09_02.py │ ├── solution_03_10_01.py │ ├── solution_03_10_02.py │ ├── solution_03_11.py │ ├── solution_03_12.py │ ├── solution_03_14_01.py │ ├── solution_03_14_02.py │ ├── solution_03_14_03.py │ ├── solution_03_15.py │ ├── solution_03_16_01.py │ ├── solution_03_16_02.py │ ├── solution_04_03.py │ ├── solution_04_04.py │ ├── solution_04_07_01.sh │ ├── solution_04_07_02.sh │ ├── solution_04_08.sh │ ├── solution_04_11.py │ ├── solution_04_12_01.py │ ├── solution_04_12_02.py │ ├── test_01_02_01.py │ ├── test_01_02_02.py │ ├── test_01_02_03.py │ ├── test_01_03_01.py │ ├── test_01_03_02.py │ ├── test_01_04.py │ ├── test_01_07.py │ ├── test_01_08_01.py │ ├── test_01_08_02.py │ ├── test_01_09.py │ ├── test_01_11.py │ ├── test_01_12_01.py │ ├── test_01_12_02.py │ ├── test_01_12_03.py │ ├── test_02_02_01.py │ ├── test_02_02_02.py │ ├── test_02_05_01.py │ ├── test_02_05_02.py │ ├── test_02_05_03.py │ ├── test_02_06.py │ ├── test_02_07.py │ ├── test_02_09.py │ ├── test_02_10_01.py │ ├── test_02_10_02.py │ ├── test_02_10_03.py │ ├── test_02_13.py │ ├── test_02_14.py │ ├── test_02_15.py │ ├── test_03_03.py │ ├── test_03_06.py │ ├── test_03_07.py │ ├── test_03_09_01.py │ ├── test_03_09_02.py │ ├── test_03_10_01.py │ ├── test_03_10_02.py │ ├── test_03_11.py │ ├── test_03_12.py │ ├── test_03_14_01.py │ ├── test_03_14_02.py │ ├── test_03_14_03.py │ ├── test_03_15.py │ ├── test_03_16_01.py │ ├── test_03_16_02.py │ ├── test_04_03.py │ ├── test_04_04.py │ ├── test_04_11.py │ ├── test_04_12_01.py │ ├── test_04_12_02.py │ ├── test_general.py │ ├── train_gadget.spacy │ └── tweets.json ├── en │ ├── bookquotes.json │ ├── capitals.json │ ├── config_gadget.cfg │ ├── countries.json │ ├── country_text.txt │ ├── dev_gadget.spacy │ ├── exc_01_02_01.py │ ├── exc_01_02_02.py │ ├── exc_01_02_03.py │ ├── exc_01_03_01.py │ ├── exc_01_03_02.py │ ├── exc_01_04.py │ ├── exc_01_07.py │ ├── exc_01_08_01.py │ ├── exc_01_08_02.py │ ├── exc_01_09.py │ ├── exc_01_11.py │ ├── exc_01_12_01.py │ ├── exc_01_12_02.py │ ├── exc_01_12_03.py │ ├── exc_02_02_01.py │ ├── exc_02_02_02.py │ ├── exc_02_05_01.py │ ├── exc_02_05_02.py │ ├── exc_02_05_03.py │ ├── exc_02_06.py │ ├── exc_02_07.py │ ├── exc_02_09.py │ ├── exc_02_10_01.py │ ├── exc_02_10_02.py │ ├── exc_02_10_03.py │ ├── exc_02_13.py │ ├── exc_02_14.py │ ├── exc_02_15.py │ ├── exc_03_03.py │ ├── exc_03_06.py │ ├── exc_03_07.py │ ├── exc_03_09_01.py │ ├── exc_03_09_02.py │ ├── exc_03_10_01.py │ ├── exc_03_10_02.py │ ├── exc_03_11.py │ ├── exc_03_12.py │ ├── exc_03_14_01.py │ ├── exc_03_14_02.py │ ├── exc_03_14_03.py │ ├── exc_03_15.py │ ├── exc_03_16_01.py │ ├── exc_03_16_02.py │ ├── exc_04_03.py │ ├── exc_04_04.py │ ├── exc_04_07_01.sh │ ├── exc_04_07_02.sh │ ├── exc_04_08.sh │ ├── exc_04_11.py │ ├── exc_04_12_01.py │ ├── exc_04_12_02.py │ ├── gadgets.json │ ├── iphone.json │ ├── solution_01_02_01.py │ ├── solution_01_02_02.py │ ├── solution_01_02_03.py │ ├── solution_01_03_01.py │ ├── solution_01_03_02.py │ ├── solution_01_04.py │ ├── solution_01_07.py │ ├── solution_01_08_01.py │ ├── solution_01_08_02.py │ ├── solution_01_09.py │ ├── solution_01_11.py 
│ ├── solution_01_12_01.py │ ├── solution_01_12_02.py │ ├── solution_01_12_03.py │ ├── solution_02_02_01.py │ ├── solution_02_02_02.py │ ├── solution_02_05_01.py │ ├── solution_02_05_02.py │ ├── solution_02_05_03.py │ ├── solution_02_06.py │ ├── solution_02_07.py │ ├── solution_02_09.py │ ├── solution_02_10_01.py │ ├── solution_02_10_02.py │ ├── solution_02_10_03.py │ ├── solution_02_13.py │ ├── solution_02_14.py │ ├── solution_02_15.py │ ├── solution_03_03.py │ ├── solution_03_06.py │ ├── solution_03_07.py │ ├── solution_03_09_01.py │ ├── solution_03_09_02.py │ ├── solution_03_10_01.py │ ├── solution_03_10_02.py │ ├── solution_03_11.py │ ├── solution_03_12.py │ ├── solution_03_14_01.py │ ├── solution_03_14_02.py │ ├── solution_03_14_03.py │ ├── solution_03_15.py │ ├── solution_03_16_01.py │ ├── solution_03_16_02.py │ ├── solution_04_03.py │ ├── solution_04_04.py │ ├── solution_04_07_01.sh │ ├── solution_04_07_02.sh │ ├── solution_04_08.sh │ ├── solution_04_11.py │ ├── solution_04_12_01.py │ ├── solution_04_12_02.py │ ├── test_01_02_01.py │ ├── test_01_02_02.py │ ├── test_01_02_03.py │ ├── test_01_03_01.py │ ├── test_01_03_02.py │ ├── test_01_04.py │ ├── test_01_07.py │ ├── test_01_08_01.py │ ├── test_01_08_02.py │ ├── test_01_09.py │ ├── test_01_11.py │ ├── test_01_12_01.py │ ├── test_01_12_02.py │ ├── test_01_12_03.py │ ├── test_02_02_01.py │ ├── test_02_02_02.py │ ├── test_02_05_01.py │ ├── test_02_05_02.py │ ├── test_02_05_03.py │ ├── test_02_06.py │ ├── test_02_07.py │ ├── test_02_09.py │ ├── test_02_10_01.py │ ├── test_02_10_02.py │ ├── test_02_10_03.py │ ├── test_02_13.py │ ├── test_02_14.py │ ├── test_02_15.py │ ├── test_03_03.py │ ├── test_03_06.py │ ├── test_03_07.py │ ├── test_03_09_01.py │ ├── test_03_09_02.py │ ├── test_03_10_01.py │ ├── test_03_10_02.py │ ├── test_03_11.py │ ├── test_03_12.py │ ├── test_03_14_01.py │ ├── test_03_14_02.py │ ├── test_03_14_03.py │ ├── test_03_15.py │ ├── test_03_16_01.py │ ├── test_03_16_02.py │ ├── test_04_03.py │ ├── test_04_04.py │ ├── test_04_11.py │ ├── test_04_12_01.py │ ├── test_04_12_02.py │ ├── test_general.py │ ├── train_gadget.spacy │ └── tweets.json ├── es │ ├── adidas.json │ ├── bookquotes.json │ ├── capitals.json │ ├── config_gadget.cfg │ ├── countries.json │ ├── country_text.txt │ ├── dev_gadget.spacy │ ├── exc_01_02_01.py │ ├── exc_01_02_02.py │ ├── exc_01_02_03.py │ ├── exc_01_03_01.py │ ├── exc_01_03_02.py │ ├── exc_01_04.py │ ├── exc_01_07.py │ ├── exc_01_08_01.py │ ├── exc_01_08_02.py │ ├── exc_01_09.py │ ├── exc_01_11.py │ ├── exc_01_12_01.py │ ├── exc_01_12_02.py │ ├── exc_01_12_03.py │ ├── exc_02_02_01.py │ ├── exc_02_02_02.py │ ├── exc_02_05_01.py │ ├── exc_02_05_02.py │ ├── exc_02_05_03.py │ ├── exc_02_06.py │ ├── exc_02_07.py │ ├── exc_02_09.py │ ├── exc_02_10_01.py │ ├── exc_02_10_02.py │ ├── exc_02_10_03.py │ ├── exc_02_13.py │ ├── exc_02_14.py │ ├── exc_02_15.py │ ├── exc_03_03.py │ ├── exc_03_06.py │ ├── exc_03_07.py │ ├── exc_03_09_01.py │ ├── exc_03_09_02.py │ ├── exc_03_10_01.py │ ├── exc_03_10_02.py │ ├── exc_03_11.py │ ├── exc_03_12.py │ ├── exc_03_14_01.py │ ├── exc_03_14_02.py │ ├── exc_03_14_03.py │ ├── exc_03_15.py │ ├── exc_03_16_01.py │ ├── exc_03_16_02.py │ ├── exc_04_03.py │ ├── exc_04_04.py │ ├── exc_04_07_01.sh │ ├── exc_04_07_02.sh │ ├── exc_04_08.sh │ ├── exc_04_11.py │ ├── exc_04_12_01.py │ ├── exc_04_12_02.py │ ├── gadgets.json │ ├── ropa.json │ ├── solution_01_02_01.py │ ├── solution_01_02_02.py │ ├── solution_01_02_03.py │ ├── solution_01_03_01.py │ ├── solution_01_03_02.py │ ├── 
solution_01_04.py │ ├── solution_01_07.py │ ├── solution_01_08_01.py │ ├── solution_01_08_02.py │ ├── solution_01_09.py │ ├── solution_01_11.py │ ├── solution_01_12_01.py │ ├── solution_01_12_02.py │ ├── solution_01_12_03.py │ ├── solution_02_02_01.py │ ├── solution_02_02_02.py │ ├── solution_02_05_01.py │ ├── solution_02_05_02.py │ ├── solution_02_05_03.py │ ├── solution_02_06.py │ ├── solution_02_07.py │ ├── solution_02_09.py │ ├── solution_02_10_01.py │ ├── solution_02_10_02.py │ ├── solution_02_10_03.py │ ├── solution_02_13.py │ ├── solution_02_14.py │ ├── solution_02_15.py │ ├── solution_03_03.py │ ├── solution_03_06.py │ ├── solution_03_07.py │ ├── solution_03_09_01.py │ ├── solution_03_09_02.py │ ├── solution_03_10_01.py │ ├── solution_03_10_02.py │ ├── solution_03_11.py │ ├── solution_03_12.py │ ├── solution_03_14_01.py │ ├── solution_03_14_02.py │ ├── solution_03_14_03.py │ ├── solution_03_15.py │ ├── solution_03_16_01.py │ ├── solution_03_16_02.py │ ├── solution_04_03.py │ ├── solution_04_04.py │ ├── solution_04_07_01.sh │ ├── solution_04_07_02.sh │ ├── solution_04_08.sh │ ├── solution_04_11.py │ ├── solution_04_12_01.py │ ├── solution_04_12_02.py │ ├── test_01_02_01.py │ ├── test_01_02_02.py │ ├── test_01_02_03.py │ ├── test_01_03_01.py │ ├── test_01_03_02.py │ ├── test_01_04.py │ ├── test_01_07.py │ ├── test_01_08_01.py │ ├── test_01_08_02.py │ ├── test_01_09.py │ ├── test_01_11.py │ ├── test_01_12_01.py │ ├── test_01_12_02.py │ ├── test_01_12_03.py │ ├── test_02_02_01.py │ ├── test_02_02_02.py │ ├── test_02_05_01.py │ ├── test_02_05_02.py │ ├── test_02_05_03.py │ ├── test_02_06.py │ ├── test_02_07.py │ ├── test_02_09.py │ ├── test_02_10_01.py │ ├── test_02_10_02.py │ ├── test_02_10_03.py │ ├── test_02_13.py │ ├── test_02_14.py │ ├── test_02_15.py │ ├── test_03_03.py │ ├── test_03_06.py │ ├── test_03_07.py │ ├── test_03_09_01.py │ ├── test_03_09_02.py │ ├── test_03_10_01.py │ ├── test_03_10_02.py │ ├── test_03_11.py │ ├── test_03_12.py │ ├── test_03_14_01.py │ ├── test_03_14_02.py │ ├── test_03_14_03.py │ ├── test_03_15.py │ ├── test_03_16_01.py │ ├── test_03_16_02.py │ ├── test_04_03.py │ ├── test_04_04.py │ ├── test_04_11.py │ ├── test_04_12_01.py │ ├── test_04_12_02.py │ ├── test_general.py │ ├── train_gadget.spacy │ └── tweets.json ├── fr │ ├── bookquotes.json │ ├── capitals.json │ ├── config_gadget.cfg │ ├── countries.json │ ├── country_text.txt │ ├── dev_gadget.spacy │ ├── exc_01_02_01.py │ ├── exc_01_02_02.py │ ├── exc_01_02_03.py │ ├── exc_01_03_01.py │ ├── exc_01_03_02.py │ ├── exc_01_04.py │ ├── exc_01_07.py │ ├── exc_01_08_01.py │ ├── exc_01_08_02.py │ ├── exc_01_09.py │ ├── exc_01_11.py │ ├── exc_01_12_01.py │ ├── exc_01_12_02.py │ ├── exc_01_12_03.py │ ├── exc_02_02_01.py │ ├── exc_02_02_02.py │ ├── exc_02_05_01.py │ ├── exc_02_05_02.py │ ├── exc_02_05_03.py │ ├── exc_02_06.py │ ├── exc_02_07.py │ ├── exc_02_09.py │ ├── exc_02_10_01.py │ ├── exc_02_10_02.py │ ├── exc_02_10_03.py │ ├── exc_02_13.py │ ├── exc_02_14.py │ ├── exc_02_15.py │ ├── exc_03_03.py │ ├── exc_03_06.py │ ├── exc_03_07.py │ ├── exc_03_09_01.py │ ├── exc_03_09_02.py │ ├── exc_03_10_01.py │ ├── exc_03_10_02.py │ ├── exc_03_11.py │ ├── exc_03_12.py │ ├── exc_03_14_01.py │ ├── exc_03_14_02.py │ ├── exc_03_14_03.py │ ├── exc_03_15.py │ ├── exc_03_16_01.py │ ├── exc_03_16_02.py │ ├── exc_04_03.py │ ├── exc_04_04.py │ ├── exc_04_07.py │ ├── exc_04_07_01.sh │ ├── exc_04_07_02.sh │ ├── exc_04_08.sh │ ├── exc_04_11.py │ ├── exc_04_12_01.py │ ├── exc_04_12_02.py │ ├── gadgets.json │ ├── iphone.json │ ├── 
solution_01_02_01.py │ ├── solution_01_02_02.py │ ├── solution_01_02_03.py │ ├── solution_01_03_01.py │ ├── solution_01_03_02.py │ ├── solution_01_04.py │ ├── solution_01_07.py │ ├── solution_01_08_01.py │ ├── solution_01_08_02.py │ ├── solution_01_09.py │ ├── solution_01_11.py │ ├── solution_01_12_01.py │ ├── solution_01_12_02.py │ ├── solution_01_12_03.py │ ├── solution_02_02_01.py │ ├── solution_02_02_02.py │ ├── solution_02_05_01.py │ ├── solution_02_05_02.py │ ├── solution_02_05_03.py │ ├── solution_02_06.py │ ├── solution_02_07.py │ ├── solution_02_09.py │ ├── solution_02_10_01.py │ ├── solution_02_10_02.py │ ├── solution_02_10_03.py │ ├── solution_02_13.py │ ├── solution_02_14.py │ ├── solution_02_15.py │ ├── solution_03_03.py │ ├── solution_03_06.py │ ├── solution_03_07.py │ ├── solution_03_09_01.py │ ├── solution_03_09_02.py │ ├── solution_03_10_01.py │ ├── solution_03_10_02.py │ ├── solution_03_11.py │ ├── solution_03_12.py │ ├── solution_03_14_01.py │ ├── solution_03_14_02.py │ ├── solution_03_14_03.py │ ├── solution_03_15.py │ ├── solution_03_16_01.py │ ├── solution_03_16_02.py │ ├── solution_04_03.py │ ├── solution_04_04.py │ ├── solution_04_07_01.sh │ ├── solution_04_07_02.sh │ ├── solution_04_08.sh │ ├── solution_04_11.py │ ├── solution_04_12_01.py │ ├── solution_04_12_02.py │ ├── test_01_02_01.py │ ├── test_01_02_02.py │ ├── test_01_02_03.py │ ├── test_01_03_01.py │ ├── test_01_03_02.py │ ├── test_01_04.py │ ├── test_01_07.py │ ├── test_01_08_01.py │ ├── test_01_08_02.py │ ├── test_01_09.py │ ├── test_01_11.py │ ├── test_01_12_01.py │ ├── test_01_12_02.py │ ├── test_01_12_03.py │ ├── test_02_02_01.py │ ├── test_02_02_02.py │ ├── test_02_05_01.py │ ├── test_02_05_02.py │ ├── test_02_05_03.py │ ├── test_02_06.py │ ├── test_02_07.py │ ├── test_02_09.py │ ├── test_02_10_01.py │ ├── test_02_10_02.py │ ├── test_02_10_03.py │ ├── test_02_13.py │ ├── test_02_14.py │ ├── test_02_15.py │ ├── test_03_03.py │ ├── test_03_06.py │ ├── test_03_07.py │ ├── test_03_09_01.py │ ├── test_03_09_02.py │ ├── test_03_10_01.py │ ├── test_03_10_02.py │ ├── test_03_11.py │ ├── test_03_12.py │ ├── test_03_14_01.py │ ├── test_03_14_02.py │ ├── test_03_14_03.py │ ├── test_03_15.py │ ├── test_03_16_01.py │ ├── test_03_16_02.py │ ├── test_04_03.py │ ├── test_04_04.py │ ├── test_04_07.py │ ├── test_04_11.py │ ├── test_04_12_01.py │ ├── test_04_12_02.py │ ├── test_general.py │ ├── train_gadget.spacy │ └── tweets.json ├── ja │ ├── bookquotes.json │ ├── capitals.json │ ├── config_gadget.cfg │ ├── countries.json │ ├── country_text.txt │ ├── dev_gadget.spacy │ ├── exc_01_02_01.py │ ├── exc_01_02_02.py │ ├── exc_01_02_03.py │ ├── exc_01_02_04.py │ ├── exc_01_03_01.py │ ├── exc_01_03_02.py │ ├── exc_01_04.py │ ├── exc_01_07.py │ ├── exc_01_08_01.py │ ├── exc_01_08_02.py │ ├── exc_01_09.py │ ├── exc_01_11.py │ ├── exc_01_12_01.py │ ├── exc_01_12_02.py │ ├── exc_01_12_03.py │ ├── exc_02_02_01.py │ ├── exc_02_02_02.py │ ├── exc_02_05_01.py │ ├── exc_02_05_02.py │ ├── exc_02_05_03.py │ ├── exc_02_06.py │ ├── exc_02_07.py │ ├── exc_02_09.py │ ├── exc_02_10_01.py │ ├── exc_02_10_02.py │ ├── exc_02_10_03.py │ ├── exc_02_13.py │ ├── exc_02_14.py │ ├── exc_02_15.py │ ├── exc_03_03.py │ ├── exc_03_06.py │ ├── exc_03_07.py │ ├── exc_03_09_01.py │ ├── exc_03_09_02.py │ ├── exc_03_10_01.py │ ├── exc_03_10_02.py │ ├── exc_03_11.py │ ├── exc_03_12.py │ ├── exc_03_14_01.py │ ├── exc_03_14_02.py │ ├── exc_03_14_03.py │ ├── exc_03_15.py │ ├── exc_03_16_01.py │ ├── exc_03_16_02.py │ ├── exc_04_03.py │ ├── exc_04_04.py │ ├── 
exc_04_06.py │ ├── exc_04_07_01.sh │ ├── exc_04_07_02.sh │ ├── exc_04_08.sh │ ├── exc_04_11.py │ ├── exc_04_12_01.py │ ├── exc_04_12_02.py │ ├── gadgets.json │ ├── iphone.json │ ├── solution_01_02_01.py │ ├── solution_01_02_02.py │ ├── solution_01_02_03.py │ ├── solution_01_02_04.py │ ├── solution_01_03_01.py │ ├── solution_01_03_02.py │ ├── solution_01_04.py │ ├── solution_01_07.py │ ├── solution_01_08_01.py │ ├── solution_01_08_02.py │ ├── solution_01_09.py │ ├── solution_01_11.py │ ├── solution_01_12_01.py │ ├── solution_01_12_02.py │ ├── solution_01_12_03.py │ ├── solution_02_02_01.py │ ├── solution_02_02_02.py │ ├── solution_02_05_01.py │ ├── solution_02_05_02.py │ ├── solution_02_05_03.py │ ├── solution_02_06.py │ ├── solution_02_07.py │ ├── solution_02_09.py │ ├── solution_02_10_01.py │ ├── solution_02_10_02.py │ ├── solution_02_10_03.py │ ├── solution_02_13.py │ ├── solution_02_14.py │ ├── solution_02_15.py │ ├── solution_03_03.py │ ├── solution_03_06.py │ ├── solution_03_07.py │ ├── solution_03_09_01.py │ ├── solution_03_09_02.py │ ├── solution_03_10_01.py │ ├── solution_03_10_02.py │ ├── solution_03_11.py │ ├── solution_03_12.py │ ├── solution_03_14_01.py │ ├── solution_03_14_02.py │ ├── solution_03_14_03.py │ ├── solution_03_15.py │ ├── solution_03_16_01.py │ ├── solution_03_16_02.py │ ├── solution_04_03.py │ ├── solution_04_04.py │ ├── solution_04_06.py │ ├── solution_04_07_01.sh │ ├── solution_04_07_02.sh │ ├── solution_04_08.sh │ ├── solution_04_11.py │ ├── solution_04_12_01.py │ ├── solution_04_12_02.py │ ├── test_01_02_01.py │ ├── test_01_02_02.py │ ├── test_01_02_03.py │ ├── test_01_02_04.py │ ├── test_01_03_01.py │ ├── test_01_03_02.py │ ├── test_01_04.py │ ├── test_01_07.py │ ├── test_01_08_01.py │ ├── test_01_08_02.py │ ├── test_01_09.py │ ├── test_01_11.py │ ├── test_01_12_01.py │ ├── test_01_12_02.py │ ├── test_01_12_03.py │ ├── test_02_02_01.py │ ├── test_02_02_02.py │ ├── test_02_05_01.py │ ├── test_02_05_02.py │ ├── test_02_05_03.py │ ├── test_02_06.py │ ├── test_02_07.py │ ├── test_02_09.py │ ├── test_02_10_01.py │ ├── test_02_10_02.py │ ├── test_02_10_03.py │ ├── test_02_13.py │ ├── test_02_14.py │ ├── test_02_15.py │ ├── test_03_03.py │ ├── test_03_06.py │ ├── test_03_07.py │ ├── test_03_09_01.py │ ├── test_03_09_02.py │ ├── test_03_10_01.py │ ├── test_03_10_02.py │ ├── test_03_11.py │ ├── test_03_12.py │ ├── test_03_14_01.py │ ├── test_03_14_02.py │ ├── test_03_14_03.py │ ├── test_03_15.py │ ├── test_03_16_01.py │ ├── test_03_16_02.py │ ├── test_04_03.py │ ├── test_04_04.py │ ├── test_04_06.py │ ├── test_04_07.py │ ├── test_04_10.py │ ├── test_04_11.py │ ├── test_04_12_01.py │ ├── test_04_12_02.py │ ├── train_gadget.spacy │ └── tweets.json ├── pt │ ├── bookquotes.json │ ├── capitals.json │ ├── config_gadget.cfg │ ├── countries.json │ ├── country_text.txt │ ├── dev_gadget.spacy │ ├── exc_01_02_01.py │ ├── exc_01_02_02.py │ ├── exc_01_02_03.py │ ├── exc_01_03_01.py │ ├── exc_01_03_02.py │ ├── exc_01_04.py │ ├── exc_01_07.py │ ├── exc_01_08_01.py │ ├── exc_01_08_02.py │ ├── exc_01_09.py │ ├── exc_01_11.py │ ├── exc_01_12_01.py │ ├── exc_01_12_02.py │ ├── exc_01_12_03.py │ ├── exc_02_02_01.py │ ├── exc_02_02_02.py │ ├── exc_02_05_01.py │ ├── exc_02_05_02.py │ ├── exc_02_05_03.py │ ├── exc_02_06.py │ ├── exc_02_07.py │ ├── exc_02_09.py │ ├── exc_02_10_01.py │ ├── exc_02_10_02.py │ ├── exc_02_10_03.py │ ├── exc_02_13.py │ ├── exc_02_14.py │ ├── exc_02_15.py │ ├── exc_03_03.py │ ├── exc_03_06.py │ ├── exc_03_07.py │ ├── exc_03_09_01.py │ ├── exc_03_09_02.py │ ├── 
exc_03_10_01.py │ ├── exc_03_10_02.py │ ├── exc_03_11.py │ ├── exc_03_12.py │ ├── exc_03_14_01.py │ ├── exc_03_14_02.py │ ├── exc_03_14_03.py │ ├── exc_03_15.py │ ├── exc_03_16_01.py │ ├── exc_03_16_02.py │ ├── exc_04_03.py │ ├── exc_04_04.py │ ├── exc_04_07_01.sh │ ├── exc_04_07_02.sh │ ├── exc_04_08.sh │ ├── exc_04_11.py │ ├── exc_04_12_01.py │ ├── exc_04_12_02.py │ ├── gadgets.json │ ├── iphone.json │ ├── solution_01_02_01.py │ ├── solution_01_02_02.py │ ├── solution_01_02_03.py │ ├── solution_01_03_01.py │ ├── solution_01_03_02.py │ ├── solution_01_04.py │ ├── solution_01_07.py │ ├── solution_01_08_01.py │ ├── solution_01_08_02.py │ ├── solution_01_09.py │ ├── solution_01_11.py │ ├── solution_01_12_01.py │ ├── solution_01_12_02.py │ ├── solution_01_12_03.py │ ├── solution_02_02_01.py │ ├── solution_02_02_02.py │ ├── solution_02_05_01.py │ ├── solution_02_05_02.py │ ├── solution_02_05_03.py │ ├── solution_02_06.py │ ├── solution_02_07.py │ ├── solution_02_09.py │ ├── solution_02_10_01.py │ ├── solution_02_10_02.py │ ├── solution_02_10_03.py │ ├── solution_02_13.py │ ├── solution_02_14.py │ ├── solution_02_15.py │ ├── solution_03_03.py │ ├── solution_03_06.py │ ├── solution_03_07.py │ ├── solution_03_09_01.py │ ├── solution_03_09_02.py │ ├── solution_03_10_01.py │ ├── solution_03_10_02.py │ ├── solution_03_11.py │ ├── solution_03_12.py │ ├── solution_03_14_01.py │ ├── solution_03_14_02.py │ ├── solution_03_14_03.py │ ├── solution_03_15.py │ ├── solution_03_16_01.py │ ├── solution_03_16_02.py │ ├── solution_04_03.py │ ├── solution_04_04.py │ ├── solution_04_07_01.sh │ ├── solution_04_07_02.sh │ ├── solution_04_08.sh │ ├── solution_04_11.py │ ├── solution_04_12_01.py │ ├── solution_04_12_02.py │ ├── test_01_02_01.py │ ├── test_01_02_02.py │ ├── test_01_02_03.py │ ├── test_01_03_01.py │ ├── test_01_03_02.py │ ├── test_01_04.py │ ├── test_01_07.py │ ├── test_01_08_01.py │ ├── test_01_08_02.py │ ├── test_01_09.py │ ├── test_01_11.py │ ├── test_01_12_01.py │ ├── test_01_12_02.py │ ├── test_01_12_03.py │ ├── test_02_02_01.py │ ├── test_02_02_02.py │ ├── test_02_05_01.py │ ├── test_02_05_02.py │ ├── test_02_05_03.py │ ├── test_02_06.py │ ├── test_02_07.py │ ├── test_02_09.py │ ├── test_02_10_01.py │ ├── test_02_10_02.py │ ├── test_02_10_03.py │ ├── test_02_13.py │ ├── test_02_14.py │ ├── test_02_15.py │ ├── test_03_03.py │ ├── test_03_06.py │ ├── test_03_07.py │ ├── test_03_09_01.py │ ├── test_03_09_02.py │ ├── test_03_10_01.py │ ├── test_03_10_02.py │ ├── test_03_11.py │ ├── test_03_12.py │ ├── test_03_14_01.py │ ├── test_03_14_02.py │ ├── test_03_14_03.py │ ├── test_03_15.py │ ├── test_03_16_01.py │ ├── test_03_16_02.py │ ├── test_04_03.py │ ├── test_04_04.py │ ├── test_04_11.py │ ├── test_04_12_01.py │ ├── test_04_12_02.py │ ├── train_gadget.spacy │ └── tweets.json └── zh │ ├── bookquotes.json │ ├── capitals.json │ ├── config_gadget.cfg │ ├── countries.json │ ├── country_text.txt │ ├── dev_gadget.spacy │ ├── exc_01_02_01.py │ ├── exc_01_02_02.py │ ├── exc_01_02_03.py │ ├── exc_01_02_04.py │ ├── exc_01_03_01.py │ ├── exc_01_03_02.py │ ├── exc_01_04.py │ ├── exc_01_07.py │ ├── exc_01_08_01.py │ ├── exc_01_08_02.py │ ├── exc_01_09.py │ ├── exc_01_11.py │ ├── exc_01_12_01.py │ ├── exc_01_12_02.py │ ├── exc_01_12_03.py │ ├── exc_02_02_01.py │ ├── exc_02_02_02.py │ ├── exc_02_05_01.py │ ├── exc_02_05_02.py │ ├── exc_02_05_03.py │ ├── exc_02_06.py │ ├── exc_02_07.py │ ├── exc_02_09.py │ ├── exc_02_10_01.py │ ├── exc_02_10_02.py │ ├── exc_02_10_03.py │ ├── exc_02_13.py │ ├── exc_02_14.py │ ├── 
exc_02_15.py │ ├── exc_03_03.py │ ├── exc_03_06.py │ ├── exc_03_07.py │ ├── exc_03_09_01.py │ ├── exc_03_09_02.py │ ├── exc_03_10_01.py │ ├── exc_03_10_02.py │ ├── exc_03_11.py │ ├── exc_03_12.py │ ├── exc_03_14_01.py │ ├── exc_03_14_02.py │ ├── exc_03_14_03.py │ ├── exc_03_15.py │ ├── exc_03_16_01.py │ ├── exc_03_16_02.py │ ├── exc_04_03.py │ ├── exc_04_04.py │ ├── exc_04_07_01.sh │ ├── exc_04_07_02.sh │ ├── exc_04_08.sh │ ├── exc_04_11.py │ ├── exc_04_12_01.py │ ├── exc_04_12_02.py │ ├── gadgets.json │ ├── iphone.json │ ├── solution_01_02_01.py │ ├── solution_01_02_02.py │ ├── solution_01_02_03.py │ ├── solution_01_02_04.py │ ├── solution_01_03_01.py │ ├── solution_01_03_02.py │ ├── solution_01_04.py │ ├── solution_01_07.py │ ├── solution_01_08_01.py │ ├── solution_01_08_02.py │ ├── solution_01_09.py │ ├── solution_01_11.py │ ├── solution_01_12_01.py │ ├── solution_01_12_02.py │ ├── solution_01_12_03.py │ ├── solution_02_02_01.py │ ├── solution_02_02_02.py │ ├── solution_02_05_01.py │ ├── solution_02_05_02.py │ ├── solution_02_05_03.py │ ├── solution_02_06.py │ ├── solution_02_07.py │ ├── solution_02_09.py │ ├── solution_02_10_01.py │ ├── solution_02_10_02.py │ ├── solution_02_10_03.py │ ├── solution_02_13.py │ ├── solution_02_14.py │ ├── solution_02_15.py │ ├── solution_03_03.py │ ├── solution_03_06.py │ ├── solution_03_07.py │ ├── solution_03_09_01.py │ ├── solution_03_09_02.py │ ├── solution_03_10_01.py │ ├── solution_03_10_02.py │ ├── solution_03_11.py │ ├── solution_03_12.py │ ├── solution_03_14_01.py │ ├── solution_03_14_02.py │ ├── solution_03_14_03.py │ ├── solution_03_15.py │ ├── solution_03_16_01.py │ ├── solution_03_16_02.py │ ├── solution_04_03.py │ ├── solution_04_04.py │ ├── solution_04_07_01.sh │ ├── solution_04_07_02.sh │ ├── solution_04_08.sh │ ├── solution_04_11.py │ ├── solution_04_12_01.py │ ├── solution_04_12_02.py │ ├── test_01_02_01.py │ ├── test_01_02_02.py │ ├── test_01_02_03.py │ ├── test_01_02_04.py │ ├── test_01_03_01.py │ ├── test_01_03_02.py │ ├── test_01_04.py │ ├── test_01_07.py │ ├── test_01_08_01.py │ ├── test_01_08_02.py │ ├── test_01_09.py │ ├── test_01_11.py │ ├── test_01_12_01.py │ ├── test_01_12_02.py │ ├── test_01_12_03.py │ ├── test_02_02_01.py │ ├── test_02_02_02.py │ ├── test_02_05_01.py │ ├── test_02_05_02.py │ ├── test_02_05_03.py │ ├── test_02_06.py │ ├── test_02_07.py │ ├── test_02_09.py │ ├── test_02_10_01.py │ ├── test_02_10_02.py │ ├── test_02_10_03.py │ ├── test_02_13.py │ ├── test_02_14.py │ ├── test_02_15.py │ ├── test_03_03.py │ ├── test_03_06.py │ ├── test_03_07.py │ ├── test_03_09_01.py │ ├── test_03_09_02.py │ ├── test_03_10_01.py │ ├── test_03_10_02.py │ ├── test_03_11.py │ ├── test_03_12.py │ ├── test_03_14_01.py │ ├── test_03_14_02.py │ ├── test_03_14_03.py │ ├── test_03_15.py │ ├── test_03_16_01.py │ ├── test_03_16_02.py │ ├── test_04_03.py │ ├── test_04_04.py │ ├── test_04_11.py │ ├── test_04_12_01.py │ ├── test_04_12_02.py │ ├── test_general.py │ ├── train_gadget.spacy │ └── weibo.json ├── gatsby-browser.js ├── gatsby-config.js ├── gatsby-node.js ├── locale.json ├── meta.json ├── netlify.toml ├── package-lock.json ├── package.json ├── src ├── components │ ├── button.js │ ├── choice.js │ ├── code.js │ ├── exercise.js │ ├── hint.js │ ├── home.js │ ├── juniper.js │ ├── layout.js │ ├── link.js │ ├── logo.js │ ├── seo.js │ ├── slides.js │ └── typography.js ├── context.js ├── markdown.js ├── pages │ ├── de.js │ ├── en.js │ ├── es.js │ ├── fr.js │ ├── index.js │ ├── ja.js │ ├── pt.js │ └── zh.js ├── styles │ ├── button.module.sass │ 
│   │   ├── chapter.module.sass
│   │   ├── choice.module.sass
│   │   ├── code.module.sass
│   │   ├── exercise.module.sass
│   │   ├── hint.module.sass
│   │   ├── index.module.sass
│   │   ├── index.sass
│   │   ├── layout.module.sass
│   │   ├── link.module.sass
│   │   ├── plyr.css
│   │   ├── reveal.css
│   │   ├── slides.module.sass
│   │   └── typography.module.sass
│   └── templates
│       └── chapter.js
├── static
│   ├── dep_example.png
│   ├── dep_example_de.png
│   ├── dep_example_es.png
│   ├── dep_example_fr.png
│   ├── dep_example_ja.png
│   ├── dep_example_zh.png
│   ├── doc.png
│   ├── doc_span.png
│   ├── icon.png
│   ├── icon_check.svg
│   ├── icon_slides.svg
│   ├── icon_video.svg
│   ├── logos.svg
│   ├── ner_example.png
│   ├── ner_example_de.png
│   ├── ner_example_es.png
│   ├── ner_example_fr.png
│   ├── ner_example_ja.png
│   ├── ner_example_zh.png
│   ├── package.png
│   ├── package_de.png
│   ├── package_es.png
│   ├── package_fr.png
│   ├── package_ja.png
│   ├── package_meta.png
│   ├── package_meta_de.png
│   ├── package_meta_es.png
│   ├── package_meta_fr.png
│   ├── package_meta_zh.png
│   ├── package_zh.png
│   ├── pipeline.png
│   ├── profile.jpg
│   ├── social.jpg
│   ├── social_de.jpg
│   ├── social_es.jpg
│   ├── social_fr.jpg
│   ├── social_ja.jpg
│   ├── social_pt.jpg
│   ├── social_zh.jpg
│   ├── span_indices.png
│   ├── training.png
│   ├── training_de.png
│   ├── training_es.png
│   ├── training_fr.png
│   ├── training_zh.png
│   ├── vocab_stringstore.png
│   ├── vocab_stringstore_de.png
│   ├── vocab_stringstore_es.png
│   ├── vocab_stringstore_fr.png
│   ├── vocab_stringstore_zh.png
│   └── website.png
└── theme.sass

--------------------------------------------------------------------------------
/.dockerignore:
--------------------------------------------------------------------------------
# Ignore everything
*

# Allow files and directories
!/docker/**
!gatsby*
!package*.json
!binder/requirements.txt

--------------------------------------------------------------------------------
/.eslintrc.js:
--------------------------------------------------------------------------------
module.exports = {
  globals: {
    __PATH_PREFIX__: true,
  },
  extends: `react-app`,
}

--------------------------------------------------------------------------------
/exercises/de/dev_gadget.spacy:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/explosion/spacy-course/77d8ee1630788b1fb0df1f80f999f7355941a37f/exercises/de/dev_gadget.spacy

--------------------------------------------------------------------------------
/exercises/de/exc_01_02_01.py:
--------------------------------------------------------------------------------
# Importiere spaCy
import ____

# Erstelle ein deutsches nlp-Objekt
nlp = ____

# Verarbeite einen Text
doc = nlp("Liebe Grüße!")

# Drucke den Text des Dokuments
print(____.text)

--------------------------------------------------------------------------------
/exercises/de/exc_01_02_02.py:
--------------------------------------------------------------------------------
# Importiere spaCy
import ____

# Erstelle ein englisches nlp-Objekt
nlp = ____

# Verarbeite einen Text
doc = nlp("This is a sentence.")

# Drucke den Text des Dokuments
print(____.text)

--------------------------------------------------------------------------------
/exercises/de/exc_01_02_03.py:
--------------------------------------------------------------------------------
# Importiere spaCy
import ____

# Erstelle ein spanisches nlp-Objekt
nlp = ____

# Verarbeite einen Text
doc = nlp("¿Cómo estás?")

# Drucke den Text des Dokuments
print(____.text)
--------------------------------------------------------------------------------
/exercises/de/exc_01_03_01.py:
--------------------------------------------------------------------------------
# Importiere spaCy und erstelle ein deutsches nlp-Objekt
import ____

nlp = ____

# Verarbeite den Text
doc = ____("Ich mag niedliche Katzen und Faultiere.")

# Wähle den ersten Token aus
erster_token = doc[____]

# Drucke den Text des ersten Tokens
print(erster_token.____)

--------------------------------------------------------------------------------
/exercises/de/exc_01_07.py:
--------------------------------------------------------------------------------
import spacy

# Lade die Pipeline "de_core_news_sm"
nlp = ____

text = "Apple wurde 1976 von Steve Wozniak, Steve Jobs und Ron Wayne gegründet."

# Verarbeite den Text
doc = ____

# Drucke den Text des Dokuments
print(____.____)

--------------------------------------------------------------------------------
/exercises/de/exc_01_08_02.py:
--------------------------------------------------------------------------------
import spacy

nlp = spacy.load("de_core_news_sm")

text = "Apple wurde 1976 von Steve Wozniak, Steve Jobs und Ron Wayne gegründet."

# Verarbeite den Text
doc = ____

# Iteriere über die vorhergesagten Entitäten
for ent in ____.____:
    # Drucke den Text und das Label der Entität
    print(ent.____, ____.____)

--------------------------------------------------------------------------------
/exercises/de/exc_02_02_01.py:
--------------------------------------------------------------------------------
import spacy

nlp = spacy.blank("de")
doc = nlp("Ich habe eine Katze")

# Schlage den Hash für das Wort "Katze" nach
katze_hash = ____.____.____[____]
print(katze_hash)

# Schlage katze_hash nach, um den String zu erhalten
katze_string = ____.____.____[____]
print(katze_string)

--------------------------------------------------------------------------------
/exercises/de/exc_02_02_02.py:
--------------------------------------------------------------------------------
import spacy

nlp = spacy.blank("de")
doc = nlp("David Bowie hat das Label PER")

# Schlage den Hash für das String-Label "PER" nach
person_hash = ____.____.____[____]
print(person_hash)

# Schlage person_hash nach, um den String zu erhalten
person_string = ____.____.____[____]
print(person_string)

--------------------------------------------------------------------------------
/exercises/de/exc_02_05_01.py:
--------------------------------------------------------------------------------
import spacy

nlp = spacy.blank("de")

# Importiere die Klasse Doc
from ____ import ____

# Erwarteter Text: "spaCy ist cool!"
words = ["spaCy", "ist", "cool", "!"]
spaces = [True, True, False, False]

# Erstelle ein Doc mit den Wörtern und Leerzeichen
doc = ____(____, words=words, spaces=spaces)
print(doc.text)

--------------------------------------------------------------------------------
/exercises/de/exc_02_05_02.py:
--------------------------------------------------------------------------------
import spacy

nlp = spacy.blank("de")

# Importiere die Klasse Doc
from ____ import ____

# Erwarteter Text: "Na, alles klar?"
words = ["Na", ",", "alles", "klar", "?"]
spaces = [____, ____, ____, ____, ____]

# Erstelle ein Doc mit den Wörtern und Leerzeichen
doc = ____(____, ____=____, ____=____)
print(doc.text)

--------------------------------------------------------------------------------
/exercises/de/exc_02_05_03.py:
--------------------------------------------------------------------------------
import spacy

nlp = spacy.blank("de")

# Importiere die Klasse Doc
from ____ import ____

# Erwarteter Text: "Was, echt?!"
words = [____, ____, ____, ____, ____]
spaces = [____, ____, ____, ____, ____]

# Erstelle ein Doc mit den Wörtern und Leerzeichen
doc = ____(____, ____=____, ____=____)
print(doc.text)

--------------------------------------------------------------------------------
/exercises/de/exc_02_09.py:
--------------------------------------------------------------------------------
import spacy

# Lade die Pipeline "en_core_web_md"
nlp = ____

# Verarbeite einen Text
doc = nlp("Two bananas in pyjamas")

# Wähle den Vector des Tokens "bananas" aus
bananas_vector = ____.____
print(bananas_vector)

--------------------------------------------------------------------------------
/exercises/de/exc_02_10_01.py:
--------------------------------------------------------------------------------
import spacy

nlp = spacy.load("en_core_web_md")

doc1 = nlp("It's a warm summer day")
doc2 = nlp("It's sunny outside")

# Berechne die Ähnlichkeit von doc1 und doc2
similarity = ____.____(____)
print(similarity)

--------------------------------------------------------------------------------
/exercises/de/exc_02_10_02.py:
--------------------------------------------------------------------------------
import spacy

nlp = spacy.load("en_core_web_md")

doc = nlp("TV and books")
token1, token2 = doc[0], doc[2]

# Berechne die Ähnlichkeit der Tokens "TV" und "books"
similarity = ____.____(____)
print(similarity)

--------------------------------------------------------------------------------
/exercises/de/exc_02_10_03.py:
--------------------------------------------------------------------------------
import spacy

nlp = spacy.load("en_core_web_md")

doc = nlp("This was a great restaurant. Afterwards, we went to a really nice bar.")
# Erstelle Spans für "great restaurant" und "really nice bar"
span1 = ____
span2 = ____

# Berechne die Ähnlichkeit der beiden Spans
similarity = ____.____(____)
print(similarity)

--------------------------------------------------------------------------------
/exercises/de/exc_03_03.py:
--------------------------------------------------------------------------------
import spacy

# Lade die Pipeline "de_core_news_sm"
nlp = ____

# Drucke die Namen der Pipeline-Komponenten
print(____.____)

# Drucke die komplette Pipeline mit (name, component) Tuples
print(____.____)

--------------------------------------------------------------------------------
/exercises/de/exc_03_14_01.py:
--------------------------------------------------------------------------------
import json
import spacy

nlp = spacy.load("de_core_news_sm")

with open("exercises/de/tweets.json", encoding="utf8") as f:
    TEXTS = json.loads(f.read())

# Verarbeite den Text und drucke die Nomen
for text in TEXTS:
    doc = nlp(text)
    print([token.text for token in doc if token.pos_ == "NOUN"])

--------------------------------------------------------------------------------
/exercises/de/exc_03_14_02.py:
--------------------------------------------------------------------------------
import json
import spacy

nlp = spacy.load("de_core_news_sm")

with open("exercises/de/tweets.json", encoding="utf8") as f:
    TEXTS = json.loads(f.read())

# Verarbeite den Text und drucke die Entitäten
docs = [nlp(text) for text in TEXTS]
entities = [doc.ents for doc in docs]
print(*entities)

--------------------------------------------------------------------------------
/exercises/de/exc_03_14_03.py:
--------------------------------------------------------------------------------
import spacy

nlp = spacy.blank("de")

people = ["David Bowie", "Angela Merkel", "Lady Gaga"]

# Erstelle eine Liste von Patterns für den PhraseMatcher
patterns = [nlp(person) for person in people]

--------------------------------------------------------------------------------
/exercises/de/exc_03_16_01.py:
--------------------------------------------------------------------------------
import spacy

nlp = spacy.load("de_core_news_sm")
text = (
    "Chick-fil-A, ein Wortspiel mit der amerikanischen Aussprache von „Filet“, "
    "ist der Name einer 1946 gegründeten amerikanischen Schnellrestaurantkette, "
    "die sich auf den Verkauf von Hühnerfleischprodukten spezialisiert hat."
)

# Wende nur den Tokenizer an
doc = nlp(text)
print([token.text for token in doc])

--------------------------------------------------------------------------------
/exercises/de/exc_03_16_02.py:
--------------------------------------------------------------------------------
import spacy

nlp = spacy.load("de_core_news_sm")
text = (
    "Die McDonald’s Corporation ist ein Betreiber und Franchisegeber von "
    "weltweit vertretenen Schnellrestaurants."
)

# Deaktiviere den Tagger und den Lemmatizer
with ____.____(____):
    # Verarbeite den Text
    doc = ____
    # Drucke die Entitäten im Doc
    print(____)

--------------------------------------------------------------------------------
/exercises/de/exc_04_07_01.sh:
--------------------------------------------------------------------------------
python -m spacy ____ ____ ____ --____ ____ --____ ____

--------------------------------------------------------------------------------
/exercises/de/exc_04_07_02.sh:
--------------------------------------------------------------------------------
cat ./config.cfg

--------------------------------------------------------------------------------
/exercises/de/exc_04_08.sh:
--------------------------------------------------------------------------------
python -m spacy ____ ____ --output ____ --paths.train ____ --paths.dev ____

--------------------------------------------------------------------------------
/exercises/de/iphone.json:
--------------------------------------------------------------------------------
[
  "iPhone X vorbestellen: So geht's",
  "Das iPhone X kommt bald",
  "soll ich 1000 € für das neue iphone x ausgeben?",
  "Die Testberichte des iPhone 8 sind da",
  "iPhone 11 vs iPhone 8: Ein Quantensprung",
  "Ich brauche ein neues Smartphone. Hat jemand Tipps?"
]

--------------------------------------------------------------------------------
/exercises/de/solution_01_02_01.py:
--------------------------------------------------------------------------------
# Importiere spaCy
import spacy

# Erstelle ein deutsches nlp-Objekt
nlp = spacy.blank("de")

# Verarbeite einen Text
doc = nlp("Liebe Grüße!")

# Drucke den Text des Dokuments
print(doc.text)

--------------------------------------------------------------------------------
/exercises/de/solution_01_02_02.py:
--------------------------------------------------------------------------------
# Importiere spaCy
import spacy

# Erstelle ein englisches nlp-Objekt
nlp = spacy.blank("en")

# Verarbeite einen Text
doc = nlp("This is a sentence.")

# Drucke den Text des Dokuments
print(doc.text)

--------------------------------------------------------------------------------
/exercises/de/solution_01_02_03.py:
--------------------------------------------------------------------------------
# Importiere spaCy
import spacy

# Erstelle ein spanisches nlp-Objekt
nlp = spacy.blank("es")

# Verarbeite einen Text
doc = nlp("¿Cómo estás?")

# Drucke den Text des Dokuments
print(doc.text)

--------------------------------------------------------------------------------
/exercises/de/solution_01_03_01.py:
--------------------------------------------------------------------------------
# Importiere spaCy und erstelle ein deutsches nlp-Objekt
import spacy

nlp = spacy.blank("de")

# Verarbeite den Text
doc = nlp("Ich mag niedliche Katzen und Faultiere.")

# Wähle den ersten Token aus
erster_token = doc[0]

# Drucke den Text des ersten Tokens
print(erster_token.text)

--------------------------------------------------------------------------------
/exercises/de/solution_01_07.py:
--------------------------------------------------------------------------------
import spacy

# Lade die Pipeline "de_core_news_sm"
nlp = spacy.load("de_core_news_sm")
text = "Apple wurde 1976 von Steve Wozniak, Steve Jobs und Ron Wayne gegründet."

# Verarbeite den Text
doc = nlp(text)

# Drucke den Text des Dokuments
print(doc.text)

--------------------------------------------------------------------------------
/exercises/de/solution_01_08_02.py:
--------------------------------------------------------------------------------
import spacy

nlp = spacy.load("de_core_news_sm")

text = "Apple wurde 1976 von Steve Wozniak, Steve Jobs und Ron Wayne gegründet."

# Verarbeite den Text
doc = nlp(text)

# Iteriere über die vorhergesagten Entitäten
for ent in doc.ents:
    # Drucke den Text und das Label der Entität
    print(ent.text, ent.label_)

--------------------------------------------------------------------------------
/exercises/de/solution_02_02_01.py:
--------------------------------------------------------------------------------
import spacy

nlp = spacy.blank("de")
doc = nlp("Ich habe eine Katze")

# Schlage den Hash für das Wort "Katze" nach
katze_hash = nlp.vocab.strings["Katze"]
print(katze_hash)

# Schlage katze_hash nach, um den String zu erhalten
katze_string = nlp.vocab.strings[katze_hash]
print(katze_string)

--------------------------------------------------------------------------------
/exercises/de/solution_02_02_02.py:
--------------------------------------------------------------------------------
import spacy

nlp = spacy.blank("de")
doc = nlp("David Bowie hat das Label PER")

# Schlage den Hash für das String-Label "PER" nach
person_hash = nlp.vocab.strings["PER"]
print(person_hash)

# Schlage person_hash nach, um den String zu erhalten
person_string = nlp.vocab.strings[person_hash]
print(person_string)

--------------------------------------------------------------------------------
/exercises/de/solution_02_05_01.py:
--------------------------------------------------------------------------------
import spacy

nlp = spacy.blank("de")

# Importiere die Klasse Doc
from spacy.tokens import Doc

# Erwarteter Text: "spaCy ist cool!"
words = ["spaCy", "ist", "cool", "!"]
spaces = [True, True, False, False]

# Erstelle ein Doc mit den Wörtern und Leerzeichen
doc = Doc(nlp.vocab, words=words, spaces=spaces)
print(doc.text)

--------------------------------------------------------------------------------
/exercises/de/solution_02_05_02.py:
--------------------------------------------------------------------------------
import spacy

nlp = spacy.blank("de")

# Importiere die Klasse Doc
from spacy.tokens import Doc

# Erwarteter Text: "Na, alles klar?"
words = ["Na", ",", "alles", "klar", "?"]
spaces = [False, True, True, False, False]

# Erstelle ein Doc mit den Wörtern und Leerzeichen
doc = Doc(nlp.vocab, words=words, spaces=spaces)
print(doc.text)

--------------------------------------------------------------------------------
/exercises/de/solution_02_05_03.py:
--------------------------------------------------------------------------------
import spacy

nlp = spacy.blank("de")

# Importiere die Klasse Doc
from spacy.tokens import Doc

# Erwarteter Text: "Was, echt?!"
words = ["Was", ",", "echt", "?", "!"]
spaces = [False, True, False, False, False]

# Erstelle ein Doc mit den Wörtern und Leerzeichen
doc = Doc(nlp.vocab, words=words, spaces=spaces)
print(doc.text)

--------------------------------------------------------------------------------
/exercises/de/solution_02_07.py:
--------------------------------------------------------------------------------
import spacy

nlp = spacy.load("de_core_news_sm")
doc = nlp("Berlin gefällt mir sehr gut")

# Iteriere über die Tokens
for token in doc:
    # Teste, ob der aktuelle Token ein Eigenname ist
    if token.pos_ == "PROPN":
        # Teste, ob der nächste Token ein Verb ist
        if doc[token.i + 1].pos_ == "VERB":
            print("Eigenname vor Verb gefunden:", token.text)

--------------------------------------------------------------------------------
/exercises/de/solution_02_09.py:
--------------------------------------------------------------------------------
import spacy

# Lade die Pipeline "en_core_web_md"
nlp = spacy.load("en_core_web_md")

# Verarbeite einen Text
doc = nlp("Two bananas in pyjamas")

# Wähle den Vector des Tokens "bananas" aus
bananas_vector = doc[1].vector
print(bananas_vector)

--------------------------------------------------------------------------------
/exercises/de/solution_02_10_01.py:
--------------------------------------------------------------------------------
import spacy

nlp = spacy.load("en_core_web_md")

doc1 = nlp("It's a warm summer day")
doc2 = nlp("It's sunny outside")

# Berechne die Ähnlichkeit von doc1 und doc2
similarity = doc1.similarity(doc2)
print(similarity)

--------------------------------------------------------------------------------
/exercises/de/solution_02_10_02.py:
--------------------------------------------------------------------------------
import spacy

nlp = spacy.load("en_core_web_md")

doc = nlp("TV and books")
token1, token2 = doc[0], doc[2]

# Berechne die Ähnlichkeit der Tokens "TV" und "books"
similarity = token1.similarity(token2)
print(similarity)

--------------------------------------------------------------------------------
/exercises/de/solution_02_10_03.py:
--------------------------------------------------------------------------------
import spacy

nlp = spacy.load("en_core_web_md")

doc = nlp("This was a great restaurant. Afterwards, we went to a really nice bar.")
# Erstelle Spans für "great restaurant" und "really nice bar"
span1 = doc[3:5]
span2 = doc[12:15]

# Berechne die Ähnlichkeit der beiden Spans
similarity = span1.similarity(span2)
print(similarity)

--------------------------------------------------------------------------------
/exercises/de/solution_03_03.py:
--------------------------------------------------------------------------------
import spacy

# Lade die Pipeline "de_core_news_sm"
nlp = spacy.load("de_core_news_sm")

# Drucke die Namen der Pipeline-Komponenten
print(nlp.pipe_names)

# Drucke die komplette Pipeline mit (name, component) Tuples
print(nlp.pipeline)

--------------------------------------------------------------------------------
/exercises/de/solution_03_14_01.py:
--------------------------------------------------------------------------------
import json
import spacy

nlp = spacy.load("de_core_news_sm")

with open("exercises/de/tweets.json", encoding="utf8") as f:
    TEXTS = json.loads(f.read())

# Verarbeite den Text und drucke die Nomen
for doc in nlp.pipe(TEXTS):
    print([token.text for token in doc if token.pos_ == "NOUN"])

--------------------------------------------------------------------------------
/exercises/de/solution_03_14_02.py:
--------------------------------------------------------------------------------
import json
import spacy

nlp = spacy.load("de_core_news_sm")

with open("exercises/de/tweets.json", encoding="utf8") as f:
    TEXTS = json.loads(f.read())

# Verarbeite den Text und drucke die Entitäten
docs = list(nlp.pipe(TEXTS))
entities = [doc.ents for doc in docs]
print(*entities)

--------------------------------------------------------------------------------
/exercises/de/solution_03_14_03.py:
--------------------------------------------------------------------------------
import spacy

nlp = spacy.blank("de")

people = ["David Bowie", "Angela Merkel", "Lady Gaga"]

# Erstelle eine Liste von Patterns für den PhraseMatcher
patterns = list(nlp.pipe(people))

--------------------------------------------------------------------------------
/exercises/de/solution_04_07_01.sh:
--------------------------------------------------------------------------------
python -m spacy init config ./config.cfg --lang de --pipeline ner

--------------------------------------------------------------------------------
/exercises/de/solution_04_07_02.sh:
--------------------------------------------------------------------------------
cat ./config.cfg

--------------------------------------------------------------------------------
/exercises/de/solution_04_08.sh:
--------------------------------------------------------------------------------
python -m spacy train ./exercises/de/config_gadget.cfg --output ./output --paths.train ./exercises/de/train_gadget.spacy --paths.dev ./exercises/de/dev_gadget.spacy

--------------------------------------------------------------------------------
/exercises/de/test_02_05_01.py:
--------------------------------------------------------------------------------
def test():
    assert (
        "from spacy.tokens import Doc" in __solution__
    ), "Importierst du die Klasse Doc?"
    assert (
        doc.text == "spaCy ist cool!"
    ), "Bist du dir sicher, dass du das Doc richtig erstellt hast?"
8 | assert "print(doc.text)" in __solution__, "Druckst du den Text des Docs?" 9 | __msg__.good("Super!") 10 | -------------------------------------------------------------------------------- /exercises/de/test_02_09.py: -------------------------------------------------------------------------------- 1 | def test(): 2 | assert ( 3 | 'spacy.load("en_core_web_md")' in __solution__ 4 | ), "Lädst du die mittelgroße Pipeline?" 5 | assert "doc[1].vector" in __solution__, "Greifst du auf den richtigen Vector zu?" 6 | __msg__.good( 7 | "Bravo! In der nächsten Übung wirst du spaCy benutzen, um mithilfe von " 8 | "Wortvektoren Ähnlichkeiten von Dokumenten, Spans und Tokens zu berechnen." 9 | ) 10 | -------------------------------------------------------------------------------- /exercises/de/test_02_10_01.py: -------------------------------------------------------------------------------- 1 | def test(): 2 | assert ( 3 | "doc1.similarity(doc2)" in __solution__ or "doc2.similarity(doc1)" in __solution__ 4 | ), "Vergleichst du die Ähnlichkeit der zwei Docs?" 5 | assert ( 6 | 0 <= float(similarity) <= 1 7 | ), "Der Ähnlichkeitswert muss eine Zahl zwischen 0 und 1 sein. Hast du ihn korrekt berechnet?" 8 | __msg__.good("Gut gemacht!") 9 | -------------------------------------------------------------------------------- /exercises/de/test_02_10_02.py: -------------------------------------------------------------------------------- 1 | def test(): 2 | assert ( 3 | "token1.similarity(token2)" in __solution__ or "token2.similarity(token1)" in __solution__ 4 | ), "Vergleichst du die Ähnlichkeit der zwei Tokens?" 5 | assert ( 6 | 0 <= float(similarity) <= 1 7 | ), "Der Ähnlichkeitswert muss eine Zahl zwischen 0 und 1 sein. Hast du ihn korrekt berechnet?" 8 | __msg__.good("Prima!") 9 | -------------------------------------------------------------------------------- /exercises/de/test_03_14_01.py: -------------------------------------------------------------------------------- 1 | def test(): 2 | assert ( 3 | "for doc in nlp.pipe(TEXTS)" in __solution__ 4 | ), "Iterierst du über die Docs, die per yield von nlp.pipe ausgegeben werden?" 5 | __msg__.good("Super!") 6 | -------------------------------------------------------------------------------- /exercises/de/test_03_14_02.py: -------------------------------------------------------------------------------- 1 | def test(): 2 | assert ( 3 | "docs = list(nlp.pipe(TEXTS))" in __solution__ 4 | ), "Verwendest du nlp.pipe in einer Liste?" 5 | __msg__.good("Gute Arbeit!") 6 | -------------------------------------------------------------------------------- /exercises/de/test_03_14_03.py: -------------------------------------------------------------------------------- 1 | def test(): 2 | assert ( 3 | "patterns = list(nlp.pipe(people))" in __solution__ 4 | ), "Verwendest du nlp.pipe in einer Liste?" 5 | 6 | __msg__.good( 7 | "Gut gemacht! Als nächstes schauen wir uns ein praktisches Beispiel " 8 | "an, das nlp.pipe verwendet, um Dokumente mit zusätzlichen Metadaten " 9 | "zu verarbeiten." 10 | ) 11 | -------------------------------------------------------------------------------- /exercises/de/test_03_16_01.py: -------------------------------------------------------------------------------- 1 | def test(): 2 | assert ( 3 | "doc = nlp.make_doc(text)" in __solution__ 4 | or "doc = nlp.tokenizer(text)" in __solution__ 5 | ), "Verwendest du tatsächlich nur den Tokenizer?" 
6 | 7 | __msg__.good("Sehr schön!") 8 | -------------------------------------------------------------------------------- /exercises/de/train_gadget.spacy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/explosion/spacy-course/77d8ee1630788b1fb0df1f80f999f7355941a37f/exercises/de/train_gadget.spacy -------------------------------------------------------------------------------- /exercises/en/dev_gadget.spacy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/explosion/spacy-course/77d8ee1630788b1fb0df1f80f999f7355941a37f/exercises/en/dev_gadget.spacy -------------------------------------------------------------------------------- /exercises/en/exc_01_02_01.py: -------------------------------------------------------------------------------- 1 | # Import spaCy 2 | import ____ 3 | 4 | # Create the English nlp object 5 | nlp = ____ 6 | 7 | # Process a text 8 | doc = nlp("This is a sentence.") 9 | 10 | # Print the document text 11 | print(____.text) 12 | -------------------------------------------------------------------------------- /exercises/en/exc_01_02_02.py: -------------------------------------------------------------------------------- 1 | # Import spaCy 2 | import ____ 3 | 4 | # Create the German nlp object 5 | nlp = ____ 6 | 7 | # Process a text (this is German for: "Kind regards!") 8 | doc = nlp("Liebe Grüße!") 9 | 10 | # Print the document text 11 | print(____.text) 12 | -------------------------------------------------------------------------------- /exercises/en/exc_01_02_03.py: -------------------------------------------------------------------------------- 1 | # Import spaCy 2 | import ____ 3 | 4 | # Create the Spanish nlp object 5 | nlp = ____ 6 | 7 | # Process a text (this is Spanish for: "How are you?") 8 | doc = nlp("¿Cómo estás?") 9 | 10 | # Print the document text 11 | print(____.text) 12 | -------------------------------------------------------------------------------- /exercises/en/exc_01_03_01.py: -------------------------------------------------------------------------------- 1 | # Import spaCy and create the English nlp object 2 | import ____ 3 | 4 | nlp = ____ 5 | 6 | # Process the text 7 | doc = ____("I like tree kangaroos and narwhals.") 8 | 9 | # Select the first token 10 | first_token = doc[____] 11 | 12 | # Print the first token's text 13 | print(first_token.____) 14 | -------------------------------------------------------------------------------- /exercises/en/exc_01_07.py: -------------------------------------------------------------------------------- 1 | import spacy 2 | 3 | # Load the "en_core_web_sm" pipeline 4 | nlp = ____ 5 | 6 | text = "It’s official: Apple is the first U.S. public company to reach a $1 trillion market value" 7 | 8 | # Process the text 9 | doc = ____ 10 | 11 | # Print the document text 12 | print(____.____) 13 | -------------------------------------------------------------------------------- /exercises/en/exc_01_08_02.py: -------------------------------------------------------------------------------- 1 | import spacy 2 | 3 | nlp = spacy.load("en_core_web_sm") 4 | 5 | text = "It’s official: Apple is the first U.S. 
public company to reach a $1 trillion market value" 6 | 7 | # Process the text 8 | doc = ____ 9 | 10 | # Iterate over the predicted entities 11 | for ent in ____.____: 12 | # Print the entity text and its label 13 | print(ent.____, ____.____) 14 | -------------------------------------------------------------------------------- /exercises/en/exc_02_02_01.py: -------------------------------------------------------------------------------- 1 | import spacy 2 | 3 | nlp = spacy.blank("en") 4 | doc = nlp("I have a cat") 5 | 6 | # Look up the hash for the word "cat" 7 | cat_hash = ____.____.____[____] 8 | print(cat_hash) 9 | 10 | # Look up the cat_hash to get the string 11 | cat_string = ____.____.____[____] 12 | print(cat_string) 13 | -------------------------------------------------------------------------------- /exercises/en/exc_02_02_02.py: -------------------------------------------------------------------------------- 1 | import spacy 2 | 3 | nlp = spacy.blank("en") 4 | doc = nlp("David Bowie is a PERSON") 5 | 6 | # Look up the hash for the string label "PERSON" 7 | person_hash = ____.____.____[____] 8 | print(person_hash) 9 | 10 | # Look up the person_hash to get the string 11 | person_string = ____.____.____[____] 12 | print(person_string) 13 | -------------------------------------------------------------------------------- /exercises/en/exc_02_05_01.py: -------------------------------------------------------------------------------- 1 | import spacy 2 | 3 | nlp = spacy.blank("en") 4 | 5 | # Import the Doc class 6 | from ____ import ____ 7 | 8 | # Desired text: "spaCy is cool!" 9 | words = ["spaCy", "is", "cool", "!"] 10 | spaces = [True, True, False, False] 11 | 12 | # Create a Doc from the words and spaces 13 | doc = ____(____, words=words, spaces=spaces) 14 | print(doc.text) 15 | -------------------------------------------------------------------------------- /exercises/en/exc_02_05_02.py: -------------------------------------------------------------------------------- 1 | import spacy 2 | 3 | nlp = spacy.blank("en") 4 | 5 | # Import the Doc class 6 | from ____ import ____ 7 | 8 | # Desired text: "Go, get started!" 9 | words = ["Go", ",", "get", "started", "!"] 10 | spaces = [____, ____, ____, ____, ____] 11 | 12 | # Create a Doc from the words and spaces 13 | doc = ____(____, ____=____, ____=____) 14 | print(doc.text) 15 | -------------------------------------------------------------------------------- /exercises/en/exc_02_05_03.py: -------------------------------------------------------------------------------- 1 | import spacy 2 | 3 | nlp = spacy.blank("en") 4 | 5 | # Import the Doc class 6 | from ____ import ____ 7 | 8 | # Desired text: "Oh, really?!" 
9 | words = [____, ____, ____, ____, ____] 10 | spaces = [____, ____, ____, ____, ____] 11 | 12 | # Create a Doc from the words and spaces 13 | doc = ____(____, ____=____, ____=____) 14 | print(doc.text) 15 | -------------------------------------------------------------------------------- /exercises/en/exc_02_09.py: -------------------------------------------------------------------------------- 1 | import spacy 2 | 3 | # Load the en_core_web_md pipeline 4 | nlp = ____ 5 | 6 | # Process a text 7 | doc = nlp("Two bananas in pyjamas") 8 | 9 | # Get the vector for the token "bananas" 10 | bananas_vector = ____.____ 11 | print(bananas_vector) 12 | -------------------------------------------------------------------------------- /exercises/en/exc_02_10_01.py: -------------------------------------------------------------------------------- 1 | import spacy 2 | 3 | nlp = spacy.load("en_core_web_md") 4 | 5 | doc1 = nlp("It's a warm summer day") 6 | doc2 = nlp("It's sunny outside") 7 | 8 | # Get the similarity of doc1 and doc2 9 | similarity = ____.____(____) 10 | print(similarity) 11 | -------------------------------------------------------------------------------- /exercises/en/exc_02_10_02.py: -------------------------------------------------------------------------------- 1 | import spacy 2 | 3 | nlp = spacy.load("en_core_web_md") 4 | 5 | doc = nlp("TV and books") 6 | token1, token2 = doc[0], doc[2] 7 | 8 | # Get the similarity of the tokens "TV" and "books" 9 | similarity = ____.____(____) 10 | print(similarity) 11 | -------------------------------------------------------------------------------- /exercises/en/exc_02_10_03.py: -------------------------------------------------------------------------------- 1 | import spacy 2 | 3 | nlp = spacy.load("en_core_web_md") 4 | 5 | doc = nlp("This was a great restaurant. 
Afterwards, we went to a really nice bar.") 6 | 7 | # Create spans for "great restaurant" and "really nice bar" 8 | span1 = ____ 9 | span2 = ____ 10 | 11 | # Get the similarity of the spans 12 | similarity = ____.____(____) 13 | print(similarity) 14 | -------------------------------------------------------------------------------- /exercises/en/exc_03_03.py: -------------------------------------------------------------------------------- 1 | import spacy 2 | 3 | # Load the en_core_web_sm pipeline 4 | nlp = ____ 5 | 6 | # Print the names of the pipeline components 7 | print(____.____) 8 | 9 | # Print the full pipeline of (name, component) tuples 10 | print(____.____) 11 | -------------------------------------------------------------------------------- /exercises/en/exc_03_14_01.py: -------------------------------------------------------------------------------- 1 | import json 2 | import spacy 3 | 4 | nlp = spacy.load("en_core_web_sm") 5 | 6 | with open("exercises/en/tweets.json", encoding="utf8") as f: 7 | TEXTS = json.loads(f.read()) 8 | 9 | # Process the texts and print the adjectives 10 | for text in TEXTS: 11 | doc = nlp(text) 12 | print([token.text for token in doc if token.pos_ == "ADJ"]) 13 | -------------------------------------------------------------------------------- /exercises/en/exc_03_14_02.py: -------------------------------------------------------------------------------- 1 | import json 2 | import spacy 3 | 4 | nlp = spacy.load("en_core_web_sm") 5 | 6 | with open("exercises/en/tweets.json", encoding="utf8") as f: 7 | TEXTS = json.loads(f.read()) 8 | 9 | # Process the texts and print the entities 10 | docs = [nlp(text) for text in TEXTS] 11 | entities = [doc.ents for doc in docs] 12 | print(*entities) 13 | -------------------------------------------------------------------------------- /exercises/en/exc_03_14_03.py: -------------------------------------------------------------------------------- 1 | import spacy 2 | 3 | nlp = spacy.blank("en") 4 | 5 | people = ["David Bowie", "Angela Merkel", "Lady Gaga"] 6 | 7 | # Create a list of patterns for the PhraseMatcher 8 | patterns = [nlp(person) for person in people] 9 | -------------------------------------------------------------------------------- /exercises/en/exc_03_16_01.py: -------------------------------------------------------------------------------- 1 | import spacy 2 | 3 | nlp = spacy.load("en_core_web_sm") 4 | text = ( 5 | "Chick-fil-A is an American fast food restaurant chain headquartered in " 6 | "the city of College Park, Georgia, specializing in chicken sandwiches." 7 | ) 8 | 9 | # Only tokenize the text 10 | doc = nlp(text) 11 | print([token.text for token in doc]) 12 | -------------------------------------------------------------------------------- /exercises/en/exc_03_16_02.py: -------------------------------------------------------------------------------- 1 | import spacy 2 | 3 | nlp = spacy.load("en_core_web_sm") 4 | text = ( 5 | "Chick-fil-A is an American fast food restaurant chain headquartered in " 6 | "the city of College Park, Georgia, specializing in chicken sandwiches." 
7 | ) 8 | 9 | # Disable the tagger and lemmatizer 10 | with ____.____(____): 11 | # Process the text 12 | doc = ____ 13 | # Print the entities in the doc 14 | print(____) 15 | -------------------------------------------------------------------------------- /exercises/en/exc_04_07_01.sh: -------------------------------------------------------------------------------- 1 | python -m spacy ____ ____ ____ --___ ____ --____ ____ 2 | -------------------------------------------------------------------------------- /exercises/en/exc_04_07_02.sh: -------------------------------------------------------------------------------- 1 | cat ./config.cfg 2 | -------------------------------------------------------------------------------- /exercises/en/exc_04_08.sh: -------------------------------------------------------------------------------- 1 | python -m spacy ____ ____ --output ____ --paths.train ____ --paths.dev ____ 2 | -------------------------------------------------------------------------------- /exercises/en/iphone.json: -------------------------------------------------------------------------------- 1 | [ 2 | "How to preorder the iPhone X", 3 | "iPhone X is coming", 4 | "Should I pay $1,000 for the iPhone X?", 5 | "The iPhone 8 reviews are here", 6 | "iPhone 11 vs iPhone 8: What's the difference?", 7 | "I need a new phone! Any tips?" 8 | ] 9 | -------------------------------------------------------------------------------- /exercises/en/solution_01_02_01.py: -------------------------------------------------------------------------------- 1 | # Import spaCy 2 | import spacy 3 | 4 | # Create the English nlp object 5 | nlp = spacy.blank("en") 6 | 7 | # Process a text 8 | doc = nlp("This is a sentence.") 9 | 10 | # Print the document text 11 | print(doc.text) 12 | 13 | -------------------------------------------------------------------------------- /exercises/en/solution_01_02_02.py: -------------------------------------------------------------------------------- 1 | # Import spaCy 2 | import spacy 3 | 4 | # Create the German nlp object 5 | nlp = spacy.blank("de") 6 | 7 | # Process a text (this is German for: "Kind regards!") 8 | doc = nlp("Liebe Grüße!") 9 | 10 | # Print the document text 11 | print(doc.text) 12 | -------------------------------------------------------------------------------- /exercises/en/solution_01_02_03.py: -------------------------------------------------------------------------------- 1 | # Import spaCy 2 | import spacy 3 | 4 | # Create the Spanish nlp object 5 | nlp = spacy.blank("es") 6 | 7 | # Process a text (this is Spanish for: "How are you?") 8 | doc = nlp("¿Cómo estás?") 9 | 10 | # Print the document text 11 | print(doc.text) 12 | -------------------------------------------------------------------------------- /exercises/en/solution_01_03_01.py: -------------------------------------------------------------------------------- 1 | # Import spaCy and create the English nlp object 2 | import spacy 3 | 4 | nlp = spacy.blank("en") 5 | 6 | # Process the text 7 | doc = nlp("I like tree kangaroos and narwhals.") 8 | 9 | # Select the first token 10 | first_token = doc[0] 11 | 12 | # Print the first token's text 13 | print(first_token.text) 14 | -------------------------------------------------------------------------------- /exercises/en/solution_01_07.py: -------------------------------------------------------------------------------- 1 | import spacy 2 | 3 | # Load the "en_core_web_sm" pipeline 4 | nlp = spacy.load("en_core_web_sm") 5 | 6 | text = "It’s official: Apple is 
the first U.S. public company to reach a $1 trillion market value" 7 | 8 | # Process the text 9 | doc = nlp(text) 10 | 11 | # Print the document text 12 | print(doc.text) 13 | -------------------------------------------------------------------------------- /exercises/en/solution_01_08_02.py: -------------------------------------------------------------------------------- 1 | import spacy 2 | 3 | nlp = spacy.load("en_core_web_sm") 4 | 5 | text = "It’s official: Apple is the first U.S. public company to reach a $1 trillion market value" 6 | 7 | # Process the text 8 | doc = nlp(text) 9 | 10 | # Iterate over the predicted entities 11 | for ent in doc.ents: 12 | # Print the entity text and its label 13 | print(ent.text, ent.label_) 14 | -------------------------------------------------------------------------------- /exercises/en/solution_02_02_01.py: -------------------------------------------------------------------------------- 1 | import spacy 2 | 3 | nlp = spacy.blank("en") 4 | doc = nlp("I have a cat") 5 | 6 | # Look up the hash for the word "cat" 7 | cat_hash = nlp.vocab.strings["cat"] 8 | print(cat_hash) 9 | 10 | # Look up the cat_hash to get the string 11 | cat_string = nlp.vocab.strings[cat_hash] 12 | print(cat_string) 13 | -------------------------------------------------------------------------------- /exercises/en/solution_02_02_02.py: -------------------------------------------------------------------------------- 1 | import spacy 2 | 3 | nlp = spacy.blank("en") 4 | doc = nlp("David Bowie is a PERSON") 5 | 6 | # Look up the hash for the string label "PERSON" 7 | person_hash = nlp.vocab.strings["PERSON"] 8 | print(person_hash) 9 | 10 | # Look up the person_hash to get the string 11 | person_string = nlp.vocab.strings[person_hash] 12 | print(person_string) 13 | -------------------------------------------------------------------------------- /exercises/en/solution_02_05_01.py: -------------------------------------------------------------------------------- 1 | import spacy 2 | 3 | nlp = spacy.blank("en") 4 | 5 | # Import the Doc class 6 | from spacy.tokens import Doc 7 | 8 | # Desired text: "spaCy is cool!" 9 | words = ["spaCy", "is", "cool", "!"] 10 | spaces = [True, True, False, False] 11 | 12 | # Create a Doc from the words and spaces 13 | doc = Doc(nlp.vocab, words=words, spaces=spaces) 14 | print(doc.text) 15 | -------------------------------------------------------------------------------- /exercises/en/solution_02_05_02.py: -------------------------------------------------------------------------------- 1 | import spacy 2 | 3 | nlp = spacy.blank("en") 4 | 5 | # Import the Doc class 6 | from spacy.tokens import Doc 7 | 8 | # Desired text: "Go, get started!" 9 | words = ["Go", ",", "get", "started", "!"] 10 | spaces = [False, True, True, False, False] 11 | 12 | # Create a Doc from the words and spaces 13 | doc = Doc(nlp.vocab, words=words, spaces=spaces) 14 | print(doc.text) 15 | -------------------------------------------------------------------------------- /exercises/en/solution_02_05_03.py: -------------------------------------------------------------------------------- 1 | import spacy 2 | 3 | nlp = spacy.blank("en") 4 | 5 | # Import the Doc class 6 | from spacy.tokens import Doc 7 | 8 | # Desired text: "Oh, really?!" 
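# Note: words and spaces must have the same length; spaces[i] is True if the token at index i is followed by a space in doc.text.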
9 | words = ["Oh", ",", "really", "?", "!"] 10 | spaces = [False, True, False, False, False] 11 | 12 | # Create a Doc from the words and spaces 13 | doc = Doc(nlp.vocab, words=words, spaces=spaces) 14 | print(doc.text) 15 | -------------------------------------------------------------------------------- /exercises/en/solution_02_07.py: -------------------------------------------------------------------------------- 1 | import spacy 2 | 3 | nlp = spacy.load("en_core_web_sm") 4 | doc = nlp("Berlin looks like a nice city") 5 | 6 | # Iterate over the tokens 7 | for token in doc: 8 | # Check if the current token is a proper noun 9 | if token.pos_ == "PROPN": 10 | # Check if the next token is a verb 11 | if doc[token.i + 1].pos_ == "VERB": 12 | print("Found proper noun before a verb:", token.text) 13 | -------------------------------------------------------------------------------- /exercises/en/solution_02_09.py: -------------------------------------------------------------------------------- 1 | import spacy 2 | 3 | # Load the en_core_web_md pipeline 4 | nlp = spacy.load("en_core_web_md") 5 | 6 | # Process a text 7 | doc = nlp("Two bananas in pyjamas") 8 | 9 | # Get the vector for the token "bananas" 10 | bananas_vector = doc[1].vector 11 | print(bananas_vector) 12 | -------------------------------------------------------------------------------- /exercises/en/solution_02_10_01.py: -------------------------------------------------------------------------------- 1 | import spacy 2 | 3 | nlp = spacy.load("en_core_web_md") 4 | 5 | doc1 = nlp("It's a warm summer day") 6 | doc2 = nlp("It's sunny outside") 7 | 8 | # Get the similarity of doc1 and doc2 9 | similarity = doc1.similarity(doc2) 10 | print(similarity) 11 | -------------------------------------------------------------------------------- /exercises/en/solution_02_10_02.py: -------------------------------------------------------------------------------- 1 | import spacy 2 | 3 | nlp = spacy.load("en_core_web_md") 4 | 5 | doc = nlp("TV and books") 6 | token1, token2 = doc[0], doc[2] 7 | 8 | # Get the similarity of the tokens "TV" and "books" 9 | similarity = token1.similarity(token2) 10 | print(similarity) 11 | -------------------------------------------------------------------------------- /exercises/en/solution_02_10_03.py: -------------------------------------------------------------------------------- 1 | import spacy 2 | 3 | nlp = spacy.load("en_core_web_md") 4 | 5 | doc = nlp("This was a great restaurant. 
Afterwards, we went to a really nice bar.") 6 | 7 | # Create spans for "great restaurant" and "really nice bar" 8 | span1 = doc[3:5] 9 | span2 = doc[12:15] 10 | 11 | # Get the similarity of the spans 12 | similarity = span1.similarity(span2) 13 | print(similarity) 14 | -------------------------------------------------------------------------------- /exercises/en/solution_03_03.py: -------------------------------------------------------------------------------- 1 | import spacy 2 | 3 | # Load the en_core_web_sm pipeline 4 | nlp = spacy.load("en_core_web_sm") 5 | 6 | # Print the names of the pipeline components 7 | print(nlp.pipe_names) 8 | 9 | # Print the full pipeline of (name, component) tuples 10 | print(nlp.pipeline) 11 | -------------------------------------------------------------------------------- /exercises/en/solution_03_14_01.py: -------------------------------------------------------------------------------- 1 | import json 2 | import spacy 3 | 4 | nlp = spacy.load("en_core_web_sm") 5 | 6 | with open("exercises/en/tweets.json", encoding="utf8") as f: 7 | TEXTS = json.loads(f.read()) 8 | 9 | # Process the texts and print the adjectives 10 | for doc in nlp.pipe(TEXTS): 11 | print([token.text for token in doc if token.pos_ == "ADJ"]) 12 | -------------------------------------------------------------------------------- /exercises/en/solution_03_14_02.py: -------------------------------------------------------------------------------- 1 | import json 2 | import spacy 3 | 4 | nlp = spacy.load("en_core_web_sm") 5 | 6 | with open("exercises/en/tweets.json", encoding="utf8") as f: 7 | TEXTS = json.loads(f.read()) 8 | 9 | # Process the texts and print the entities 10 | docs = list(nlp.pipe(TEXTS)) 11 | entities = [doc.ents for doc in docs] 12 | print(*entities) 13 | -------------------------------------------------------------------------------- /exercises/en/solution_03_14_03.py: -------------------------------------------------------------------------------- 1 | import spacy 2 | 3 | nlp = spacy.blank("en") 4 | 5 | people = ["David Bowie", "Angela Merkel", "Lady Gaga"] 6 | 7 | # Create a list of patterns for the PhraseMatcher 8 | patterns = list(nlp.pipe(people)) 9 | -------------------------------------------------------------------------------- /exercises/en/solution_03_16_01.py: -------------------------------------------------------------------------------- 1 | import spacy 2 | 3 | nlp = spacy.load("en_core_web_sm") 4 | text = ( 5 | "Chick-fil-A is an American fast food restaurant chain headquartered in " 6 | "the city of College Park, Georgia, specializing in chicken sandwiches." 
7 | ) 8 | 9 | # Only tokenize the text 10 | doc = nlp.make_doc(text) 11 | print([token.text for token in doc]) 12 | -------------------------------------------------------------------------------- /exercises/en/solution_04_07_01.sh: -------------------------------------------------------------------------------- 1 | python -m spacy init config ./config.cfg --lang en --pipeline ner 2 | -------------------------------------------------------------------------------- /exercises/en/solution_04_07_02.sh: -------------------------------------------------------------------------------- 1 | cat ./config.cfg 2 | -------------------------------------------------------------------------------- /exercises/en/solution_04_08.sh: -------------------------------------------------------------------------------- 1 | python -m spacy train ./exercises/en/config_gadget.cfg --output ./output --paths.train ./exercises/en/train_gadget.spacy --paths.dev ./exercises/en/dev_gadget.spacy 2 | -------------------------------------------------------------------------------- /exercises/en/test_02_05_01.py: -------------------------------------------------------------------------------- 1 | def test(): 2 | assert ( 3 | "from spacy.tokens import Doc" in __solution__ 4 | ), "Are you importing the Doc class correctly?" 5 | assert doc.text == "spaCy is cool!", "Are you sure you created the Doc correctly?" 6 | assert "print(doc.text)" in __solution__, "Are you printing the Doc's text?" 7 | __msg__.good("Well done!") 8 | -------------------------------------------------------------------------------- /exercises/en/test_02_10_01.py: -------------------------------------------------------------------------------- 1 | def test(): 2 | assert ( 3 | "doc1.similarity(doc2)" in __solution__ or "doc2.similarity(doc1)" in __solution__ 4 | ), "Are you comparing the similarity of the two docs?" 5 | assert ( 6 | 0 <= float(similarity) <= 1 7 | ), "The value of similarity needs to be a number between 0 and 1. Did you calculate it correctly?" 8 | __msg__.good("Well done!") 9 | -------------------------------------------------------------------------------- /exercises/en/test_02_10_02.py: -------------------------------------------------------------------------------- 1 | def test(): 2 | assert ( 3 | "token1.similarity(token2)" in __solution__ or "token2.similarity(token1)" in __solution__ 4 | ), "Are you comparing the similarity of the two tokens?" 5 | assert ( 6 | 0 <= float(similarity) <= 1 7 | ), "The value of similarity needs to be a number between 0 and 1. Did you calculate it correctly?" 8 | __msg__.good("Nicely done!") 9 | -------------------------------------------------------------------------------- /exercises/en/test_03_14_01.py: -------------------------------------------------------------------------------- 1 | def test(): 2 | assert ( 3 | "for doc in nlp.pipe(TEXTS)" in __solution__ 4 | ), "Are you iterating over docs yielded by nlp.pipe?" 5 | __msg__.good("Nice!") 6 | -------------------------------------------------------------------------------- /exercises/en/test_03_14_02.py: -------------------------------------------------------------------------------- 1 | def test(): 2 | assert ( 3 | "docs = list(nlp.pipe(TEXTS))" in __solution__ 4 | ), "Are you using nlp.pipe wrapped in a list?"
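# nlp.pipe returns a generator, so it has to be wrapped in list() to get all the Doc objects at once.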
5 | __msg__.good("Great work!") 6 | -------------------------------------------------------------------------------- /exercises/en/test_03_14_03.py: -------------------------------------------------------------------------------- 1 | def test(): 2 | assert ( 3 | "patterns = list(nlp.pipe(people))" in __solution__ 4 | ), "Are you using nlp.pipe wrapped in a list?" 5 | 6 | __msg__.good( 7 | "Good job! Let's move on to a practical example that uses nlp.pipe " 8 | "to process documents with additional metadata." 9 | ) 10 | -------------------------------------------------------------------------------- /exercises/en/test_03_16_01.py: -------------------------------------------------------------------------------- 1 | def test(): 2 | assert ( 3 | "doc = nlp.make_doc(text)" in __solution__ 4 | or "doc = nlp.tokenizer(text)" in __solution__ 5 | ), "Are you only tokenizing the text?" 6 | 7 | __msg__.good("Nicely done!") 8 | -------------------------------------------------------------------------------- /exercises/en/train_gadget.spacy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/explosion/spacy-course/77d8ee1630788b1fb0df1f80f999f7355941a37f/exercises/en/train_gadget.spacy -------------------------------------------------------------------------------- /exercises/es/adidas.json: -------------------------------------------------------------------------------- 1 | [ 2 | "Cómo pre-ordenar los adidas ZX", 3 | "Los nuevos adidas ZX vienen en camino", 4 | "¿Debería pagar €200 por un par de adidas ZX?", 5 | "¿Cuál es la diferencia entre los adidas 8000 y los adidas 4000?", 6 | "¡Necesito nuevas zapatillas! ¿Qué me recomiendan?" 7 | ] 8 | -------------------------------------------------------------------------------- /exercises/es/dev_gadget.spacy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/explosion/spacy-course/77d8ee1630788b1fb0df1f80f999f7355941a37f/exercises/es/dev_gadget.spacy -------------------------------------------------------------------------------- /exercises/es/exc_01_02_01.py: -------------------------------------------------------------------------------- 1 | # Importa spaCy 2 | import ____ 3 | 4 | # Crea el objeto nlp para procesar inglés 5 | nlp = ____ 6 | 7 | # Procesa un texto (aquí dice "Esta es una oración" en inglés) 8 | doc = nlp("This is a sentence.") 9 | 10 | # Imprime en pantalla el texto del documento 11 | print(____.text) 12 | -------------------------------------------------------------------------------- /exercises/es/exc_01_02_02.py: -------------------------------------------------------------------------------- 1 | # Importa spaCy 2 | import ____ 3 | 4 | # Crea el objeto nlp para procesar alemán 5 | nlp = ____ 6 | 7 | # Procesa un texto (aquí dice "Saludos cordiales!"
en alemán) 8 | doc = nlp("Liebe Grüße!") 9 | 10 | # Imprime en pantalla el texto del documento 11 | print(____.text) 12 | -------------------------------------------------------------------------------- /exercises/es/exc_01_02_03.py: -------------------------------------------------------------------------------- 1 | # Importa spaCy 2 | import ____ 3 | 4 | # Crea el objeto nlp para procesar español 5 | nlp = ____ 6 | 7 | # Procesa un texto 8 | doc = nlp("¿Cómo estás?") 9 | 10 | # Imprime en pantalla el texto del documento 11 | print(____.text) 12 | -------------------------------------------------------------------------------- /exercises/es/exc_01_03_01.py: -------------------------------------------------------------------------------- 1 | # Importa spaCy y crea el objeto nlp para procesar español 2 | import ____ 3 | 4 | nlp = ____ 5 | 6 | # Procesa el texto 7 | doc = ____("Me gustan las panteras negras y los leones.") 8 | 9 | # Selecciona el primer token 10 | first_token = doc[____] 11 | 12 | # Imprime en pantalla el texto del token 13 | print(first_token.____) 14 | -------------------------------------------------------------------------------- /exercises/es/exc_01_07.py: -------------------------------------------------------------------------------- 1 | import spacy 2 | 3 | # Carga el modelo "es_core_news_sm" 4 | nlp = ____ 5 | 6 | text = ( 7 | "De acuerdo con la revista global de negocios Fortune, Apple fue " 8 | "la empresa más admirada en el mundo entre 2008 y 2012." 9 | ) 10 | 11 | # Procesa el texto 12 | doc = ____ 13 | 14 | # Imprime en pantalla el texto del documento 15 | print(____.____) 16 | -------------------------------------------------------------------------------- /exercises/es/exc_02_02_01.py: -------------------------------------------------------------------------------- 1 | import spacy 2 | 3 | nlp = spacy.load("es_core_news_sm") 4 | doc = nlp("Yo tengo un gato") 5 | 6 | # Busca el hash para la palabra "gato" 7 | gato_hash = ____.____.____[____] 8 | print(gato_hash) 9 | 10 | # Busca el gato_hash para obtener el string 11 | gato_string = ____.____.____[____] 12 | print(gato_string) 13 | -------------------------------------------------------------------------------- /exercises/es/exc_02_02_02.py: -------------------------------------------------------------------------------- 1 | import spacy 2 | 3 | nlp = spacy.load("es_core_news_sm") 4 | doc = nlp("David Bowie tiene el label PER") 5 | 6 | # Busca el hash para el label del string "PER" 7 | person_hash = ____.____.____[____] 8 | print(person_hash) 9 | 10 | # Busca el person_hash para obtener el string 11 | person_string = ____.____.____[____] 12 | print(person_string) 13 | -------------------------------------------------------------------------------- /exercises/es/exc_02_05_01.py: -------------------------------------------------------------------------------- 1 | import spacy 2 | 3 | nlp = spacy.load("es_core_news_sm") 4 | 5 | # Importa la clase Doc 6 | from ____ import ____ 7 | 8 | # El texto deseado: "spaCy es divertido!" 
9 | words = ["spaCy", "es", "divertido", "!"] 10 | spaces = [True, True, False, False] 11 | 12 | # Crea un Doc a partir de las palabras y los espacios 13 | doc = ____(____, words=words, spaces=spaces) 14 | print(doc.text) 15 | -------------------------------------------------------------------------------- /exercises/es/exc_02_05_02.py: -------------------------------------------------------------------------------- 1 | import spacy 2 | 3 | nlp = spacy.load("es_core_news_sm") 4 | 5 | # Importa la clase Doc 6 | from ____ import ____ 7 | 8 | # El texto deseado: "¡Vamos, empieza!" 9 | words = ["¡", "Vamos", ",", "empieza", "!"] 10 | spaces = [____, ____, ____, ____, ____] 11 | 12 | # Crea un Doc a partir de las palabras y los espacios 13 | doc = ____(____, ____=____, ____=____) 14 | print(doc.text) 15 | -------------------------------------------------------------------------------- /exercises/es/exc_02_05_03.py: -------------------------------------------------------------------------------- 1 | import spacy 2 | 3 | nlp = spacy.load("es_core_news_sm") 4 | 5 | # Importa la clase Doc 6 | from ____ import ____ 7 | 8 | # El texto deseado: "¡¿En serio?!" 9 | words = [____, ____, ____, ____, ____, ____] 10 | spaces = [____, ____, ____, ____, ____, ____] 11 | 12 | # Crea un Doc a partir de las palabras y los espacios 13 | doc = ____(____, ____=____, ____=____) 14 | print(doc.text) 15 | -------------------------------------------------------------------------------- /exercises/es/exc_02_09.py: -------------------------------------------------------------------------------- 1 | import spacy 2 | 3 | # Carga el modelo es_core_news_md 4 | nlp = ____ 5 | 6 | # Procesa un texto 7 | doc = nlp("Hoy hice pan de banano") 8 | 9 | # Obtén el vector para el token "banano" 10 | banano_vector = ____.____ 11 | print(banano_vector) 12 | -------------------------------------------------------------------------------- /exercises/es/exc_02_10_01.py: -------------------------------------------------------------------------------- 1 | import spacy 2 | 3 | nlp = spacy.load("es_core_news_md") 4 | 5 | doc1 = nlp("Es un cálido día de verano") 6 | doc2 = nlp("Hay sol afuera") 7 | 8 | # Obtén la similitud entre el doc1 y el doc2 9 | similarity = ____.____(____) 10 | print(similarity) 11 | -------------------------------------------------------------------------------- /exercises/es/exc_02_10_02.py: -------------------------------------------------------------------------------- 1 | import spacy 2 | 3 | nlp = spacy.load("es_core_news_md") 4 | 5 | doc = nlp("TV y libros") 6 | token1, token2 = doc[0], doc[2] 7 | 8 | # Obtén la similitud entre los tokens "TV" y "libros" 9 | similarity = ____.____(____) 10 | print(similarity) 11 | -------------------------------------------------------------------------------- /exercises/es/exc_02_10_03.py: -------------------------------------------------------------------------------- 1 | import spacy 2 | 3 | nlp = spacy.load("es_core_news_md") 4 | 5 | doc = nlp( 6 | "Estuvimos en un restaurante genial. Luego, fuimos a un bar muy divertido." 
7 | ) 8 | 9 | # Crea los spans para "restaurante genial" y "bar muy divertido" 10 | span1 = ____ 11 | span2 = ____ 12 | 13 | # Obtén la similitud entre los dos spans 14 | similarity = ____.____(____) 15 | print(similarity) 16 | -------------------------------------------------------------------------------- /exercises/es/exc_03_03.py: -------------------------------------------------------------------------------- 1 | import spacy 2 | 3 | # Carga el modelo es_core_news_sm 4 | nlp = ____ 5 | 6 | # Imprime en pantalla los nombres de los componentes del pipeline 7 | print(____.____) 8 | 9 | # Imprime en pantalla el pipeline entero de tuples (name, component) 10 | print(____.____) 11 | -------------------------------------------------------------------------------- /exercises/es/exc_03_14_01.py: -------------------------------------------------------------------------------- 1 | import json 2 | import spacy 3 | 4 | nlp = spacy.load("es_core_news_sm") 5 | 6 | with open("exercises/es/tweets.json", encoding="utf8") as f: 7 | TEXTS = json.loads(f.read()) 8 | 9 | # Procesa los textos e imprime los verbos en pantalla 10 | for text in TEXTS: 11 | doc = nlp(text) 12 | print([token.text for token in doc if token.pos_ == "VERB"]) 13 | -------------------------------------------------------------------------------- /exercises/es/exc_03_14_02.py: -------------------------------------------------------------------------------- 1 | import json 2 | import spacy 3 | 4 | nlp = spacy.load("es_core_news_sm") 5 | 6 | with open("exercises/es/tweets.json", encoding="utf8") as f: 7 | TEXTS = json.loads(f.read()) 8 | 9 | # Procesa los textos e imprime las entidades en pantalla 10 | docs = [nlp(text) for text in TEXTS] 11 | entities = [doc.ents for doc in docs] 12 | print(*entities) 13 | -------------------------------------------------------------------------------- /exercises/es/exc_03_14_03.py: -------------------------------------------------------------------------------- 1 | import spacy 2 | 3 | nlp = spacy.blank("es") 4 | 5 | people = ["David Bowie", "Angela Merkel", "Lady Gaga"] 6 | 7 | # Crea una lista de patrones para el PhraseMatcher 8 | patterns = [nlp(person) for person in people] 9 | -------------------------------------------------------------------------------- /exercises/es/exc_03_16_01.py: -------------------------------------------------------------------------------- 1 | import spacy 2 | 3 | nlp = spacy.load("es_core_news_sm") 4 | text = ( 5 | "Chick-fil-A es una cadena de restaurantes de comida rápida " 6 | "americana con sede en la ciudad de College Park, Georgia, " 7 | "especializada en sándwiches de pollo." 
8 | ) 9 | 10 | # Únicamente convierte el texto en tokens 11 | doc = nlp(text) 12 | print([token.text for token in doc]) 13 | -------------------------------------------------------------------------------- /exercises/es/exc_04_07_01.sh: -------------------------------------------------------------------------------- 1 | python -m spacy ____ ____ ____ --___ ____ --____ ____ 2 | -------------------------------------------------------------------------------- /exercises/es/exc_04_07_02.sh: -------------------------------------------------------------------------------- 1 | cat ./config.cfg 2 | -------------------------------------------------------------------------------- /exercises/es/exc_04_08.sh: -------------------------------------------------------------------------------- 1 | python -m spacy ____ ____ --output ____ --paths.train ____ --paths.dev ____ 2 | -------------------------------------------------------------------------------- /exercises/es/solution_01_02_01.py: -------------------------------------------------------------------------------- 1 | # Importa spaCy 2 | import spacy 3 | 4 | # Crea el objeto nlp para procesar inglés 5 | nlp = spacy.blank("en") 6 | 7 | # Procesa un texto (aquí dice "Esta es una oración" en inglés) 8 | doc = nlp("This is a sentence.") 9 | 10 | # Imprime en pantalla el texto del documento 11 | print(doc.text) 12 | 13 | -------------------------------------------------------------------------------- /exercises/es/solution_01_02_02.py: -------------------------------------------------------------------------------- 1 | # Importa spaCy 2 | import spacy 3 | 4 | # Crea el objeto nlp para procesar alemán 5 | nlp = spacy.blank("de") 6 | 7 | # Procesa un texto (aquí dice "Saludos cordiales!" en alemán) 8 | doc = nlp("Liebe Grüße!") 9 | 10 | # Imprime en pantalla el texto del documento 11 | print(doc.text) 12 | 13 | -------------------------------------------------------------------------------- /exercises/es/solution_01_02_03.py: -------------------------------------------------------------------------------- 1 | # Importa spaCy 2 | import spacy 3 | 4 | # Crea el objeto nlp para procesar español 5 | nlp = spacy.blank("es") 6 | 7 | # Procesa un texto 8 | doc = nlp("¿Cómo estás?") 9 | 10 | # Imprime en pantalla el texto del documento 11 | print(doc.text) 12 | -------------------------------------------------------------------------------- /exercises/es/solution_01_03_01.py: -------------------------------------------------------------------------------- 1 | # Importa spaCy y crea el objeto nlp para procesar español 2 | import spacy 3 | 4 | nlp = spacy.blank("es") 5 | 6 | # Procesa el texto 7 | doc = nlp("Me gustan las panteras negras y los leones.") 8 | 9 | # Selecciona el primer token 10 | first_token = doc[0] 11 | 12 | # Imprime en pantalla el texto del token 13 | print(first_token.text) 14 | -------------------------------------------------------------------------------- /exercises/es/solution_01_07.py: -------------------------------------------------------------------------------- 1 | import spacy 2 | 3 | # Carga el modelo "es_core_news_sm" 4 | nlp = spacy.load("es_core_news_sm") 5 | 6 | text = ( 7 | "De acuerdo con la revista global de negocios Fortune, Apple fue " 8 | "la empresa más admirada en el mundo entre 2008 y 2012." 
9 | ) 10 | 11 | # Procesa el texto 12 | doc = nlp(text) 13 | 14 | # Imprime en pantalla el texto del documento 15 | print(doc.text) 16 | -------------------------------------------------------------------------------- /exercises/es/solution_02_02_01.py: -------------------------------------------------------------------------------- 1 | import spacy 2 | 3 | nlp = spacy.load("es_core_news_sm") 4 | doc = nlp("Yo tengo un gato") 5 | 6 | # Busca el hash para la palabra "gato" 7 | gato_hash = nlp.vocab.strings["gato"] 8 | print(gato_hash) 9 | 10 | # Busca el gato_hash para obtener el string 11 | gato_string = nlp.vocab.strings[gato_hash] 12 | print(gato_string) 13 | -------------------------------------------------------------------------------- /exercises/es/solution_02_02_02.py: -------------------------------------------------------------------------------- 1 | import spacy 2 | 3 | nlp = spacy.load("es_core_news_sm") 4 | doc = nlp("David Bowie tiene el label PER") 5 | 6 | # Busca el hash para el label del string "PER" 7 | person_hash = nlp.vocab.strings["PER"] 8 | print(person_hash) 9 | 10 | # Busca el person_hash para obtener el string 11 | person_string = nlp.vocab.strings[person_hash] 12 | print(person_string) 13 | -------------------------------------------------------------------------------- /exercises/es/solution_02_05_01.py: -------------------------------------------------------------------------------- 1 | import spacy 2 | 3 | nlp = spacy.load("es_core_news_sm") 4 | 5 | # Importa la clase Doc 6 | from spacy.tokens import Doc 7 | 8 | # El texto deseado: "spaCy es divertido!" 9 | words = ["spaCy", "es", "divertido", "!"] 10 | spaces = [True, True, False, False] 11 | 12 | # Crea un Doc a partir de las palabras y los espacios 13 | doc = Doc(nlp.vocab, words=words, spaces=spaces) 14 | print(doc.text) 15 | -------------------------------------------------------------------------------- /exercises/es/solution_02_05_02.py: -------------------------------------------------------------------------------- 1 | import spacy 2 | 3 | nlp = spacy.load("es_core_news_sm") 4 | 5 | # Importa la clase Doc 6 | from spacy.tokens import Doc 7 | 8 | # El texto deseado: "¡Vamos, empieza!" 9 | words = ["¡", "Vamos", ",", "empieza", "!"] 10 | spaces = [False, False, True, False, False] 11 | 12 | # Crea un Doc a partir de las palabras y los espacios 13 | doc = Doc(nlp.vocab, words=words, spaces=spaces) 14 | print(doc.text) 15 | -------------------------------------------------------------------------------- /exercises/es/solution_02_05_03.py: -------------------------------------------------------------------------------- 1 | import spacy 2 | 3 | nlp = spacy.load("es_core_news_sm") 4 | 5 | # Importa la clase Doc 6 | from spacy.tokens import Doc 7 | 8 | # El texto deseado: "¡¿En serio?!" 
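# Nota: "¡" y "¿" son tokens separados, por eso words y spaces tienen seis elementos.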
9 | words = ["¡", "¿", "En", "serio", "?", "!"] 10 | spaces = [False, False, True, False, False, False] 11 | 12 | # Crea un Doc a partir de las palabras y los espacios 13 | doc = Doc(nlp.vocab, words=words, spaces=spaces) 14 | print(doc.text) 15 | -------------------------------------------------------------------------------- /exercises/es/solution_02_09.py: -------------------------------------------------------------------------------- 1 | import spacy 2 | 3 | # Carga el modelo es_core_news_md 4 | nlp = spacy.load("es_core_news_md") 5 | 6 | # Procesa un texto 7 | doc = nlp("Hoy hice pan de banano") 8 | 9 | # Obtén el vector para el token "banano" 10 | banano_vector = doc[4].vector 11 | print(banano_vector) 12 | -------------------------------------------------------------------------------- /exercises/es/solution_02_10_01.py: -------------------------------------------------------------------------------- 1 | import spacy 2 | 3 | nlp = spacy.load("es_core_news_md") 4 | 5 | doc1 = nlp("Es un cálido día de verano") 6 | doc2 = nlp("Hay sol afuera") 7 | 8 | # Obtén la similitud entre el doc1 y el doc2 9 | similarity = doc1.similarity(doc2) 10 | print(similarity) 11 | -------------------------------------------------------------------------------- /exercises/es/solution_02_10_02.py: -------------------------------------------------------------------------------- 1 | import spacy 2 | 3 | nlp = spacy.load("es_core_news_md") 4 | 5 | doc = nlp("TV y libros") 6 | token1, token2 = doc[0], doc[2] 7 | 8 | # Obtén la similitud entre los tokens "TV" y "libros" 9 | similarity = token1.similarity(token2) 10 | print(similarity) 11 | -------------------------------------------------------------------------------- /exercises/es/solution_02_10_03.py: -------------------------------------------------------------------------------- 1 | import spacy 2 | 3 | nlp = spacy.load("es_core_news_md") 4 | 5 | doc = nlp( 6 | "Estuvimos en un restaurante genial. Luego, fuimos a un bar muy divertido." 
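# "restaurante genial" corresponde a los tokens 3-4 y "bar muy divertido" a los tokens 11-13, de ahí los índices de los spans más abajo.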
7 | ) 8 | 9 | # Crea los spans para "restaurante genial" y "bar muy divertido" 10 | span1 = doc[3:5] 11 | span2 = doc[11:14] 12 | 13 | # Obtén la similitud entre los dos spans 14 | similarity = span1.similarity(span2) 15 | print(similarity) 16 | -------------------------------------------------------------------------------- /exercises/es/solution_03_03.py: -------------------------------------------------------------------------------- 1 | import spacy 2 | 3 | # Carga el modelo es_core_news_sm 4 | nlp = spacy.load("es_core_news_sm") 5 | 6 | # Imprime en pantalla los nombres de los componentes del pipeline 7 | print(nlp.pipe_names) 8 | 9 | # Imprime en pantalla el pipeline entero de tuples (name, component) 10 | print(nlp.pipeline) 11 | -------------------------------------------------------------------------------- /exercises/es/solution_03_14_01.py: -------------------------------------------------------------------------------- 1 | import json 2 | import spacy 3 | 4 | nlp = spacy.load("es_core_news_sm") 5 | 6 | with open("exercises/es/tweets.json", encoding="utf8") as f: 7 | TEXTS = json.loads(f.read()) 8 | 9 | # Procesa los textos e imprime los verbos en pantalla 10 | for doc in nlp.pipe(TEXTS): 11 | print([token.text for token in doc if token.pos_ == "VERB"]) 12 | -------------------------------------------------------------------------------- /exercises/es/solution_03_14_02.py: -------------------------------------------------------------------------------- 1 | import json 2 | import spacy 3 | 4 | nlp = spacy.load("es_core_news_sm") 5 | 6 | with open("exercises/es/tweets.json", encoding="utf8") as f: 7 | TEXTS = json.loads(f.read()) 8 | 9 | # Procesa los textos e imprime las entidades en pantalla 10 | docs = list(nlp.pipe(TEXTS)) 11 | entities = [doc.ents for doc in docs] 12 | print(*entities) 13 | -------------------------------------------------------------------------------- /exercises/es/solution_03_14_03.py: -------------------------------------------------------------------------------- 1 | from spacy.lang.es import Spanish 2 | 3 | nlp = Spanish() 4 | 5 | people = ["David Bowie", "Angela Merkel", "Lady Gaga"] 6 | 7 | # Crea una lista de patrones para el PhraseMatcher 8 | patterns = list(nlp.pipe(people)) 9 | -------------------------------------------------------------------------------- /exercises/es/solution_03_16_01.py: -------------------------------------------------------------------------------- 1 | import spacy 2 | 3 | nlp = spacy.load("es_core_news_sm") 4 | text = ( 5 | "Chick-fil-A es una cadena de restaurantes de comida rápida " 6 | "americana con sede en la ciudad de College Park, Georgia, " 7 | "especializada en sándwiches de pollo." 
8 | ) 9 | 10 | # Únicamente convierte el texto en tokens 11 | doc = nlp.make_doc(text) 12 | print([token.text for token in doc]) 13 | -------------------------------------------------------------------------------- /exercises/es/solution_04_07_01.sh: -------------------------------------------------------------------------------- 1 | python -m spacy init config ./config.cfg --lang es --pipeline ner 2 | -------------------------------------------------------------------------------- /exercises/es/solution_04_07_02.sh: -------------------------------------------------------------------------------- 1 | cat ./config.cfg 2 | -------------------------------------------------------------------------------- /exercises/es/solution_04_08.sh: -------------------------------------------------------------------------------- 1 | python -m spacy train ./exercises/es/config_gadget.cfg --output ./output --paths.train ./exercises/es/train_gadget.spacy --paths.dev ./exercises/es/dev_gadget.spacy 2 | -------------------------------------------------------------------------------- /exercises/es/test_02_02_01.py: -------------------------------------------------------------------------------- 1 | def test(): 2 | assert gato_hash == nlp.vocab.strings["gato"], "¿Asignaste el hash correcto?" 3 | assert 'nlp.vocab.strings["gato"]' in __solution__ 4 | assert gato_string == "gato", "¿Obtuviste el string correcto?" 5 | assert ( 6 | "nlp.vocab.strings[gato_hash]" in __solution__ 7 | ), "¿Obtuviste el string usando el hash?" 8 | 9 | __msg__.good("¡Muy buen trabajo!") 10 | -------------------------------------------------------------------------------- /exercises/es/test_02_02_02.py: -------------------------------------------------------------------------------- 1 | def test(): 2 | assert person_hash == nlp.vocab.strings["PER"], "¿Asignaste el hash correcto?" 3 | assert 'nlp.vocab.strings["PER"]' in __solution__ 4 | assert person_string == "PER", "¿Obtuviste el string correcto?" 5 | assert ( 6 | "nlp.vocab.strings[person_hash]" in __solution__ 7 | ), "¿Obtuviste el string usando el hash?" 8 | 9 | __msg__.good("¡Buen trabajo!") 10 | -------------------------------------------------------------------------------- /exercises/es/test_02_05_01.py: -------------------------------------------------------------------------------- 1 | def test(): 2 | assert ( 3 | "from spacy.tokens import Doc" in __solution__ 4 | ), "¿Estás importando la clase Doc correctamente?" 5 | assert doc.text == "spaCy es divertido!", "¿Creaste el Doc correctamente?" 6 | assert "print(doc.text)" in __solution__, "¿Estás imprimiendo en pantalla el texto del Doc?" 7 | __msg__.good("¡Bien hecho!") 8 | -------------------------------------------------------------------------------- /exercises/es/test_02_10_01.py: -------------------------------------------------------------------------------- 1 | def test(): 2 | assert ( 3 | "doc1.similarity(doc2)" in __solution__ or "doc2.similarity(doc1)" in __solution__ 4 | ), "¿Estás comparando la similitud entre los dos docs?" 5 | assert ( 6 | 0 <= float(similarity) <= 1 7 | ), "El valor de la similitud debe ser un número entre 0 y 1. ¿Lo calculaste correctamente?"
8 | __msg__.good("¡Bien hecho!") 9 | -------------------------------------------------------------------------------- /exercises/es/test_02_10_02.py: -------------------------------------------------------------------------------- 1 | def test(): 2 | assert ( 3 | "token1.similarity(token2)" in __solution__ or "token2.similarity(token1)" in __solution__ 4 | ), "¿Estás comparando la similitud entre los dos tokens?" 5 | assert ( 6 | 0 <= float(similarity) <= 1 7 | ), "El valor de la similitud debe ser un número entre 0 y 1. ¿Lo calculaste correctamente?" 8 | __msg__.good("¡Muy bien hecho!") 9 | -------------------------------------------------------------------------------- /exercises/es/test_03_14_01.py: -------------------------------------------------------------------------------- 1 | def test(): 2 | assert ( 3 | "for doc in nlp.pipe(TEXTS)" in __solution__ 4 | ), "¿Estás iterando sobre los docs que fueron devueltos usando yield por nlp.pipe?" 5 | __msg__.good("¡Bien!") 6 | -------------------------------------------------------------------------------- /exercises/es/test_03_14_02.py: -------------------------------------------------------------------------------- 1 | def test(): 2 | assert ( 3 | "docs = list(nlp.pipe(TEXTS))" in __solution__ 4 | ), "¿Estás usando nlp.pipe envuelto en una lista?" 5 | __msg__.good("¡Muy buen trabajo!") 6 | -------------------------------------------------------------------------------- /exercises/es/test_03_14_03.py: -------------------------------------------------------------------------------- 1 | def test(): 2 | assert ( 3 | "patterns = list(nlp.pipe(people))" in __solution__ 4 | ), "¿Estás usando nlp.pipe envuelto en una lista?" 5 | 6 | __msg__.good( 7 | "¡Buen trabajo! Ahora continuemos con un ejemplo práctico que usa nlp.pipe " 8 | "para procesar documentos con metadatos adicionales." 9 | ) 10 | -------------------------------------------------------------------------------- /exercises/es/test_03_16_01.py: -------------------------------------------------------------------------------- 1 | def test(): 2 | assert ( 3 | "doc = nlp.make_doc(text)" in __solution__ 4 | or "doc = nlp.tokenizer(text)" in __solution__ 5 | ), "¿Solo estás convirtiendo el texto en tokens?" 6 | 7 | __msg__.good("¡Bien hecho!") 8 | -------------------------------------------------------------------------------- /exercises/es/test_03_16_02.py: -------------------------------------------------------------------------------- 1 | def test(): 2 | assert ( 3 | 'with nlp.select_pipes(disable=["parser"])' in __solution__ 4 | ), "¿Estás usando nlp.select_pipes con los componentes correctos?" 5 | 6 | __msg__.good( 7 | "¡Perfecto! Ahora que has practicado los consejos y trucos de rendimiento, " 8 | "puedes pasar al siguiente capítulo y entrenar modelos de redes neurales de spaCy."
9 | ) 10 | -------------------------------------------------------------------------------- /exercises/es/train_gadget.spacy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/explosion/spacy-course/77d8ee1630788b1fb0df1f80f999f7355941a37f/exercises/es/train_gadget.spacy -------------------------------------------------------------------------------- /exercises/fr/dev_gadget.spacy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/explosion/spacy-course/77d8ee1630788b1fb0df1f80f999f7355941a37f/exercises/fr/dev_gadget.spacy -------------------------------------------------------------------------------- /exercises/fr/exc_01_02_01.py: -------------------------------------------------------------------------------- 1 | # Importe spaCy 2 | import ____ 3 | 4 | # Crée l'objet nlp français 5 | nlp = ____ 6 | 7 | # Traite un texte 8 | doc = nlp("Ceci est une phrase.") 9 | 10 | # Affiche le texte du document 11 | print(____.text) 12 | -------------------------------------------------------------------------------- /exercises/fr/exc_01_02_02.py: -------------------------------------------------------------------------------- 1 | # Importe spaCy 2 | import ____ 3 | 4 | # Crée l'objet nlp anglais 5 | nlp = ____ 6 | 7 | # Traite un texte (il signifie "Ceci est une phrase" en anglais) 8 | doc = nlp("This is a sentence.") 9 | 10 | # Affiche le texte du document 11 | print(____.text) 12 | -------------------------------------------------------------------------------- /exercises/fr/exc_01_02_03.py: -------------------------------------------------------------------------------- 1 | # Importe spaCy 2 | import ____ 3 | 4 | # Crée l'objet nlp espagnol 5 | nlp = ____ 6 | 7 | # Traite un texte (il signifie "Comment vas-tu ?" en espagnol) 8 | doc = nlp("¿Cómo estás?") 9 | 10 | # Affiche le texte du document 11 | print(____.text) 12 | -------------------------------------------------------------------------------- /exercises/fr/exc_01_03_01.py: -------------------------------------------------------------------------------- 1 | # Importe spacy et crée l'objet nlp français 2 | import ____ 3 | 4 | nlp = ____ 5 | 6 | # Traite le texte 7 | doc = ____("La forêt est peuplée de loups gris et renards roux.") 8 | 9 | # Sélectionne le premier token 10 | first_token = doc[____] 11 | 12 | # Affiche le texte du premier token 13 | print(first_token.____) 14 | -------------------------------------------------------------------------------- /exercises/fr/exc_01_07.py: -------------------------------------------------------------------------------- 1 | import spacy 2 | 3 | # Charge le pipeline "fr_core_news_sm" 4 | nlp = ____ 5 | 6 | text = "Apple a été créée en 1976 par Steve Wozniak, Steve Jobs et Ron Wayne." 7 | 8 | # Traite le texte 9 | doc = ____ 10 | 11 | # Affiche le texte du document 12 | print(____.____) 13 | -------------------------------------------------------------------------------- /exercises/fr/exc_01_08_02.py: -------------------------------------------------------------------------------- 1 | import spacy 2 | 3 | nlp = spacy.load("fr_core_news_sm") 4 | 5 | text = "Apple a été créée en 1976 par Steve Wozniak, Steve Jobs et Ron Wayne." 
6 | 7 | # Traite le texte 8 | doc = ____ 9 | 10 | # Itère sur les entités prédites 11 | for ent in ____.____: 12 | # Affiche le texte de l'entité et son label 13 | print(ent.____, ____.____) 14 | -------------------------------------------------------------------------------- /exercises/fr/exc_02_02_01.py: -------------------------------------------------------------------------------- 1 | import spacy 2 | 3 | nlp = spacy.blank("fr") 4 | doc = nlp("J'ai un chat") 5 | 6 | # Recherche le hash pour le mot "chat" 7 | cat_hash = ____.____.____[____] 8 | print(cat_hash) 9 | 10 | # Recherche cat_hash pour obtenir la chaine 11 | cat_string = ____.____.____[____] 12 | print(cat_string) 13 | -------------------------------------------------------------------------------- /exercises/fr/exc_02_02_02.py: -------------------------------------------------------------------------------- 1 | import spacy 2 | 3 | nlp = spacy.blank("fr") 4 | doc = nlp("David Bowie a le label PER") 5 | 6 | # Cherche le hash pour le label de chaine "PER" 7 | person_hash = ____.____.____[____] 8 | print(person_hash) 9 | 10 | # Cherche person_hash pour obtenir la chaine 11 | person_string = ____.____.____[____] 12 | print(person_string) 13 | -------------------------------------------------------------------------------- /exercises/fr/exc_02_05_01.py: -------------------------------------------------------------------------------- 1 | import spacy 2 | 3 | nlp = spacy.blank("fr") 4 | 5 | # Importe la classe Doc 6 | from ____ import ____ 7 | 8 | # Texte désiré : "spaCy est cool." 9 | words = ["spaCy", "est", "cool", "."] 10 | spaces = [True, True, False, False] 11 | 12 | # Crée un Doc à partir des mots et des espaces 13 | doc = ____(____, words=words, spaces=spaces) 14 | print(doc.text) 15 | -------------------------------------------------------------------------------- /exercises/fr/exc_02_05_02.py: -------------------------------------------------------------------------------- 1 | import spacy 2 | 3 | nlp = spacy.blank("fr") 4 | 5 | # Importe la classe Doc 6 | from ____ import ____ 7 | 8 | # Texte désiré : "Allez, on commence !" 9 | words = ["Allez", ",", "on", "commence", "!"] 10 | spaces = [____, ____, ____, ____, ____] 11 | 12 | # Crée un Doc à partir des mots et des espaces 13 | doc = ____(____, ____=____, ____=____) 14 | print(doc.text) 15 | -------------------------------------------------------------------------------- /exercises/fr/exc_02_05_03.py: -------------------------------------------------------------------------------- 1 | import spacy 2 | 3 | nlp = spacy.blank("fr") 4 | 5 | # Importe la classe Doc 6 | from ____ import ____ 7 | 8 | # Texte désiré : "Oh, vraiment ?!" 
9 | words = [____, ____, ____, ____, ____] 10 | spaces = [____, ____, ____, ____, ____] 11 | 12 | # Crée un Doc à partir des mots et des espaces 13 | doc = ____(____, ____=____, ____=____) 14 | print(doc.text) 15 | -------------------------------------------------------------------------------- /exercises/fr/exc_02_09.py: -------------------------------------------------------------------------------- 1 | import spacy 2 | 3 | # Charge le pipeline fr_core_news_md 4 | nlp = ____ 5 | 6 | # Traite le texte 7 | doc = nlp("Deux bananes en pyjamas") 8 | 9 | # Obtiens le vecteur pour le token "bananes" 10 | bananas_vector = ____.____ 11 | print(bananas_vector) 12 | -------------------------------------------------------------------------------- /exercises/fr/exc_02_10_01.py: -------------------------------------------------------------------------------- 1 | import spacy 2 | 3 | nlp = spacy.load("fr_core_news_md") 4 | 5 | doc1 = nlp("Le temps est au beau fixe") 6 | doc2 = nlp("Le ciel est clair") 7 | 8 | # Obtiens la similarité entre doc1 et doc2 9 | similarity = ____.____(____) 10 | print(similarity) 11 | -------------------------------------------------------------------------------- /exercises/fr/exc_02_10_02.py: -------------------------------------------------------------------------------- 1 | import spacy 2 | 3 | nlp = spacy.load("fr_core_news_md") 4 | 5 | doc = nlp("télé et livres") 6 | token1, token2 = doc[0], doc[2] 7 | 8 | # Obtiens la similarité entre les tokens "télé" et "livres" 9 | similarity = ____.____(____) 10 | print(similarity) 11 | -------------------------------------------------------------------------------- /exercises/fr/exc_02_10_03.py: -------------------------------------------------------------------------------- 1 | import spacy 2 | 3 | nlp = spacy.load("fr_core_news_md") 4 | 5 | doc = nlp("C'était un super restaurant. 
Ensuite nous sommes allés dans un bar vraiment sympa.") 6 | 7 | # Crée des spans pour "super restaurant" et "bar vraiment sympa" 8 | span1 = ____ 9 | span2 = ____ 10 | 11 | # Obtiens la similarité entre les spans 12 | similarity = ____.____(____) 13 | print(similarity) 14 | -------------------------------------------------------------------------------- /exercises/fr/exc_03_03.py: -------------------------------------------------------------------------------- 1 | import spacy 2 | 3 | # Charge le pipeline fr_core_news_sm 4 | nlp = ____ 5 | 6 | # Affiche les noms des composants du pipeline 7 | print(____.____) 8 | 9 | # Affiche tous les tuples de (name, component) du pipeline 10 | print(____.____) 11 | -------------------------------------------------------------------------------- /exercises/fr/exc_03_14_01.py: -------------------------------------------------------------------------------- 1 | import json 2 | import spacy 3 | 4 | nlp = spacy.load("fr_core_news_sm") 5 | 6 | with open("exercises/fr/tweets.json", encoding="utf8") as f: 7 | TEXTS = json.loads(f.read()) 8 | 9 | # Traite les textes et affiche les adjectifs 10 | for text in TEXTS: 11 | doc = nlp(text) 12 | print([token.text for token in doc if token.pos_ == "ADJ"]) 13 | -------------------------------------------------------------------------------- /exercises/fr/exc_03_14_02.py: -------------------------------------------------------------------------------- 1 | import json 2 | import spacy 3 | 4 | nlp = spacy.load("fr_core_news_sm") 5 | 6 | with open("exercises/fr/tweets.json", encoding="utf8") as f: 7 | TEXTS = json.loads(f.read()) 8 | 9 | # Traite les textes et affiche les entités 10 | docs = [nlp(text) for text in TEXTS] 11 | entities = [doc.ents for doc in docs] 12 | print(*entities) 13 | -------------------------------------------------------------------------------- /exercises/fr/exc_03_14_03.py: -------------------------------------------------------------------------------- 1 | import spacy 2 | 3 | nlp = spacy.blank("fr") 4 | 5 | people = ["David Bowie", "Angela Merkel", "Lady Gaga"] 6 | 7 | # Crée une liste de motifs pour le PhraseMatcher 8 | patterns = [nlp(person) for person in people] 9 | -------------------------------------------------------------------------------- /exercises/fr/exc_03_16_01.py: -------------------------------------------------------------------------------- 1 | import spacy 2 | 3 | nlp = spacy.load("fr_core_news_sm") 4 | text = ( 5 | "Le groupe aéronautique Airbus construit des avions et des " 6 | "hélicoptères vendus dans le monde entier. Le siège opérationnel du " 7 | "groupe est situé en France à Toulouse dans la région Occitanie." 
8 | ) 9 | 10 | # Tokenise seulement le texte 11 | doc = nlp(text) 12 | print([token.text for token in doc]) 13 | -------------------------------------------------------------------------------- /exercises/fr/exc_04_07_01.sh: -------------------------------------------------------------------------------- 1 | python -m spacy ____ ____ ____ --___ ____ --____ ____ 2 | -------------------------------------------------------------------------------- /exercises/fr/exc_04_07_02.sh: -------------------------------------------------------------------------------- 1 | cat ./config.cfg 2 | -------------------------------------------------------------------------------- /exercises/fr/exc_04_08.sh: -------------------------------------------------------------------------------- 1 | python -m spacy ____ ____ --output ____ --paths.train ____ --paths.dev ____ 2 | -------------------------------------------------------------------------------- /exercises/fr/iphone.json: -------------------------------------------------------------------------------- 1 | [ 2 | "Comment précommander l'iPhone X", 3 | "l'iPhone X arrive", 4 | "Dois-je dépenser 1.000 € pour l'iPhone X ?", 5 | "Les tests de l'iPhone 8 sont là", 6 | "iPhone 11 contre iPhone 8 : quelles sont les différences ?", 7 | "Il me faut un nouveau téléphone ! Des suggestions à me faire ?" 8 | ] 9 | -------------------------------------------------------------------------------- /exercises/fr/solution_01_02_01.py: -------------------------------------------------------------------------------- 1 | # Importe spaCy 2 | import spacy 3 | 4 | # Crée l'objet nlp français 5 | nlp = spacy.blank("fr") 6 | 7 | # Traite un texte 8 | doc = nlp("Ceci est une phrase.") 9 | 10 | # Affiche le texte du document 11 | print(doc.text) 12 | -------------------------------------------------------------------------------- /exercises/fr/solution_01_02_02.py: -------------------------------------------------------------------------------- 1 | # Importe spaCy 2 | import spacy 3 | 4 | # Crée l'objet nlp anglais 5 | nlp = spacy.blank("en") 6 | 7 | # Traite un texte (il signifie "Ceci est une phrase" en anglais) 8 | doc = nlp("This is a sentence.") 9 | 10 | # Affiche le texte du document 11 | print(doc.text) 12 | -------------------------------------------------------------------------------- /exercises/fr/solution_01_02_03.py: -------------------------------------------------------------------------------- 1 | # Importe spaCy 2 | import spacy 3 | 4 | # Crée l'objet nlp espagnol 5 | nlp = spacy.blank("es") 6 | 7 | # Traite un texte (il signifie "Comment vas-tu ?" 
en espagnol) 8 | doc = nlp("¿Cómo estás?") 9 | 10 | # Affiche le texte du document 11 | print(doc.text) 12 | -------------------------------------------------------------------------------- /exercises/fr/solution_01_03_01.py: -------------------------------------------------------------------------------- 1 | # Importe spacy et crée l'objet nlp français 2 | import spacy 3 | 4 | nlp = spacy.blank("fr") 5 | 6 | # Traite le texte 7 | doc = nlp("La forêt est peuplée de loups gris et renards roux.") 8 | 9 | # Sélectionne le premier token 10 | first_token = doc[0] 11 | 12 | # Affiche le texte du premier token 13 | print(first_token.text) 14 | -------------------------------------------------------------------------------- /exercises/fr/solution_01_07.py: -------------------------------------------------------------------------------- 1 | import spacy 2 | 3 | # Charge le pipeline "fr_core_news_sm" 4 | nlp = spacy.load("fr_core_news_sm") 5 | 6 | text = "Apple a été créée en 1976 par Steve Wozniak, Steve Jobs et Ron Wayne." 7 | 8 | # Traite le texte 9 | doc = nlp(text) 10 | 11 | # Affiche le texte du document 12 | print(doc.text) 13 | -------------------------------------------------------------------------------- /exercises/fr/solution_01_08_02.py: -------------------------------------------------------------------------------- 1 | import spacy 2 | 3 | nlp = spacy.load("fr_core_news_sm") 4 | 5 | text = "Apple a été créée en 1976 par Steve Wozniak, Steve Jobs et Ron Wayne." 6 | 7 | # Traite le texte 8 | doc = nlp(text) 9 | 10 | # Itère sur les entités prédites 11 | for ent in doc.ents: 12 | # Affiche le texte de l'entité et son label 13 | print(ent.text, ent.label_) 14 | -------------------------------------------------------------------------------- /exercises/fr/solution_02_02_01.py: -------------------------------------------------------------------------------- 1 | import spacy 2 | 3 | nlp = spacy.blank("fr") 4 | doc = nlp("J'ai un chat") 5 | 6 | # Recherche le hash pour le mot "chat" 7 | cat_hash = nlp.vocab.strings["chat"] 8 | print(cat_hash) 9 | 10 | # Recherche cat_hash pour obtenir la chaine 11 | cat_string = nlp.vocab.strings[cat_hash] 12 | print(cat_string) 13 | -------------------------------------------------------------------------------- /exercises/fr/solution_02_02_02.py: -------------------------------------------------------------------------------- 1 | import spacy 2 | 3 | nlp = spacy.blank("fr") 4 | doc = nlp("David Bowie a le label PER") 5 | 6 | # Cherche le hash pour le label de chaine "PER" 7 | person_hash = nlp.vocab.strings["PER"] 8 | print(person_hash) 9 | 10 | # Cherche person_hash pour obtenir la chaine 11 | person_string = nlp.vocab.strings[person_hash] 12 | print(person_string) 13 | -------------------------------------------------------------------------------- /exercises/fr/solution_02_05_01.py: -------------------------------------------------------------------------------- 1 | import spacy 2 | 3 | nlp = spacy.blank("fr") 4 | 5 | # Importe la classe Doc 6 | from spacy.tokens import Doc 7 | 8 | # Texte désiré : "spaCy est cool."
9 | words = ["spaCy", "est", "cool", "."] 10 | spaces = [True, True, False, False] 11 | 12 | # Crée un Doc à partir des mots et des espaces 13 | doc = Doc(nlp.vocab, words=words, spaces=spaces) 14 | print(doc.text) 15 | -------------------------------------------------------------------------------- /exercises/fr/solution_02_05_02.py: -------------------------------------------------------------------------------- 1 | import spacy 2 | 3 | nlp = spacy.blank("fr") 4 | 5 | # Importe la classe Doc 6 | from spacy.tokens import Doc 7 | 8 | # Texte désiré : "Allez, on commence !" 9 | words = ["Allez", ",", "on", "commence", "!"] 10 | spaces = [False, True, True, True, False] 11 | 12 | # Crée un Doc à partir des mots et des espaces 13 | doc = Doc(nlp.vocab, words=words, spaces=spaces) 14 | print(doc.text) 15 | -------------------------------------------------------------------------------- /exercises/fr/solution_02_05_03.py: -------------------------------------------------------------------------------- 1 | import spacy 2 | 3 | nlp = spacy.blank("fr") 4 | 5 | # Importe la classe Doc 6 | from spacy.tokens import Doc 7 | 8 | # Texte désiré : "Oh, vraiment ?!" 9 | words = ["Oh", ",", "vraiment", "?", "!"] 10 | spaces = [False, True, True, False, False] 11 | 12 | # Crée un Doc à partir des mots et des espaces 13 | doc = Doc(nlp.vocab, words=words, spaces=spaces) 14 | print(doc.text) 15 | -------------------------------------------------------------------------------- /exercises/fr/solution_02_09.py: -------------------------------------------------------------------------------- 1 | import spacy 2 | 3 | # Charge le pipeline fr_core_news_md 4 | nlp = spacy.load("fr_core_news_md") 5 | 6 | # Traite le texte 7 | doc = nlp("Deux bananes en pyjamas") 8 | 9 | # Obtiens le vecteur pour le token "bananes" 10 | bananas_vector = doc[1].vector 11 | print(bananas_vector) 12 | -------------------------------------------------------------------------------- /exercises/fr/solution_02_10_01.py: -------------------------------------------------------------------------------- 1 | import spacy 2 | 3 | nlp = spacy.load("fr_core_news_md") 4 | 5 | doc1 = nlp("Le temps est au beau fixe") 6 | doc2 = nlp("Le ciel est clair") 7 | 8 | # Obtiens la similarité entre doc1 et doc2 9 | similarity = doc1.similarity(doc2) 10 | print(similarity) 11 | -------------------------------------------------------------------------------- /exercises/fr/solution_02_10_02.py: -------------------------------------------------------------------------------- 1 | import spacy 2 | 3 | nlp = spacy.load("fr_core_news_md") 4 | 5 | doc = nlp("télé et livres") 6 | token1, token2 = doc[0], doc[2] 7 | 8 | # Obtiens la similarité entre les tokens "télé" et "livres" 9 | similarity = token1.similarity(token2) 10 | print(similarity) 11 | -------------------------------------------------------------------------------- /exercises/fr/solution_02_10_03.py: -------------------------------------------------------------------------------- 1 | import spacy 2 | 3 | nlp = spacy.load("fr_core_news_md") 4 | 5 | doc = nlp("C'était un super restaurant. 
Ensuite nous sommes allés dans un bar vraiment sympa.") 6 | 7 | # Crée des spans pour "super restaurant" et "bar vraiment sympa" 8 | span1 = doc[3:5] 9 | span2 = doc[12:15] 10 | 11 | # Obtiens la similarité entre les spans 12 | similarity = span1.similarity(span2) 13 | print(similarity) 14 | -------------------------------------------------------------------------------- /exercises/fr/solution_03_03.py: -------------------------------------------------------------------------------- 1 | import spacy 2 | 3 | # Charge le pipeline fr_core_news_sm 4 | nlp = spacy.load("fr_core_news_sm") 5 | 6 | # Affiche les noms des composants du pipeline 7 | print(nlp.pipe_names) 8 | 9 | # Affiche tous les tuples de (name, component) du pipeline 10 | print(nlp.pipeline) 11 | -------------------------------------------------------------------------------- /exercises/fr/solution_03_14_01.py: -------------------------------------------------------------------------------- 1 | import json 2 | import spacy 3 | 4 | nlp = spacy.load("fr_core_news_sm") 5 | 6 | with open("exercises/fr/tweets.json", encoding="utf8") as f: 7 | TEXTS = json.loads(f.read()) 8 | 9 | # Traite les textes et affiche les adjectifs 10 | for doc in nlp.pipe(TEXTS): 11 | print([token.text for token in doc if token.pos_ == "ADJ"]) 12 | -------------------------------------------------------------------------------- /exercises/fr/solution_03_14_02.py: -------------------------------------------------------------------------------- 1 | import json 2 | import spacy 3 | 4 | nlp = spacy.load("fr_core_news_sm") 5 | 6 | with open("exercises/fr/tweets.json", encoding="utf8") as f: 7 | TEXTS = json.loads(f.read()) 8 | 9 | # Traite les textes et affiche les entités 10 | docs = list(nlp.pipe(TEXTS)) 11 | entities = [doc.ents for doc in docs] 12 | print(*entities) 13 | -------------------------------------------------------------------------------- /exercises/fr/solution_03_14_03.py: -------------------------------------------------------------------------------- 1 | import spacy 2 | 3 | nlp = spacy.blank("fr") 4 | 5 | people = ["David Bowie", "Angela Merkel", "Lady Gaga"] 6 | 7 | # Crée une liste de motifs pour le PhraseMatcher 8 | patterns = list(nlp.pipe(people)) 9 | -------------------------------------------------------------------------------- /exercises/fr/solution_03_16_01.py: -------------------------------------------------------------------------------- 1 | import spacy 2 | 3 | nlp = spacy.load("fr_core_news_sm") 4 | text = ( 5 | "Le groupe aéronautique Airbus construit des avions et des " 6 | "hélicoptères vendus dans le monde entier. Le siège opérationnel du " 7 | "groupe est situé en France à Toulouse dans la région Occitanie." 
8 | ) 9 | 10 | # Tokenise seulement le texte 11 | doc = nlp.make_doc(text) 12 | print([token.text for token in doc]) 13 | -------------------------------------------------------------------------------- /exercises/fr/solution_04_07_01.sh: -------------------------------------------------------------------------------- 1 | python -m spacy init config ./config.cfg --lang fr --pipeline ner 2 | -------------------------------------------------------------------------------- /exercises/fr/solution_04_07_02.sh: -------------------------------------------------------------------------------- 1 | cat ./config.cfg 2 | -------------------------------------------------------------------------------- /exercises/fr/solution_04_08.sh: -------------------------------------------------------------------------------- 1 | python -m spacy train ./exercises/fr/config_gadget.cfg --output ./output --paths.train ./exercises/fr/train_gadget.spacy --paths.dev ./exercises/fr/dev_gadget.spacy 2 | -------------------------------------------------------------------------------- /exercises/fr/test_02_05_01.py: -------------------------------------------------------------------------------- 1 | def test(): 2 | assert ( 3 | "from spacy.tokens import Doc" in __solution__ 4 | ), "Importes-tu correctement la classe Doc ?" 5 | assert doc.text == "spaCy est cool.", "Es-tu certain d'avoir créé correctement le Doc ?" 6 | assert "print(doc.text)" in __solution__, "Affiches-tu le texte du Doc ?" 7 | __msg__.good("Bien joué !") 8 | -------------------------------------------------------------------------------- /exercises/fr/test_02_10_01.py: -------------------------------------------------------------------------------- 1 | def test(): 2 | assert ( 3 | "doc1.similarity(doc2)" in __solution__ or "doc2.similarity(doc1)" in __solution__ 4 | ), "Compares-tu la similarité entre les deux docs ?" 5 | assert ( 6 | 0 <= float(similarity) <= 1 7 | ), "La valeur de similarité doit être un nombre flottant. L'as-tu calculée correctement ?" 8 | __msg__.good("Bien joué !") 9 | -------------------------------------------------------------------------------- /exercises/fr/test_02_10_02.py: -------------------------------------------------------------------------------- 1 | def test(): 2 | assert ( 3 | "token1.similarity(token2)" in __solution__ or "token2.similarity(token1)" in __solution__ 4 | ), "Compares-tu la similarité entre les deux tokens ?" 5 | assert ( 6 | 0 <= float(similarity) <= 1 7 | ), "La valeur de similarité doit être un nombre flottant. L'as-tu calculée correctement ?" 8 | __msg__.good("Bien joué !") 9 | -------------------------------------------------------------------------------- /exercises/fr/test_03_14_01.py: -------------------------------------------------------------------------------- 1 | def test(): 2 | assert ( 3 | "for doc in nlp.pipe(TEXTS)" in __solution__ 4 | ), "Itères-tu sur les docs générés par nlp.pipe ?" 5 | __msg__.good("Joli !") 6 | -------------------------------------------------------------------------------- /exercises/fr/test_03_14_02.py: -------------------------------------------------------------------------------- 1 | def test(): 2 | assert ( 3 | "docs = list(nlp.pipe(TEXTS))" in __solution__ 4 | ), "Utilises-tu nlp.pipe enveloppé dans une liste ?"
5 | __msg__.good("Super boulot !") 6 | -------------------------------------------------------------------------------- /exercises/fr/test_03_14_03.py: -------------------------------------------------------------------------------- 1 | def test(): 2 | assert ( 3 | "patterns = list(nlp.pipe(people))" in __solution__ 4 | ), "Utilises-tu nlp.pipe enveloppé dans une liste ?" 5 | 6 | __msg__.good( 7 | "Bon boulot ! Passons à un exemple pratique qui utilise nlp.pipe " 8 | "pour traiter des documents avec des métadonnées supplémentaires." 9 | ) 10 | -------------------------------------------------------------------------------- /exercises/fr/test_03_16_01.py: -------------------------------------------------------------------------------- 1 | def test(): 2 | assert ( 3 | "doc = nlp.make_doc(text)" in __solution__ 4 | or "doc = nlp.tokenizer(text)" in __solution__ 5 | ), "Est-ce que tu tokenises seulement le texte ?" 6 | 7 | __msg__.good("Bien joué !") 8 | -------------------------------------------------------------------------------- /exercises/fr/train_gadget.spacy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/explosion/spacy-course/77d8ee1630788b1fb0df1f80f999f7355941a37f/exercises/fr/train_gadget.spacy -------------------------------------------------------------------------------- /exercises/ja/dev_gadget.spacy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/explosion/spacy-course/77d8ee1630788b1fb0df1f80f999f7355941a37f/exercises/ja/dev_gadget.spacy -------------------------------------------------------------------------------- /exercises/ja/exc_01_02_01.py: -------------------------------------------------------------------------------- 1 | # spaCyをインポート 2 | import ____ 3 | 4 | # 英語のnlpオブジェクトを作成 5 | nlp = ____ 6 | 7 | # テキストを処理 8 | doc = nlp("This is a sentence.") 9 | 10 | # docのテキストをプリント 11 | print(____.text) 12 | -------------------------------------------------------------------------------- /exercises/ja/exc_01_02_02.py: -------------------------------------------------------------------------------- 1 | # spaCyをインポート 2 | import ____ 3 | 4 | # ドイツ語のnlpオブジェクトを作成 5 | nlp = ____ 6 | 7 | # テキストを処理(ドイツ語で「よろしく!」の意味) 8 | doc = nlp("Liebe Grüße!") 9 | 10 | # docのテキストをプリント 11 | print(____.text) 12 | -------------------------------------------------------------------------------- /exercises/ja/exc_01_02_03.py: -------------------------------------------------------------------------------- 1 | # spacyをインポート 2 | import ____ 3 | 4 | # スペイン語のnlpオブジェクトを作成 5 | nlp = ____ 6 | 7 | # テキストを処理(スペイン語で「おげんきですか?」の意味) 8 | doc = nlp("¿Cómo estás?") 9 | 10 | # docのテキストをプリント 11 | print(____.text) 12 | -------------------------------------------------------------------------------- /exercises/ja/exc_01_02_04.py: -------------------------------------------------------------------------------- 1 | # spaCyをインポート 2 | import ____ 3 | 4 | # 日本語のnlpオブジェクトを作成 5 | nlp = ____ 6 | 7 | # テキストを処理 8 | doc = nlp("有難うございます。") 9 | 10 | # docのテキストをプリント 11 | print(____.text) 12 | -------------------------------------------------------------------------------- /exercises/ja/exc_01_03_01.py: -------------------------------------------------------------------------------- 1 | # spaCyをインポートして、日本語のnlpオブジェクトを作成 2 | import ____ 3 | 4 | nlp = ____ 5 | 6 | # テキストを処理 7 | doc = ____("私はツリーカンガルーとイッカクが好きです。") 8 | 9 | # 最初のトークンを選択 10 | first_token = doc[____] 11 | 12 | # 最初のトークンのテキストをプリント 13 | 
print(first_token.____) 14 | -------------------------------------------------------------------------------- /exercises/ja/exc_01_03_02.py: -------------------------------------------------------------------------------- 1 | # spacyをインポートして、日本語のnlpオブジェクトを作成 2 | import spacy 3 | 4 | nlp = ____ 5 | 6 | # テキストを処理 7 | doc = ____("私はツリーカンガルーとイッカクが好きです。") 8 | 9 | # 「ツリーカンガルー」のスライスを選択 10 | tree_kangaroos = ____ 11 | print(tree_kangaroos.text) 12 | 13 | # 「ツリーカンガルーとイッカク」のスライスを選択 14 | tree_kangaroos_and_narwhals = ____ 15 | print(tree_kangaroos_and_narwhals.text) 16 | -------------------------------------------------------------------------------- /exercises/ja/exc_01_07.py: -------------------------------------------------------------------------------- 1 | import spacy 2 | 3 | # 「ja_core_news_sm」パイプラインをロード 4 | nlp = ____ 5 | 6 | text = "公式発表:Appleが米国の上場企業として初めて時価評価額1兆ドルに到達しました。" 7 | 8 | # テキストを処理 9 | doc = ____ 10 | 11 | # docのテキストをプリント 12 | print(____.____) 13 | -------------------------------------------------------------------------------- /exercises/ja/exc_01_08_01.py: -------------------------------------------------------------------------------- 1 | import spacy 2 | 3 | nlp = spacy.load("ja_core_news_sm") 4 | 5 | text = "公式発表:Appleが米国の上場企業として初めて時価評価額1兆ドルに到達しました。" 6 | 7 | # テキストを処理 8 | doc = ____ 9 | 10 | for token in doc: 11 | # トークンの文字列、品詞タグ、依存関係ラベルを取得 12 | token_text = ____.____ 13 | token_pos = ____.____ 14 | token_dep = ____.____ 15 | # フォーマットしてプリント 16 | print(f"{token_text:<12}{token_pos:<10}{token_dep:<10}") 17 | -------------------------------------------------------------------------------- /exercises/ja/exc_01_08_02.py: -------------------------------------------------------------------------------- 1 | import spacy 2 | 3 | nlp = spacy.load("ja_core_news_sm") 4 | 5 | text = "公式発表:Appleが米国の上場企業として初めて時価評価額1兆ドルに到達しました。" 6 | 7 | # テキストを処理 8 | doc = ____ 9 | 10 | # 予測された固有表現をイテレート 11 | for ent in ____.____: 12 | # 固有表現の文字列とラベルをプリント 13 | print(ent.____, ____.____) 14 | -------------------------------------------------------------------------------- /exercises/ja/exc_01_09.py: -------------------------------------------------------------------------------- 1 | import spacy 2 | 3 | nlp = spacy.load("ja_core_news_sm") 4 | 5 | text = "静岡県にある三保の松原は世界遺産の一部です。" 6 | 7 | # テキストを処理 8 | doc = ____ 9 | 10 | # 固有表現をイテレート 11 | for ____ in ____.____: 12 | # 固有表現の文字列とラベルをプリント 13 | print(____.____, ____.____) 14 | 15 | # 三保の松原のスパンを取得 16 | mihonomatsubara = ____ 17 | 18 | # スパンの文字列をプリント 19 | print("Missing entity:", mihonomatsubara.text) 20 | -------------------------------------------------------------------------------- /exercises/ja/exc_02_02_01.py: -------------------------------------------------------------------------------- 1 | import spacy 2 | 3 | nlp = spacy.blank("ja") 4 | doc = nlp("私はネコを飼っています") 5 | 6 | # 単語「ネコ」のハッシュを引く 7 | cat_hash = ____.____.____[____] 8 | print(cat_hash) 9 | 10 | # cat_hashを使って文字列を引く 11 | cat_string = ____.____.____[____] 12 | print(cat_string) 13 | -------------------------------------------------------------------------------- /exercises/ja/exc_02_02_02.py: -------------------------------------------------------------------------------- 1 | import spacy 2 | 3 | nlp = spacy.blank("ja") 4 | doc = nlp("デヴィッド・ボウイはPERSONです") 5 | 6 | # ラベル「PERSON」のハッシュを引く 7 | person_hash = ____.____.____[____] 8 | print(person_hash) 9 | 10 | # person_hashを引いて文字列を取得 11 | person_string = ____.____.____[____] 12 | print(person_string) 13 |
-------------------------------------------------------------------------------- /exercises/ja/exc_02_05_01.py: -------------------------------------------------------------------------------- 1 | import spacy 2 | 3 | nlp = spacy.blank("ja") 4 | 5 | # Docクラスをインポート 6 | from ____ import ____ 7 | 8 | # 作りたいテキスト:「spaCyは素晴らしい!」 9 | words = ["spaCy", "は", "素晴らしい", "!"] 10 | spaces = [False, False, False, False] 11 | 12 | # wordsとspacesからDocを作成 13 | doc = ____(____, words=words, spaces=spaces) 14 | print(doc.text) 15 | -------------------------------------------------------------------------------- /exercises/ja/exc_02_05_02.py: -------------------------------------------------------------------------------- 1 | import spacy 2 | 3 | nlp = spacy.blank("ja") 4 | 5 | # Docクラスをインポート 6 | from ____ import ____ 7 | 8 | # 作りたいテキスト:「さあ、始めよう!」 9 | words = ["さあ", "、", "始めよう", "!"] 10 | spaces = [____, ____, ____, ____] 11 | 12 | # wordsとspacesからDocを作成 13 | doc = ____(____, ____=____, ____=____) 14 | print(doc.text) 15 | -------------------------------------------------------------------------------- /exercises/ja/exc_02_05_03.py: -------------------------------------------------------------------------------- 1 | import spacy 2 | 3 | nlp = spacy.blank("ja") 4 | 5 | # Docクラスをインポート 6 | from ____ import ____ 7 | 8 | # 作成したいテキスト:「本当ですか?!」 9 | words = [____, ____, ____, ____, ____] 10 | spaces = [____, ____, ____, ____, ____] 11 | 12 | # Docをwordsとspacesから作成 13 | doc = ____(____, ____=____, ____=____) 14 | print(doc.text) 15 | -------------------------------------------------------------------------------- /exercises/ja/exc_02_09.py: -------------------------------------------------------------------------------- 1 | import spacy 2 | 3 | # ja_core_news_mdモデルをロード 4 | nlp = ____ 5 | 6 | # テキストを処理 7 | doc = nlp("パジャマを着た2つのバナナ") 8 | 9 | # 「バナナ」のベクトルを取得 10 | bananas_vector = ____.____ 11 | print(bananas_vector) 12 | -------------------------------------------------------------------------------- /exercises/ja/exc_02_10_01.py: -------------------------------------------------------------------------------- 1 | import spacy 2 | 3 | nlp = spacy.load("ja_core_news_md") 4 | 5 | doc1 = nlp("暖かい夏の日です") 6 | doc2 = nlp("外は晴れています") 7 | 8 | # doc1とdoc2の類似度を取得 9 | similarity = ____.____(____) 10 | print(similarity) 11 | -------------------------------------------------------------------------------- /exercises/ja/exc_02_10_02.py: -------------------------------------------------------------------------------- 1 | import spacy 2 | 3 | nlp = spacy.load("ja_core_news_md") 4 | 5 | doc = nlp("テレビと本") 6 | token1, token2 = doc[0], doc[2] 7 | 8 | # 「テレビ」と「本」の類似度を取得 9 | similarity = ____.____(____) 10 | print(similarity) 11 | -------------------------------------------------------------------------------- /exercises/ja/exc_02_10_03.py: -------------------------------------------------------------------------------- 1 | import spacy 2 | 3 | nlp = spacy.load("ja_core_news_md") 4 | 5 | doc = nlp("素晴らしいレストランでした。その後、私達はとても素敵なバーに行きました。") 6 | 7 | # 「素晴らしいレストラン」と「とても素敵なバー」のスパンを作る 8 | span1 = ____ 9 | span2 = ____ 10 | 11 | # スパンの類似度をはかる 12 | similarity = ____.____(____) 13 | print(similarity) 14 | -------------------------------------------------------------------------------- /exercises/ja/exc_03_03.py: -------------------------------------------------------------------------------- 1 | import spacy 2 | 3 | # ja_core_news_sm モデルを読み込む 4 | nlp = ____ 5 | 6 | # パイプラインの名前を表示 7 | print(____.____) 8 | 9 | # (name, component) のタプルからなるパイプライン情報を表示 10 |
print(____.____) 11 | -------------------------------------------------------------------------------- /exercises/ja/exc_03_09_01.py: -------------------------------------------------------------------------------- 1 | import spacy 2 | from spacy.tokens import Token 3 | 4 | nlp = spacy.blank("ja") 5 | 6 | # デフォルト値がFalseである拡張属性「is_country」をトークンに追加 7 | ____.____(____, ____=____) 8 | 9 | # テキストを処理し、「スペイン」のトークンについてis_country属性をTrueにする 10 | doc = nlp("私はスペインに住んでいます。") 11 | ____ = True 12 | 13 | # すべてのトークンについて、文字列とis_country属性を表示 14 | print([(____, ____) for token in doc]) 15 | -------------------------------------------------------------------------------- /exercises/ja/exc_03_09_02.py: -------------------------------------------------------------------------------- 1 | import spacy 2 | from spacy.tokens import Token 3 | 4 | nlp = spacy.blank("ja") 5 | 6 | # トークンを受け取り、文字列を反転させたものを返すゲッターを定義 7 | def get_reversed(token): 8 | return token.text[::-1] 9 | 10 | 11 | # トークンの「reversed」プロパティ属性にget_reversedをゲッターとして登録 12 | ____.____(____, ____=____) 13 | 14 | # テキストを処理し、それぞれのトークンについてreversed属性を表示 15 | doc = nlp("あらゆる一般化は間違っている。これも含めて。") 16 | for ____ in ____: 17 | print("反転:", ____) 18 | -------------------------------------------------------------------------------- /exercises/ja/exc_03_10_02.py: -------------------------------------------------------------------------------- 1 | import spacy 2 | from spacy.tokens import Span 3 | 4 | nlp = spacy.blank("ja") 5 | 6 | # メソッドを定義 7 | def to_html(span, tag): 8 | # スパンのテキストをHTMLタグに入れて返す 9 | return f"<{tag}>{span.text}</{tag}>" 10 | 11 | 12 | # to_htmlをスパンの「to_html」拡張属性に登録 13 | ____.____(____, ____=____) 14 | 15 | # テキストを処理し、「strong」タグを用いてスパンのto_htmlメソッドを呼びだす 16 | doc = nlp("おはようございます、 これは文章です。") 17 | span = doc[0:3] 18 | print(____) 19 | -------------------------------------------------------------------------------- /exercises/ja/exc_03_14_01.py: -------------------------------------------------------------------------------- 1 | import json 2 | import spacy 3 | 4 | nlp = spacy.load("ja_core_news_sm") 5 | 6 | with open("exercises/ja/tweets.json", encoding="utf8") as f: 7 | TEXTS = json.loads(f.read()) 8 | 9 | # テキストを処理し、形容詞を表示 10 | for text in TEXTS: 11 | doc = nlp(text) 12 | print([token.text for token in doc if token.pos_ == "ADJ"]) 13 | -------------------------------------------------------------------------------- /exercises/ja/exc_03_14_02.py: -------------------------------------------------------------------------------- 1 | import json 2 | import spacy 3 | 4 | nlp = spacy.load("ja_core_news_sm") 5 | 6 | with open("exercises/ja/tweets.json", encoding="utf8") as f: 7 | TEXTS = json.loads(f.read()) 8 | 9 | # テキストを処理し、固有表現を表示 10 | docs = [nlp(text) for text in TEXTS] 11 | entities = [doc.ents for doc in docs] 12 | print(*entities) 13 | -------------------------------------------------------------------------------- /exercises/ja/exc_03_14_03.py: -------------------------------------------------------------------------------- 1 | import spacy 2 | 3 | nlp = spacy.blank("ja") 4 | 5 | people = ["デヴィッド・ボウイ", "アンゲラ・メルケル", "レディー・ガガ"] 6 | 7 | # PhraseMatcherのパターンのリストを作成 8 | patterns = [nlp(person) for person in people] 9 | -------------------------------------------------------------------------------- /exercises/ja/exc_03_16_01.py: -------------------------------------------------------------------------------- 1 | import spacy 2 | 3 | nlp = spacy.load("ja_core_news_sm") 4 | text = ( 5 | "チックフィレイはジョージア州カレッジパークに本社を置く、" 6 | "チキンサンドを専門とするアメリカのファストフードレストランチェーンです。" 7 | ) 8 |
9 | # トークナイズのみ行う 10 | doc = nlp(text) 11 | print([token.text for token in doc]) 12 | -------------------------------------------------------------------------------- /exercises/ja/exc_03_16_02.py: -------------------------------------------------------------------------------- 1 | import spacy 2 | 3 | nlp = spacy.load("ja_core_news_sm") 4 | text = ( 5 | "チックフィレイはジョージア州カレッジパークに本社を置く、" 6 | "チキンサンドを専門とするアメリカのファストフードレストランチェーンです。" 7 | ) 8 | 9 | # parserを無効化 10 | with ____.____(____): 11 | # テキストを処理する 12 | doc = ____ 13 | # docの固有表現を表示 14 | print(____) 15 | -------------------------------------------------------------------------------- /exercises/ja/exc_04_06.py: -------------------------------------------------------------------------------- 1 | import spacy 2 | 3 | # 空の「ja」パイプラインを作成 4 | nlp = ____ 5 | 6 | # 新しい固有表現抽出器を作成し、パイプラインに追加 7 | ner = ____ 8 | 9 | # 「GADGET」ラベルを固有表現抽出器に追加 10 | ____.____ 11 | -------------------------------------------------------------------------------- /exercises/ja/exc_04_07_01.sh: -------------------------------------------------------------------------------- 1 | python -m spacy ____ ____ ____ --___ ____ --____ ____ 2 | -------------------------------------------------------------------------------- /exercises/ja/exc_04_07_02.sh: -------------------------------------------------------------------------------- 1 | cat ./config.cfg 2 | -------------------------------------------------------------------------------- /exercises/ja/exc_04_08.sh: -------------------------------------------------------------------------------- 1 | python -m spacy ____ ____ --output ____ --paths.train ____ --paths.dev ____ 2 | -------------------------------------------------------------------------------- /exercises/ja/gadgets.json: -------------------------------------------------------------------------------- 1 | [ 2 | ["iPhone Xの注文方法", { "entities": [[0, 8, "GADGET"]] }], 3 | ["iPhone Xが発売される", { "entities": [[0, 8, "GADGET"]] }], 4 | ["iPhone Xに10万円の価値ある?", { "entities": [[0, 8, "GADGET"]] }], 5 | ["iPhone 8のレビューはこれ!", { "entities": [[0, 8, "GADGET"]] }], 6 | ["iPhoneのiOS11へのアップデートが今日ある", { "entities": [[0, 6, "GADGET"]] }], 7 | ["新しいスマホが欲しい!どうしたらいい?", { "entities": [] }] 8 | ] 9 | -------------------------------------------------------------------------------- /exercises/ja/iphone.json: -------------------------------------------------------------------------------- 1 | [ 2 | "iPhone Xの注文方法", 3 | "iPhone Xが発売される", 4 | "iPhone Xに10万円の価値ある?", 5 | "iPhone 8のレビューはここにある", 6 | "iPhone 11とiPhone 8の違いは?", 7 | "新しいスマホが欲しい!どうしたらいい?" 
8 | ] 9 | -------------------------------------------------------------------------------- /exercises/ja/solution_01_02_01.py: -------------------------------------------------------------------------------- 1 | # spaCyをインポート 2 | import spacy 3 | 4 | # nlpオブジェクトを作成 5 | nlp = spacy.blank("en") 6 | 7 | # テキストを処理 8 | doc = nlp("This is a sentence.") 9 | 10 | # docのテキストをプリント 11 | print(doc.text) 12 | -------------------------------------------------------------------------------- /exercises/ja/solution_01_02_02.py: -------------------------------------------------------------------------------- 1 | # spaCyをインポート 2 | import spacy 3 | 4 | # ドイツ語のnlpオブジェクトを作成 5 | nlp = spacy.blank("de") 6 | 7 | # テキストを処理(ドイツ語で「よろしく!」の意味) 8 | doc = nlp("Liebe Grüße!") 9 | 10 | # docのテキストをプリント 11 | print(doc.text) 12 | -------------------------------------------------------------------------------- /exercises/ja/solution_01_02_03.py: -------------------------------------------------------------------------------- 1 | # spacyをインポート 2 | import spacy 3 | 4 | # スペイン語のnlpオブジェクトを作成 5 | nlp = spacy.blank("es") 6 | 7 | # テキストを処理(スペイン語で「おげんきですか?」の意味) 8 | doc = nlp("¿Cómo estás?") 9 | 10 | # docのテキストをプリント 11 | print(doc.text) 12 | -------------------------------------------------------------------------------- /exercises/ja/solution_01_02_04.py: -------------------------------------------------------------------------------- 1 | # spaCyをインポート 2 | import spacy 3 | 4 | # nlpオブジェクトを作成 5 | nlp = spacy.blank("ja") 6 | 7 | # テキストを処理 8 | doc = nlp("有難うございます。") 9 | 10 | # docのテキストをプリント 11 | print(doc.text) 12 | -------------------------------------------------------------------------------- /exercises/ja/solution_01_03_01.py: -------------------------------------------------------------------------------- 1 | # spaCyをインポートし、日本語のnlpオブジェクトを作成 2 | import spacy 3 | 4 | nlp = spacy.blank("ja") 5 | 6 | # テキストを処理 7 | doc = nlp("私はツリーカンガルーとイッカクが好きです。") 8 | 9 | # 最初のトークンを選択 10 | first_token = doc[0] 11 | 12 | # 最初のトークンのテキストをプリント 13 | print(first_token.text) 14 | -------------------------------------------------------------------------------- /exercises/ja/solution_01_03_02.py: -------------------------------------------------------------------------------- 1 | # spaCyをインポートし、日本語のnlpオブジェクトを作成 2 | import spacy 3 | 4 | nlp = spacy.blank("ja") 5 | 6 | # テキストを処理 7 | doc = nlp("私はツリーカンガルーとイッカクが好きです。") 8 | 9 | # 「ツリーカンガルー」のスライスを選択 10 | tree_kangaroos = doc[2:4] 11 | print(tree_kangaroos.text) 12 | 13 | # 「ツリーカンガルーとイッカク」のスライスを選択 14 | tree_kangaroos_and_narwhals = doc[2:6] 15 | print(tree_kangaroos_and_narwhals.text) 16 | -------------------------------------------------------------------------------- /exercises/ja/solution_01_07.py: -------------------------------------------------------------------------------- 1 | import spacy 2 | 3 | # 「ja_core_news_sm」パイプラインをロード 4 | nlp = spacy.load("ja_core_news_sm") 5 | 6 | text = "公式発表:Appleが米国の上場企業として初めて時価評価額1兆ドルに到達しました。" 7 | 8 | # テキストを処理 9 | doc = nlp(text) 10 | 11 | # docのテキストをプリント 12 | print(doc.text) 13 | -------------------------------------------------------------------------------- /exercises/ja/solution_01_08_01.py: -------------------------------------------------------------------------------- 1 | import spacy 2 | 3 | nlp = spacy.load("ja_core_news_sm") 4 | 5 | text = "公式発表:Appleが米国の上場企業として初めて時価評価額1兆ドルに到達しました。" 6 | 7 | # テキストを処理 8 | doc = nlp(text) 9 | 10 | for token in doc: 11 | # トークンの文字列、品詞タグ、依存関係ラベルを取得 12 | token_text = token.text 13 | token_pos = token.pos_ 14 | token_dep = token.dep_ 15 | # 
フォーマットしてプリント 16 | print(f"{token_text:<12}{token_pos:<10}{token_dep:<10}") 17 | -------------------------------------------------------------------------------- /exercises/ja/solution_01_08_02.py: -------------------------------------------------------------------------------- 1 | import spacy 2 | 3 | nlp = spacy.load("ja_core_news_sm") 4 | 5 | text = "公式発表:Appleが米国の上場企業として初めて時価評価額1兆ドルに到達しました。" 6 | 7 | # テキストを処理 8 | doc = nlp(text) 9 | 10 | # 予測された固有表現をイテレート 11 | for ent in doc.ents: 12 | # 固有表現の文字列とラベルをプリント 13 | print(ent.text, ent.label_) 14 | -------------------------------------------------------------------------------- /exercises/ja/solution_01_09.py: -------------------------------------------------------------------------------- 1 | import spacy 2 | 3 | nlp = spacy.load("ja_core_news_sm") 4 | 5 | text = "静岡県にある三保の松原は世界遺産の一部です。" 6 | 7 | # テキストを処理 8 | doc = nlp(text) 9 | 10 | # 固有表現をイテレート 11 | for ent in doc.ents: 12 | # 固有表現の文字列とラベルをプリント 13 | print(ent.text, ent.label_) 14 | 15 | # 三保の松原のスパンを取得 16 | mihonomatsubara = doc[4:7] 17 | 18 | # スパンの文字列をプリント 19 | print("Missing entity:", mihonomatsubara.text) -------------------------------------------------------------------------------- /exercises/ja/solution_02_02_01.py: -------------------------------------------------------------------------------- 1 | import spacy 2 | 3 | nlp = spacy.blank("ja") 4 | doc = nlp("私はネコを飼っています") 5 | 6 | # 単語「ネコ」のハッシュを引く 7 | cat_hash = nlp.vocab.strings["ネコ"] 8 | print(cat_hash) 9 | 10 | # cat_hashを使って文字列を引く 11 | cat_string = nlp.vocab.strings[cat_hash] 12 | print(cat_string) 13 | -------------------------------------------------------------------------------- /exercises/ja/solution_02_02_02.py: -------------------------------------------------------------------------------- 1 | import spacy 2 | 3 | nlp = spacy.blank("ja") 4 | doc = nlp("デヴィッド・ボウイはPERSONです") 5 | 6 | # ラベル「PERSON」のハッシュを引く 7 | person_hash = nlp.vocab.strings["PERSON"] 8 | print(person_hash) 9 | 10 | # person_hashを引いて文字列を取得 11 | person_string = nlp.vocab.strings[person_hash] 12 | print(person_string) 13 | -------------------------------------------------------------------------------- /exercises/ja/solution_02_05_01.py: -------------------------------------------------------------------------------- 1 | import spacy 2 | 3 | nlp = spacy.blank("ja") 4 | 5 | # Docクラスをインポート 6 | from spacy.tokens import Doc 7 | 8 | # 作りたいテキスト:「spaCyは素晴らしい!」 9 | words = ["spaCy", "は", "素晴らしい", "!"] 10 | spaces = [False, False, False, False] 11 | 12 | # wordsとspacesからDocを作成 13 | doc = Doc(nlp.vocab, words=words, spaces=spaces) 14 | print(doc.text) 15 | -------------------------------------------------------------------------------- /exercises/ja/solution_02_05_02.py: -------------------------------------------------------------------------------- 1 | import spacy 2 | 3 | nlp = spacy.blank("ja") 4 | 5 | # Docクラスをインポート 6 | from spacy.tokens import Doc 7 | 8 | # 作りたいテキスト:「さあ、始めよう!」 9 | words = ["さあ", "、", "始めよう", "!"] 10 | spaces = [False, False, False, False] 11 | 12 | # wordsとspacesからDocを作成 13 | doc = Doc(nlp.vocab, words=words, spaces=spaces) 14 | print(doc.text) 15 | -------------------------------------------------------------------------------- /exercises/ja/solution_02_05_03.py: -------------------------------------------------------------------------------- 1 | import spacy 2 | 3 | nlp = spacy.blank("ja") 4 | 5 | # Docクラスをインポート 6 | from spacy.tokens import Doc 7 | 8 | # 作成したいテキスト:「本当ですか?!」 9 | words = ["本当", "です", "か", "?", "!"] 10 | spaces = [False, False, 
False, False, False] 11 | 12 | # Docをwordsとspacesから作成 13 | doc = Doc(nlp.vocab, words=words, spaces=spaces) 14 | print(doc.text) 15 | -------------------------------------------------------------------------------- /exercises/ja/solution_02_07.py: -------------------------------------------------------------------------------- 1 | import spacy 2 | 3 | nlp = spacy.load("ja_core_news_sm") 4 | doc = nlp("ベルリンはいい街だと思う") 5 | 6 | # すべてのトークンの文字列と品詞タグを取得 7 | for token in doc: 8 | # 現在のトークンが固有名詞かどうかをチェック 9 | if token.pos_ == "PROPN": 10 | # 次のトークンが接置詞かどうかをチェック 11 | if doc[token.i + 1].pos_ == "ADP": 12 | print("接置詞の前の固有名詞が見つかりました:", token.text) 13 | -------------------------------------------------------------------------------- /exercises/ja/solution_02_09.py: -------------------------------------------------------------------------------- 1 | import spacy 2 | 3 | # ja_core_news_mdモデルをロード 4 | nlp = spacy.load("ja_core_news_md") 5 | 6 | # テキストを処理 7 | doc = nlp("パジャマを着た2つのバナナ") 8 | 9 | # 「バナナ」のベクトルを取得 10 | bananas_vector = doc[7].vector 11 | print(bananas_vector) 12 | -------------------------------------------------------------------------------- /exercises/ja/solution_02_10_01.py: -------------------------------------------------------------------------------- 1 | import spacy 2 | 3 | nlp = spacy.load("ja_core_news_md") 4 | 5 | doc1 = nlp("暖かい夏の日です") 6 | doc2 = nlp("外は晴れています") 7 | 8 | # doc1とdoc2の類似度を取得 9 | similarity = doc1.similarity(doc2) 10 | print(similarity) 11 | -------------------------------------------------------------------------------- /exercises/ja/solution_02_10_02.py: -------------------------------------------------------------------------------- 1 | import spacy 2 | 3 | nlp = spacy.load("ja_core_news_md") 4 | 5 | doc = nlp("テレビと本") 6 | token1, token2 = doc[0], doc[2] 7 | 8 | # 「テレビ」と「本」の類似度を取得 9 | similarity = token1.similarity(token2) 10 | print(similarity) 11 | -------------------------------------------------------------------------------- /exercises/ja/solution_02_10_03.py: -------------------------------------------------------------------------------- 1 | import spacy 2 | 3 | nlp = spacy.load("ja_core_news_md") 4 | 5 | doc = nlp("素晴らしいレストランでした。その後、私達はとても素敵なバーに行きました。") 6 | 7 | # 「素晴らしいレストラン」と「とても素敵なバー」のスパンを作る 8 | span1 = doc[0:2] 9 | span2 = doc[11:15] 10 | 11 | # スパンの類似度をはかる 12 | similarity = span1.similarity(span2) 13 | print(similarity) 14 | -------------------------------------------------------------------------------- /exercises/ja/solution_03_03.py: -------------------------------------------------------------------------------- 1 | import spacy 2 | 3 | # ja_core_news_sm モデルを読み込む 4 | nlp = spacy.load("ja_core_news_sm") 5 | 6 | # パイプラインの名前を出力 7 | print(nlp.pipe_names) 8 | 9 | # (name, component) のタプルからなるパイプライン情報を表示 10 | print(nlp.pipeline) 11 | -------------------------------------------------------------------------------- /exercises/ja/solution_03_09_01.py: -------------------------------------------------------------------------------- 1 | import spacy 2 | from spacy.tokens import Token 3 | 4 | nlp = spacy.blank("ja") 5 | 6 | # デフォルト値がFalseである拡張属性「is_country」をトークンに追加 7 | Token.set_extension("is_country", default=False) 8 | 9 | # テキストを処理し、「スペイン」のトークンについてis_country属性をTrueにする 10 | doc = nlp("私はスペインに住んでいます。") 11 | doc[2]._.is_country = True 12 | 13 | # すべてのトークンについて、文字列とis_country属性を表示 14 | print([(token.text, token._.is_country) for token in doc]) 15 | -------------------------------------------------------------------------------- /exercises/ja/solution_03_14_01.py:
-------------------------------------------------------------------------------- 1 | import json 2 | import spacy 3 | 4 | nlp = spacy.load("ja_core_news_sm") 5 | 6 | with open("exercises/ja/tweets.json", encoding="utf8") as f: 7 | TEXTS = json.loads(f.read()) 8 | 9 | # テキストを処理し、形容詞を表示 10 | for doc in nlp.pipe(TEXTS): 11 | 12 | print([token.text for token in doc if token.pos_ == "ADJ"]) 13 | -------------------------------------------------------------------------------- /exercises/ja/solution_03_14_02.py: -------------------------------------------------------------------------------- 1 | import json 2 | import spacy 3 | 4 | nlp = spacy.load("ja_core_news_sm") 5 | 6 | with open("exercises/ja/tweets.json", encoding="utf8") as f: 7 | TEXTS = json.loads(f.read()) 8 | 9 | # テキストを処理し、固有表現を表示 10 | docs = list(nlp.pipe(TEXTS)) 11 | entities = [doc.ents for doc in docs] 12 | print(*entities) 13 | -------------------------------------------------------------------------------- /exercises/ja/solution_03_14_03.py: -------------------------------------------------------------------------------- 1 | import spacy 2 | 3 | nlp = spacy.blank("ja") 4 | 5 | people = ["デヴィッド・ボウイ", "アンゲラ・メルケル", "レディー・ガガ"] 6 | 7 | # PhraseMatcherのパターンのリストを作成 8 | patterns = list(nlp.pipe(people)) 9 | -------------------------------------------------------------------------------- /exercises/ja/solution_03_16_01.py: -------------------------------------------------------------------------------- 1 | import spacy 2 | 3 | nlp = spacy.load("ja_core_news_sm") 4 | text = ( 5 | "チックフィレイはジョージア州カレッジパークに本社を置く、" 6 | "チキンサンドを専門とするアメリカのファストフードレストランチェーンです。" 7 | ) 8 | 9 | # トークナイズのみ行う 10 | doc = nlp.make_doc(text) 11 | print([token.text for token in doc]) 12 | -------------------------------------------------------------------------------- /exercises/ja/solution_03_16_02.py: -------------------------------------------------------------------------------- 1 | import spacy 2 | 3 | nlp = spacy.load("ja_core_news_sm") 4 | text = ( 5 | "チックフィレイはジョージア州カレッジパークに本社を置く、" 6 | "チキンサンドを専門とするアメリカのファストフードレストランチェーンです。" 7 | ) 8 | 9 | # parserを無効化 10 | with nlp.select_pipes(disable=["parser"]): 11 | # テキストを処理する 12 | doc = nlp(text) 13 | # docの固有表現を表示 14 | print(doc.ents) 15 | -------------------------------------------------------------------------------- /exercises/ja/solution_04_06.py: -------------------------------------------------------------------------------- 1 | import spacy 2 | 3 | # 空の「ja」モデルを作成 4 | nlp = spacy.blank("ja") 5 | 6 | # 新しい固有表現抽出器を作成し、パイプラインに追加 7 | ner = nlp.add_pipe("ner") 8 | 9 | # 「GADGET」ラベルを固有表現抽出器に追加 10 | ner.add_label("GADGET") 11 | -------------------------------------------------------------------------------- /exercises/ja/solution_04_07_01.sh: -------------------------------------------------------------------------------- 1 | python -m spacy init config ./config.cfg --lang ja --pipeline ner 2 | -------------------------------------------------------------------------------- /exercises/ja/solution_04_07_02.sh: -------------------------------------------------------------------------------- 1 | cat ./config.cfg 2 | -------------------------------------------------------------------------------- /exercises/ja/solution_04_08.sh: -------------------------------------------------------------------------------- 1 | python -m spacy train ./exercises/ja/config_gadget.cfg --output ./output --paths.train ./exercises/ja/train_gadget.spacy --paths.dev ./exercises/ja/dev_gadget.spacy 2 | 
-------------------------------------------------------------------------------- /exercises/ja/test_01_02_01.py: -------------------------------------------------------------------------------- 1 | def test(): 2 | import spacy.tokens 3 | import spacy.lang.en 4 | 5 | assert isinstance( 6 | nlp, spacy.lang.en.English 7 | ), "nlpオブジェクトはEnglishクラスのインスタンスでなければなりません" 8 | assert isinstance(doc, spacy.tokens.Doc), "テキストをnlpオブジェクトで処理してdocを作成しましたか?" 9 | assert "print(doc.text)" in __solution__, "doc.textをプリントしましたか?" 10 | 11 | __msg__.good("正解です!") 12 | -------------------------------------------------------------------------------- /exercises/ja/test_01_02_02.py: -------------------------------------------------------------------------------- 1 | def test(): 2 | import spacy.tokens 3 | import spacy.lang.de 4 | 5 | assert isinstance(nlp, spacy.lang.de.German), "nlpオブジェクトはGermanクラスのインスタンスでなければなりません" 6 | assert isinstance(doc, spacy.tokens.Doc), "テキストをnlpオブジェクトで処理してdocを作成しましたか?" 7 | assert "print(doc.text)" in __solution__, "doc.textをプリントしましたか?" 8 | 9 | __msg__.good("正解です!") 10 | -------------------------------------------------------------------------------- /exercises/ja/test_01_02_03.py: -------------------------------------------------------------------------------- 1 | def test(): 2 | import spacy.tokens 3 | import spacy.lang.es 4 | 5 | assert isinstance( 6 | nlp, spacy.lang.es.Spanish 7 | ), "nlpオブジェクトはSpanishクラスのインスタンスでなければなりません" 8 | assert isinstance(doc, spacy.tokens.Doc), "テキストをnlpオブジェクトで処理してdocを作成しましたか?" 9 | assert "print(doc.text)" in __solution__, "doc.textをプリントしましたか?" 10 | 11 | __msg__.good("Perfecto! doc、トークン、スパンに行きましょう。") 12 | -------------------------------------------------------------------------------- /exercises/ja/test_01_02_04.py: -------------------------------------------------------------------------------- 1 | def test(): 2 | import spacy.tokens 3 | import spacy.lang.ja 4 | 5 | assert isinstance( 6 | nlp, spacy.lang.ja.Japanese 7 | ), "nlpオブジェクトはJapaneseクラスのインスタンスでなければなりません" 8 | assert isinstance(doc, spacy.tokens.Doc), "テキストをnlpオブジェクトで処理してdocを作成しましたか?" 9 | assert "print(doc.text)" in __solution__, "doc.textをプリントしましたか?" 10 | 11 | __msg__.good("完璧です! doc、トークン、スパンに行きましょう。") 12 | -------------------------------------------------------------------------------- /exercises/ja/test_01_03_01.py: -------------------------------------------------------------------------------- 1 | def test(): 2 | assert doc.text == "私はツリーカンガルーとイッカクが好きです。", "テキストをちゃんと処理しましたか?" 3 | assert first_token == doc[0], "最初のトークンを選択しましたか?" 4 | assert "print(first_token.text)" in __solution__, "トークンのテキストをプリントしましたか?" 5 | assert 'spacy.blank("ja")' in __solution__, 'spacy.blankに指定する言語は合っていますか?' 6 | __msg__.good("よくできました!") 7 | -------------------------------------------------------------------------------- /exercises/ja/test_01_03_02.py: -------------------------------------------------------------------------------- 1 | def test(): 2 | assert doc.text == "私はツリーカンガルーとイッカクが好きです。", "テキストをちゃんと処理しましたか?" 3 | assert tree_kangaroos == doc[2:4], "ツリーカンガルーのスパンを選択しましたか?" 4 | assert ( 5 | tree_kangaroos_and_narwhals == doc[2:6] 6 | ), "ツリーカンガルーとイッカクのスパンを選択しましたか?" 7 | __msg__.good("よくできました!") 8 | -------------------------------------------------------------------------------- /exercises/ja/test_01_07.py: -------------------------------------------------------------------------------- 1 | def test(): 2 | assert "spacy.load" in __solution__, "spacy.loadを呼び出しましたか?" 3 | assert nlp.meta["lang"] == "ja", "正しいモデルを呼び出しましたか?" 
4 | assert nlp.meta["name"] == "core_news_sm", "正しいモデルを呼び出しましたか?" 5 | assert "nlp(text)" in __solution__, "テキストをちゃんと処理しましたか?" 6 | assert "print(doc.text)" in __solution__, "docのテキストをプリントしましたか?" 7 | 8 | __msg__.good("よくできました!パイプラインのロードのやりかたを学んだので、モデルを用いた解析の方法を見ていきましょう。") 9 | -------------------------------------------------------------------------------- /exercises/ja/test_01_08_01.py: -------------------------------------------------------------------------------- 1 | def test(): 2 | assert "token_text = token.text" in __solution__, "トークンの文字列をちゃんと取得していますか?" 3 | assert ( 4 | "token_pos = token.pos_" in __solution__ 5 | ), "トークンの品詞タグをちゃんと取得していますか?文字列属性を取得するには、アンダースコアを用いることを忘れないでください。" 6 | assert ( 7 | "token_dep = token.dep_" in __solution__ 8 | ), "トークンの依存関係ラベルをちゃんと取得していますか?文字列属性を取得するには、アンダースコアを用いることを忘れないでください。" 9 | __msg__.good("Perfect!") 10 | -------------------------------------------------------------------------------- /exercises/ja/test_01_08_02.py: -------------------------------------------------------------------------------- 1 | def test(): 2 | assert "for ent in doc.ents" in __solution__, "固有表現をイテレートしていますか?" 3 | assert "print(ent.text, ent.label_)" in __solution__, "文字列とラベルをプリントしましたか?" 4 | 5 | __msg__.good( 6 | "素晴らしい!ここでは、モデルはすべての例で正しい予測を行いました。" "次の演習では、モデルが予測を誤る例を見ていき、モデルを修正する方法を学びます。" 7 | ) 8 | -------------------------------------------------------------------------------- /exercises/ja/test_01_09.py: -------------------------------------------------------------------------------- 1 | def test(): 2 | assert "in doc.ents" in __solution__, "固有表現をイテレートしましたか?" 3 | assert mihonomatsubara.text == "三保の松原", "mihonomatsubara変数は正しいスライスですか?" 4 | 5 | __msg__.good( 6 | "完璧です!もちろん、いつもこのように手動でやる必要はありません。" 7 | "次の演習では、単語やフレーズを探すためのルールベースのmatcherについて学んでいきます。" 8 | ) 9 | -------------------------------------------------------------------------------- /exercises/ja/test_02_02_01.py: -------------------------------------------------------------------------------- 1 | def test(): 2 | assert cat_hash == nlp.vocab.strings["ネコ"], "正しいhashを代入しましたか?" 3 | assert 'nlp.vocab.strings["ネコ"]' in __solution__, "正しい文字列を取得しましたか?" 4 | assert cat_string == "ネコ", "正しい文字列を取得しましたか?" 5 | assert "nlp.vocab.strings[cat_hash]" in __solution__, "hashから文字列を取得しましたか?" 6 | 7 | __msg__.good("素晴らしい!") 8 | -------------------------------------------------------------------------------- /exercises/ja/test_02_02_02.py: -------------------------------------------------------------------------------- 1 | def test(): 2 | assert person_hash == nlp.vocab.strings["PERSON"], "正しいhashを代入しましたか?" 3 | assert 'nlp.vocab.strings["PERSON"]' in __solution__, "正しいhashを代入しましたか?" 4 | assert person_string == "PERSON", "正しい文字列を取得しましたか?" 5 | assert "nlp.vocab.strings[person_hash]" in __solution__, "hashから文字列を取得しましたか?" 6 | 7 | __msg__.good("Good job!") 8 | -------------------------------------------------------------------------------- /exercises/ja/test_02_05_01.py: -------------------------------------------------------------------------------- 1 | def test(): 2 | assert "from spacy.tokens import Doc" in __solution__, "Docクラスをちゃんとインポートしましたか?" 3 | assert doc.text == "spaCyは素晴らしい!", "Docをちゃんと作成しましたか?" 4 | assert "print(doc.text)" in __solution__, "Docの文字列をプリントしましたか?" 
5 | __msg__.good("よくできました!") 6 | -------------------------------------------------------------------------------- /exercises/ja/test_02_05_02.py: -------------------------------------------------------------------------------- 1 | def test(): 2 | assert "from spacy.tokens import Doc" in __solution__, "Docクラスをちゃんとインポートしましたか?" 3 | assert len(spaces) == 4, "Docをちゃんと作成しましたか?" 4 | assert all(isinstance(s, bool) for s in spaces), "spacesはブール値である必要があります。" 5 | assert [int(s) for s in spaces] == [0, 0, 0, 0], "スペースは正しいですか?" 6 | assert doc.text == "さあ、始めよう!", "Docを正しく作成していますか?" 7 | __msg__.good("Nice!") 8 | -------------------------------------------------------------------------------- /exercises/ja/test_02_09.py: -------------------------------------------------------------------------------- 1 | def test(): 2 | assert 'spacy.load("ja_core_news_md")' in __solution__, "中サイズのパイプラインをロードしましたか?" 3 | assert "doc[7].vector" in __solution__, "正しいベクトルを取得しましたか?" 4 | __msg__.good("Well done!次の演習では、単語ベクトルを用いたdoc、スパン、トークン間の類似度の予測を行います。") 5 | -------------------------------------------------------------------------------- /exercises/ja/test_02_10_01.py: -------------------------------------------------------------------------------- 1 | def test(): 2 | assert ( 3 | "doc1.similarity(doc2)" in __solution__ or "doc2.similarity(doc1)" in __solution__ 4 | ), "2つのdocの類似度を比較しましたか?" 5 | assert 0 <= float(similarity) <= 1, "similarityは浮動小数点数である必要があります。きちんと計算しましたか?" 6 | __msg__.good("Well done!") 7 | -------------------------------------------------------------------------------- /exercises/ja/test_02_10_02.py: -------------------------------------------------------------------------------- 1 | def test(): 2 | assert ( 3 | "token1.similarity(token2)" in __solution__ or "token2.similarity(token1)" in __solution__ 4 | ), "2つのトークンの類似度を比較しましたか?" 5 | assert 0 <= float(similarity) <= 1, "similarityは浮動小数点数である必要があります。きちんと計算しましたか?" 6 | __msg__.good("Nicely done!") 7 | -------------------------------------------------------------------------------- /exercises/ja/test_03_03.py: -------------------------------------------------------------------------------- 1 | def test(): 2 | assert nlp.meta["name"] == "core_news_sm", "正しいパイプラインをロードしましたか?" 3 | assert nlp.meta["lang"] == "ja", "正しいパイプラインをロードしましたか?" 4 | assert "print(nlp.pipe_names)" in __solution__, "パイプラインの名前をプリントしましたか?" 5 | assert "print(nlp.pipeline)" in __solution__, "パイプラインをプリントしましたか?" 6 | 7 | __msg__.good( 8 | "Well done!今あるパイプラインについて調べたくなったときは、nlp.pipe_namesやnlp.pipelineを使ってプリントしましょう。" 9 | ) 10 | -------------------------------------------------------------------------------- /exercises/ja/test_03_06.py: -------------------------------------------------------------------------------- 1 | def test(): 2 | assert "len(doc)" in __solution__, "docの長さを取得しましたか?" 3 | assert "return doc" in __solution__, "docを返しましたか?" 4 | assert "nlp.add_pipe" in __solution__, "コンポーネントを追加しましたか?" 5 | assert "first=True" in __solution__, "パイプラインの最初にコンポーネントを追加しましたか?" 6 | assert nlp.pipe_names[0] == "length_component", "パイプラインの名前が正しくないようです!" 7 | 8 | __msg__.good("Perfect!もう少し複雑なコンポーネントを見ていきましょう!") 9 | -------------------------------------------------------------------------------- /exercises/ja/test_03_07.py: -------------------------------------------------------------------------------- 1 | def test(): 2 | assert 'after="ner"' in __solution__, "明示的に固有表現抽出器のあとにコンポーネントを追加しましたか?" 3 | assert nlp.pipe_names[-1] == "animal_component", "固有表現抽出器のあとにコンポーネントを追加しましたか?" 4 | assert len(doc.ents) == 2, "きちんと固有表現を追加しましたか?"
5 | assert all(ent.label_ == "ANIMAL" for ent in doc.ents), "ANIMALのラベルを追加しましたか?" 6 | 7 | __msg__.good("Good job!はじめてのカスタムパイプラインコンポーネントとしてルールベースの固有表現抽出器を作ることができましたね。") 8 | -------------------------------------------------------------------------------- /exercises/ja/test_03_09_02.py: -------------------------------------------------------------------------------- 1 | def test(): 2 | assert Token.has_extension("reversed"), "拡張属性をトークンに追加しましたか?" 3 | ext = Token.get_extension("reversed") 4 | assert ext[2] is not None, "ゲッターをきちんと設定しましたか?" 5 | assert "getter=get_reversed" in __solution__, "get_reversedをゲッターとして登録しましたか?" 6 | assert "token._.reversed" in __solution__, "カスタム属性を取得しましたか?" 7 | 8 | __msg__.good("Good job!もっと複雑な属性を設定していきましょう。") 9 | -------------------------------------------------------------------------------- /exercises/ja/test_03_14_01.py: -------------------------------------------------------------------------------- 1 | def test(): 2 | assert ( 3 | "for doc in nlp.pipe(TEXTS)" in __solution__ 4 | ), "nlp.pipeによって生成されたdocをイテレートしましたか?" 5 | __msg__.good("Nice!") 6 | -------------------------------------------------------------------------------- /exercises/ja/test_03_14_02.py: -------------------------------------------------------------------------------- 1 | def test(): 2 | assert ( 3 | "docs = list(nlp.pipe(TEXTS))" in __solution__ 4 | ), "nlp.pipeの結果に対してlistを呼び出しましたか?" 5 | __msg__.good("Great work!") 6 | -------------------------------------------------------------------------------- /exercises/ja/test_03_14_03.py: -------------------------------------------------------------------------------- 1 | def test(): 2 | assert ( 3 | "patterns = list(nlp.pipe(people))" in __solution__ 4 | ), "nlp.pipeの結果に対してリストを呼び出しましたか?" 5 | 6 | __msg__.good("Good job!追加のメタデータとともにnlp.pipeを呼びだす実践的な例を見ていきましょう。") 7 | -------------------------------------------------------------------------------- /exercises/ja/test_03_16_01.py: -------------------------------------------------------------------------------- 1 | def test(): 2 | assert ( 3 | "doc = nlp.make_doc(text)" in __solution__ 4 | or "doc = nlp.tokenizer(text)" in __solution__ 5 | ), "トークナイズだけしましたか?" 6 | 7 | __msg__.good("Nicely done!") 8 | -------------------------------------------------------------------------------- /exercises/ja/test_03_16_02.py: -------------------------------------------------------------------------------- 1 | def test(): 2 | assert ( 3 | 'with nlp.select_pipes(disable=["parser"])' in __solution__ 4 | ), "正しいコンポーネントに対して、nlp.select_pipesを呼び出しましたか?" 5 | 6 | __msg__.good( 7 | "Perfect!最適化のためのヒントや工夫について練習しました。" 8 | "次章では、spaCyのニューラルネットワークモデルのトレーニングを行います。" 9 | ) 10 | -------------------------------------------------------------------------------- /exercises/ja/test_04_06.py: -------------------------------------------------------------------------------- 1 | def test(): 2 | assert 'spacy.blank("ja")' in __solution__, "空の日本語パイプラインを作成しましたか?" 3 | assert ( 4 | len(nlp.pipe_names) == 1 and nlp.pipe_names[0] == "ner" 5 | ), "固有表現抽出器をパイプラインに追加しましたか?" 6 | assert len(ner.labels) == 1 and ner.labels[0] == "GADGET", "固有表現抽出器にラベルを追加しましたか?" 
7 | 8 | __msg__.good("Well done!パイプラインの準備が完了したので、学習ループを書いていきましょう。") 9 | -------------------------------------------------------------------------------- /exercises/ja/train_gadget.spacy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/explosion/spacy-course/77d8ee1630788b1fb0df1f80f999f7355941a37f/exercises/ja/train_gadget.spacy -------------------------------------------------------------------------------- /exercises/ja/tweets.json: -------------------------------------------------------------------------------- 1 | [ 2 | "マクドナルドは私の大好きなお店です。", 3 | "ここでは@McDonaldsは調理済みのハンバーガしかないと思っていたのだが、どうやら未調理のものしかないのか?病気になっている暇はない...", 4 | "人々はいまだにマクドナルドを食べているのか:(", 5 | "スペインのマクドナルドには手羽先がある。私の心はとても幸せ。", 6 | "@MacDonalds どうか今までで一番美味しいハンバーガーのアーチデラックスを復活させてください!! :P", 7 | "早く開店して!私は#McRibのハンバーガが食べたい。", 8 | "今朝はマクドナルドで食べて、今お腹が痛くなっている。" 9 | ] -------------------------------------------------------------------------------- /exercises/pt/dev_gadget.spacy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/explosion/spacy-course/77d8ee1630788b1fb0df1f80f999f7355941a37f/exercises/pt/dev_gadget.spacy -------------------------------------------------------------------------------- /exercises/pt/exc_01_02_01.py: -------------------------------------------------------------------------------- 1 | # Importe a biblioteca spaCy 2 | import ____ 3 | 4 | # Crie um objeto nlp do Inglês 5 | nlp = ____ 6 | 7 | # Processe o texto 8 | doc = nlp("This is a sentence.") 9 | 10 | # Imprima o texto do documento 11 | print(____.text) 12 | -------------------------------------------------------------------------------- /exercises/pt/exc_01_02_02.py: -------------------------------------------------------------------------------- 1 | # Importe a biblioteca spaCy 2 | import ____ 3 | 4 | # Crie um objeto nlp do Alemão 5 | nlp = ____ 6 | 7 | # Processe o texto (equivalente ao português: "Atenciosamente") 8 | doc = nlp("Liebe Grüße!") 9 | 10 | # Imprima o texto do documento 11 | print(____.text) 12 | -------------------------------------------------------------------------------- /exercises/pt/exc_01_02_03.py: -------------------------------------------------------------------------------- 1 | # Importe a biblioteca spaCy 2 | import ____ 3 | 4 | # Crie um objeto nlp do Espanhol 5 | nlp = ____ 6 | 7 | # Processar o texto em espanhol (equivalente ao português: "Como vai?") 8 | doc = nlp("¿Cómo estás?") 9 | 10 | # Imprimir o texto do documento 11 | print(____.text) 12 | -------------------------------------------------------------------------------- /exercises/pt/exc_01_03_01.py: -------------------------------------------------------------------------------- 1 | # Importar spacy e criar o objeto nlp do Português 2 | import ____ 3 | nlp = ____ 4 | 5 | # Processar o texto 6 | doc = ____("Eu gosto de gatos e cachorros.") 7 | 8 | # Selecionar o primeiro token 9 | first_token = doc[____] 10 | 11 | # Imprimir o texto do primeiro token 12 | print(first_token.____) 13 | -------------------------------------------------------------------------------- /exercises/pt/exc_02_02_01.py: -------------------------------------------------------------------------------- 1 | import spacy 2 | 3 | nlp = spacy.blank("pt") 4 | doc = nlp("Eu tenho um gato amarelo.") 5 | 6 | # Consulte o código hash da palavra "gato" 7 | gato_hash = ____.____.____[____] 8 | print(gato_hash) 9 | 10 | # Agora consulte o gato_hash para obter a palavra
novamente 11 | gato_string = ____.____.____[____] 12 | print(gato_string) 13 | -------------------------------------------------------------------------------- /exercises/pt/exc_02_02_02.py: -------------------------------------------------------------------------------- 1 | import spacy 2 | 3 | nlp = spacy.blank("pt") 4 | doc = nlp("David Bowie é uma PESSOA") 5 | 6 | # Consulte o código hash para a string "PESSOA" 7 | person_hash = ____.____.____[____] 8 | print(person_hash) 9 | 10 | # Consulte o person_hash para obter o texto novamente 11 | person_string = ____.____.____[____] 12 | print(person_string) 13 | -------------------------------------------------------------------------------- /exercises/pt/exc_02_05_01.py: -------------------------------------------------------------------------------- 1 | import spacy 2 | 3 | nlp = spacy.blank("pt") 4 | 5 | # Importe a classe Doc 6 | from ____ import ____ 7 | 8 | # Texto desejado: "spaCy é bem legal!" 9 | words = ["spaCy", "é", "bem","legal","!"] 10 | spaces = [True, True, True, False, False] 11 | 12 | # Crie um Doc a partir das palavras words e o espaçamento spaces 13 | doc = ____(____, words=words, spaces=spaces) 14 | print(doc.text) 15 | -------------------------------------------------------------------------------- /exercises/pt/exc_02_05_02.py: -------------------------------------------------------------------------------- 1 | import spacy 2 | 3 | nlp = spacy.blank("pt") 4 | 5 | # Importe a classe Doc 6 | from ____ import ____ 7 | 8 | # Texto desejado: "Vamos lá, vamos começar!" 9 | words = ["Vamos","lá", ",", "vamos", "começar", "!"] 10 | spaces = [____, ____, ____, ____, ____, ____] 11 | 12 | # Crie um Doc a partir das palavras words e espaçamento spaces 13 | doc = ____(____, ____=____, ____=____) 14 | print(doc.text) 15 | -------------------------------------------------------------------------------- /exercises/pt/exc_02_05_03.py: -------------------------------------------------------------------------------- 1 | import spacy 2 | 3 | nlp = spacy.blank("pt") 4 | 5 | # Importe a classe Doc 6 | from ____ import ____ 7 | 8 | # Texto desejado: "Oh, realmente?!" 
9 | words = [____, ____, ____, ____, ____] 10 | spaces = [____, ____, ____, ____, ____] 11 | 12 | # Crie um Doc a partir das palavras words e espaçamento spaces 13 | doc = ____(____, ____=____, ____=____) 14 | print(doc.text) 15 | -------------------------------------------------------------------------------- /exercises/pt/exc_02_09.py: -------------------------------------------------------------------------------- 1 | import spacy 2 | 3 | # Carregue o fluxo de processamento pt_core_news_md 4 | # para fazer o download do fluxo: python -m spacy download pt_core_news_md 5 | nlp = ____ 6 | 7 | # Processe um texto 8 | doc = nlp("Duas bananas de pijamas") 9 | 10 | # Imprima o vetor para "bananas" 11 | bananas_vector = ____.____ 12 | print(bananas_vector) 13 | -------------------------------------------------------------------------------- /exercises/pt/exc_02_10_01.py: -------------------------------------------------------------------------------- 1 | import spacy 2 | 3 | nlp = spacy.load("pt_core_news_md") 4 | 5 | doc1 = nlp("Eu quero comprar um livro novo") 6 | doc2 = nlp("Preciso ler um livro") 7 | 8 | # Obtenha a similaridade entre doc1 e doc2 9 | similarity = ____.____(____) 10 | print(similarity) 11 | -------------------------------------------------------------------------------- /exercises/pt/exc_02_10_02.py: -------------------------------------------------------------------------------- 1 | import spacy 2 | 3 | nlp = spacy.load("pt_core_news_md") 4 | 5 | doc = nlp("Televisão e livro") 6 | token1, token2 = doc[0], doc[2] 7 | 8 | # Obtenha a similaridade dos tokens "Televisão" e "livro" 9 | similarity = ____.____(____) 10 | print(similarity) 11 | -------------------------------------------------------------------------------- /exercises/pt/exc_02_10_03.py: -------------------------------------------------------------------------------- 1 | import spacy 2 | 3 | nlp = spacy.load("pt_core_news_md") 4 | 5 | doc = nlp("Visitamos um excelente restaurante.
Em seguida fomos a um ótimo bar.") 6 | 7 | # Crie partições para "excelente restaurante" e "ótimo bar" 8 | span1 = ____ 9 | span2 = ____ 10 | print(span1) 11 | print(span2) 12 | 13 | # Obtenha a similaridade das partições 14 | similarity = ____.____(____) 15 | print(similarity) 16 | -------------------------------------------------------------------------------- /exercises/pt/exc_03_03.py: -------------------------------------------------------------------------------- 1 | import spacy 2 | 3 | # Carregue o fluxo de processamento pt_core_news_sm 4 | nlp = ____ 5 | 6 | # Imprima o nome dos componentes do fluxo 7 | print(____.____) 8 | 9 | # Imprima as informações das tuplas (name, component) 10 | print(____.____) 11 | -------------------------------------------------------------------------------- /exercises/pt/exc_03_14_01.py: -------------------------------------------------------------------------------- 1 | import json 2 | import spacy 3 | 4 | nlp = spacy.load("pt_core_news_sm") 5 | 6 | with open("exercises/pt/tweets.json", encoding="utf8") as f: 7 | TEXTS = json.loads(f.read()) 8 | 9 | # Processar os textos e imprimir os adjetivos 10 | for text in TEXTS: 11 | doc = nlp(text) 12 | print([token.text for token in doc if token.pos_ == "ADJ"]) 13 | -------------------------------------------------------------------------------- /exercises/pt/exc_03_14_02.py: -------------------------------------------------------------------------------- 1 | import json 2 | import spacy 3 | 4 | nlp = spacy.load("pt_core_news_sm") 5 | 6 | with open("exercises/pt/tweets.json", encoding="utf8") as f: 7 | TEXTS = json.loads(f.read()) 8 | 9 | # Processar os textos e imprimir as entidades 10 | docs = [nlp(text) for text in TEXTS] 11 | entities = [doc.ents for doc in docs] 12 | print(*entities) 13 | -------------------------------------------------------------------------------- /exercises/pt/exc_03_14_03.py: -------------------------------------------------------------------------------- 1 | import spacy 2 | 3 | nlp = spacy.blank("en") 4 | 5 | people = ["David Bowie", "Angela Merkel", "Lady Gaga"] 6 | 7 | # Criar uma lista de padrões de correspondência para o PhraseMatcher 8 | patterns = [nlp(person) for person in people] 9 | -------------------------------------------------------------------------------- /exercises/pt/exc_03_16_01.py: -------------------------------------------------------------------------------- 1 | import spacy 2 | 3 | nlp = spacy.load("pt_core_news_sm") 4 | text = ( 5 | "Chick-fil-A é um restaurante fast-food com sede na cidade de College Park, " 6 | "estado da Georgia, especializado em sanduíches com carne de frango. " 7 | ) 8 | 9 | # Apenas tokenizar o texto 10 | doc = nlp(text) 11 | print([token.text for token in doc]) 12 | -------------------------------------------------------------------------------- /exercises/pt/exc_03_16_02.py: -------------------------------------------------------------------------------- 1 | import spacy 2 | 3 | nlp = spacy.load("pt_core_news_sm") 4 | text = ( 5 | "Chick-fil-A é um restaurante fast-food com sede na cidade de College Park, " 6 | "estado da Georgia, especializado em sanduíches com carne de frango.
" 7 | ) 8 | 9 | # Desabilitar o lematizador 10 | with ____.____(____): 11 | # Processar o texto 12 | doc = ____ 13 | # Imprimir as entidades do doc 14 | print(____) 15 | -------------------------------------------------------------------------------- /exercises/pt/exc_04_07_01.sh: -------------------------------------------------------------------------------- 1 | python -m spacy ____ ____ ____ --___ ____ --____ ____ -------------------------------------------------------------------------------- /exercises/pt/exc_04_07_02.sh: -------------------------------------------------------------------------------- 1 | cat ./config.cfg -------------------------------------------------------------------------------- /exercises/pt/exc_04_08.sh: -------------------------------------------------------------------------------- 1 | python -m spacy ____ ____ --output ____ --paths.train ____ --paths.dev ____ -------------------------------------------------------------------------------- /exercises/pt/iphone.json: -------------------------------------------------------------------------------- 1 | [ 2 | "How to preorder the iPhone X", 3 | "iPhone X is coming", 4 | "Should I pay $1,000 for the iPhone X?", 5 | "The iPhone 8 reviews are here", 6 | "iPhone 11 vs iPhone 8: What's the difference?", 7 | "I need a new phone! Any tips?" 8 | ] 9 | -------------------------------------------------------------------------------- /exercises/pt/solution_01_02_01.py: -------------------------------------------------------------------------------- 1 | # Importe a biblioteca spaCy 2 | import spacy 3 | 4 | # Crie um objeto nlp do Inglês 5 | nlp = spacy.blank("en") 6 | 7 | # Processe o texto 8 | doc = nlp("This is a sentence.") 9 | 10 | # Imprima o texto do documento 11 | print(doc.text) 12 | -------------------------------------------------------------------------------- /exercises/pt/solution_01_02_02.py: -------------------------------------------------------------------------------- 1 | # Importe a biblioteca spaCy 2 | import spacy 3 | 4 | # Crie um objeto nlp do Alemão 5 | nlp = spacy.blank("de") 6 | 7 | # Processe o texto (equivalente ao português: "Atenciosamente") 8 | doc = nlp("Liebe Grüße!") 9 | 10 | # Imprima o texto do documento 11 | print(doc.text) 12 | -------------------------------------------------------------------------------- /exercises/pt/solution_01_02_03.py: -------------------------------------------------------------------------------- 1 | # Importe a biblioteca spaCy 2 | import spacy 3 | 4 | # Crie um objeto nlp do Espanhol 5 | nlp = spacy.blank("es") 6 | 7 | # Processar o texto em espanhol (equivalente ao português: "Como vai?") 8 | doc = nlp("¿Cómo estás?") 9 | 10 | # Imprimir o texto do documento 11 | print(doc.text) 12 | -------------------------------------------------------------------------------- /exercises/pt/solution_01_03_01.py: -------------------------------------------------------------------------------- 1 | # Importar spacy e criar o objeto nlp do Português 2 | import spacy 3 | nlp = spacy.blank("pt") 4 | 5 | # Processar o texto 6 | doc = nlp("Eu gosto de gatos e cachorros.") 7 | 8 | # Selecionar o primeiro token 9 | first_token = doc[0] 10 | 11 | # Imprimir o texto do primeiro token 12 | print(first_token.text) 13 | -------------------------------------------------------------------------------- /exercises/pt/solution_02_02_01.py: -------------------------------------------------------------------------------- 1 | import spacy 2 | 3 | nlp = spacy.blank("pt") 4 | doc = nlp("Eu tenho
um gato amarelo.") 5 | 6 | # Consulte o código hash da palavra "gato" 7 | gato_hash = nlp.vocab.strings["gato"] 8 | print(gato_hash) 9 | 10 | # Agora consulte o gato_hash para obter a palavra novamente 11 | gato_string = nlp.vocab.strings[gato_hash] 12 | print(gato_string) 13 | -------------------------------------------------------------------------------- /exercises/pt/solution_02_02_02.py: -------------------------------------------------------------------------------- 1 | import spacy 2 | 3 | nlp = spacy.blank("pt") 4 | doc = nlp("David Bowie é uma PESSOA") 5 | 6 | # Consulte o código hash para a string "PESSOA" 7 | person_hash = nlp.vocab.strings["PESSOA"] 8 | print(person_hash) 9 | 10 | # Consulte o person_hash para obter o texto novamente 11 | person_string = nlp.vocab.strings[person_hash] 12 | print(person_string) 13 | -------------------------------------------------------------------------------- /exercises/pt/solution_02_05_01.py: -------------------------------------------------------------------------------- 1 | import spacy 2 | 3 | nlp = spacy.blank("pt") 4 | 5 | # Importe a classe Doc 6 | from spacy.tokens import Doc 7 | 8 | # Texto desejado: "spaCy é bem legal!" 9 | words = ["spaCy", "é", "bem","legal","!"] 10 | spaces = [True, True, True, False, False] 11 | 12 | # Crie um Doc a partir das palavras words e o espaçamento spaces 13 | doc = Doc(nlp.vocab, words=words, spaces=spaces) 14 | print(doc.text) 15 | -------------------------------------------------------------------------------- /exercises/pt/solution_02_05_03.py: -------------------------------------------------------------------------------- 1 | import spacy 2 | 3 | nlp = spacy.blank("pt") 4 | 5 | # Importe a classe Doc 6 | from spacy.tokens import Doc 7 | 8 | # Texto desejado: "Oh, realmente?!" 
9 | words = ["Oh", ",", "realmente", "?", "!"] 10 | spaces = [False, True, False, False, False] 11 | 12 | # Crie um Doc a partir das palavras words e espaçamento spaces 13 | doc = Doc(nlp.vocab, words=words, spaces=spaces) 14 | print(doc.text) 15 | -------------------------------------------------------------------------------- /exercises/pt/solution_02_09.py: -------------------------------------------------------------------------------- 1 | import spacy 2 | 3 | # Carregue o fluxo de processamento en_core_web_md 4 | # para fazer o download do fluxo: python -m spacy download pt_core_news_md 5 | nlp = spacy.load("pt_core_news_md") 6 | 7 | # Processe um texto 8 | doc = nlp("Duas bananas de pijamas") 9 | 10 | # Imprima o vetor para "bananas" 11 | bananas_vector = doc[1].vector 12 | print(bananas_vector) 13 | -------------------------------------------------------------------------------- /exercises/pt/solution_02_10_01.py: -------------------------------------------------------------------------------- 1 | import spacy 2 | 3 | nlp = spacy.load("pt_core_news_md") 4 | 5 | doc1 = nlp("Eu quero comprar um livro novo") 6 | doc2 = nlp("Preciso ler um livro") 7 | 8 | # Obtenha a similiridade entre doc1 e doc2 9 | similarity = doc1.similarity(doc2) 10 | print(similarity) 11 | -------------------------------------------------------------------------------- /exercises/pt/solution_02_10_02.py: -------------------------------------------------------------------------------- 1 | import spacy 2 | 3 | nlp = spacy.load("pt_core_news_md") 4 | 5 | doc = nlp("Televisão e livro") 6 | token1, token2 = doc[0], doc[2] 7 | 8 | # Obtenha a similaridade dos tokens "Televisão" e "livro" 9 | similarity = token1.similarity(token2) 10 | print(similarity) 11 | -------------------------------------------------------------------------------- /exercises/pt/solution_02_10_03.py: -------------------------------------------------------------------------------- 1 | import spacy 2 | 3 | nlp = spacy.load("pt_core_news_md") 4 | 5 | doc = nlp("Visitamos um excelente restaurante. 
Em seguida fomos a um ótimo bar.") 6 | 7 | # Crie partições para "excelente restaurante" e "ótimo bar" 8 | span1 = doc[2:4] 9 | span2 = doc[10:12] 10 | print(span1) 11 | print(span2) 12 | 13 | # Obtenha a similaridade das partições 14 | similarity = span1.similarity(span2) 15 | print(similarity) 16 | -------------------------------------------------------------------------------- /exercises/pt/solution_03_03.py: -------------------------------------------------------------------------------- 1 | import spacy 2 | 3 | # Carregue o fluxo de procesamento en_core_web_sm 4 | nlp = spacy.load("pt_core_news_sm") 5 | 6 | # Imprima o nome dos componentes do fluxo 7 | print(nlp.pipe_names) 8 | 9 | # Imprima as informações das tuplas (name, component) 10 | print(nlp.pipeline) 11 | -------------------------------------------------------------------------------- /exercises/pt/solution_03_14_01.py: -------------------------------------------------------------------------------- 1 | import json 2 | import spacy 3 | 4 | nlp = spacy.load("pt_core_news_sm") 5 | 6 | with open("exercises/pt/tweets.json", encoding="utf8") as f: 7 | TEXTS = json.loads(f.read()) 8 | 9 | # Processar os textos e imprimir os adjetivos 10 | for doc in nlp.pipe(TEXTS): 11 | print([token.text for token in doc if token.pos_ == "ADJ"]) 12 | -------------------------------------------------------------------------------- /exercises/pt/solution_03_14_02.py: -------------------------------------------------------------------------------- 1 | import json 2 | import spacy 3 | 4 | nlp = spacy.load("pt_core_news_sm") 5 | 6 | with open("exercises/pt/tweets.json", encoding="utf8") as f: 7 | TEXTS = json.loads(f.read()) 8 | 9 | # Processar os textos e imprimir as entidades 10 | docs = list(nlp.pipe(TEXTS)) 11 | entities = [doc.ents for doc in docs] 12 | print(*entities) 13 | -------------------------------------------------------------------------------- /exercises/pt/solution_03_14_03.py: -------------------------------------------------------------------------------- 1 | import spacy 2 | 3 | nlp = spacy.blank("en") 4 | 5 | people = ["David Bowie", "Angela Merkel", "Lady Gaga"] 6 | 7 | # Criar uma lista de padrões de correspondência para o PhraseMatcher 8 | patterns = list(nlp.pipe(people)) 9 | -------------------------------------------------------------------------------- /exercises/pt/solution_03_16_01.py: -------------------------------------------------------------------------------- 1 | import spacy 2 | 3 | nlp = spacy.load("pt_core_news_sm") 4 | text = ( 5 | "Chick-fil-A é um restaurante fast-food com sede na cidade de College Park, " 6 | "estado da Georgia, especializado em sanduíches com carne de frango. 
" 7 | ) 8 | 9 | # Apenas toquenizar o texto 10 | doc = nlp.make_doc(text) 11 | print([token.text for token in doc]) 12 | -------------------------------------------------------------------------------- /exercises/pt/solution_04_07_01.sh: -------------------------------------------------------------------------------- 1 | python -m spacy init config ./config.cfg --lang en --pipeline ner -------------------------------------------------------------------------------- /exercises/pt/solution_04_07_02.sh: -------------------------------------------------------------------------------- 1 | cat ./config.cfg -------------------------------------------------------------------------------- /exercises/pt/solution_04_08.sh: -------------------------------------------------------------------------------- 1 | python -m spacy train ./exercises/en/config_gadget.cfg --output ./output --paths.train ./exercises/en/train_gadget.spacy --paths.dev ./exercises/en/dev_gadget.spacy -------------------------------------------------------------------------------- /exercises/pt/test_02_05_01.py: -------------------------------------------------------------------------------- 1 | def test(): 2 | assert ( 3 | "from spacy.tokens import Doc" in __solution__ 4 | ), "Você fez a importação da classe Doc corretamente?" 5 | assert doc.text == "spaCy é bem legal!", "Você tem certeza que criou o Doc corretamente?" 6 | assert "print(doc.text)" in __solution__, "Você está imprimindo o texto do Doc?" 7 | __msg__.good("Muito bom!") 8 | -------------------------------------------------------------------------------- /exercises/pt/test_02_10_01.py: -------------------------------------------------------------------------------- 1 | def test(): 2 | assert ( 3 | "doc1.similarity(doc2)" in __solution__ or "doc2.similarity(doc1)" in __solution__ 4 | ), "Você está comparando a similaridade entre os dois documentos?" 5 | assert ( 6 | 0 <= float(similarity) <= 1 7 | ), "O valor da similaridade deve ser um número de ponto flutuante. Você fez este cálculo corretamente?" 8 | __msg__.good("Muito bem!") 9 | -------------------------------------------------------------------------------- /exercises/pt/test_02_10_02.py: -------------------------------------------------------------------------------- 1 | def test(): 2 | assert ( 3 | "token1.similarity(token2)" in __solution__ or "token2.similarity(token1)" in __solution__ 4 | ), "Você está comparando a similaridade entre os dois tokens?" 5 | assert ( 6 | 0 <= float(similarity) <= 1 7 | ), "O valor da similaridade deve ser um número de ponto flutuante. Você fez este cálculo corretamente?" 8 | __msg__.good("Bom!") 9 | -------------------------------------------------------------------------------- /exercises/pt/test_03_14_01.py: -------------------------------------------------------------------------------- 1 | def test(): 2 | assert ( 3 | "for doc in nlp.pipe(TEXTS)" in __solution__ 4 | ), "Você está iterando nos docs retornados em nlp.pipe?" 5 | __msg__.good("Bom!") 6 | -------------------------------------------------------------------------------- /exercises/pt/test_03_14_02.py: -------------------------------------------------------------------------------- 1 | def test(): 2 | assert ( 3 | "docs = list(nlp.pipe(TEXTS))" in __solution__ 4 | ), "Você está usando nlp.pipe envolvido em uma lista (list)?" 
5 | __msg__.good("Bom trabalho!") 6 | -------------------------------------------------------------------------------- /exercises/pt/test_03_14_03.py: -------------------------------------------------------------------------------- 1 | def test(): 2 | assert ( 3 | "patterns = list(nlp.pipe(people))" in __solution__ 4 | ), "Você está usando nlp.pipe envolvido em uma lista (list)?" 5 | 6 | __msg__.good( 7 | "Bom trabalho! Vamos seguir agora com um exemplo prático que " 8 | "usa nlp.pipe para processar documentos com metadados adicionais." 9 | ) 10 | -------------------------------------------------------------------------------- /exercises/pt/test_03_16_01.py: -------------------------------------------------------------------------------- 1 | def test(): 2 | assert ( 3 | "doc = nlp.make_doc(text)" in __solution__ 4 | or "doc = nlp.tokenizer(text)" in __solution__ 5 | ), "Você está apenas toquenizando o texto?" 6 | 7 | __msg__.good("Muito bom!") 8 | -------------------------------------------------------------------------------- /exercises/pt/train_gadget.spacy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/explosion/spacy-course/77d8ee1630788b1fb0df1f80f999f7355941a37f/exercises/pt/train_gadget.spacy -------------------------------------------------------------------------------- /exercises/zh/dev_gadget.spacy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/explosion/spacy-course/77d8ee1630788b1fb0df1f80f999f7355941a37f/exercises/zh/dev_gadget.spacy -------------------------------------------------------------------------------- /exercises/zh/exc_01_02_01.py: -------------------------------------------------------------------------------- 1 | # 导入spaCy 2 | import ____ 3 | 4 | # 创建英文nlp对象 5 | nlp = ____ 6 | 7 | # 处理文本 8 | doc = nlp("This is a sentence.") 9 | 10 | # 打印文本 11 | print(____.text) 12 | -------------------------------------------------------------------------------- /exercises/zh/exc_01_02_02.py: -------------------------------------------------------------------------------- 1 | # 导入spaCy 2 | import ____ 3 | 4 | # 创建德语nlp对象 5 | nlp = ____ 6 | 7 | # 处理文本 (这是德语"Kind regards!"的意思) 8 | doc = nlp("Liebe Grüße!") 9 | 10 | # 打印文本 11 | print(____.text) 12 | -------------------------------------------------------------------------------- /exercises/zh/exc_01_02_03.py: -------------------------------------------------------------------------------- 1 | # 导入spaCy 2 | import ____ 3 | 4 | # 创建西班牙语nlp对象 5 | nlp = ____ 6 | 7 | # 处理文本 (这是西班牙语"How are you?"的意思) 8 | doc = nlp("¿Cómo estás?") 9 | 10 | # 打印文本 11 | print(____.text) 12 | -------------------------------------------------------------------------------- /exercises/zh/exc_01_02_04.py: -------------------------------------------------------------------------------- 1 | # 导入spaCy 2 | import ____ 3 | 4 | # 创建中文nlp对象 5 | nlp = ____ 6 | 7 | # 处理文本 8 | doc = nlp("这是一个句子。") 9 | 10 | # 打印文本 11 | print(____.text) 12 | -------------------------------------------------------------------------------- /exercises/zh/exc_01_03_01.py: -------------------------------------------------------------------------------- 1 | # 导入spaCy创建中文nlp对象 2 | import ____ 3 | 4 | nlp = ____ 5 | 6 | # 处理文本 7 | doc = ____("我喜欢老虎和狮子。") 8 | 9 | # 选择第一个词符 10 | first_token = doc[____] 11 | 12 | # 打印第一个词符的文本 13 | print(first_token.____) 14 | -------------------------------------------------------------------------------- 
/exercises/zh/exc_01_03_02.py: -------------------------------------------------------------------------------- 1 | # 导入spaCy并创建中文nlp对象 2 | import ____ 3 | 4 | nlp = ____ 5 | 6 | # 处理文本 7 | doc = ____("我喜欢老虎和狮子。") 8 | 9 | # 遍历打印doc中的内容 10 | for i, token in enumerate(doc): 11 | print(i, token.text) 12 | 13 | # 截取Doc中"老虎"的部分 14 | laohu = ____ 15 | print(laohu.text) 16 | 17 | # 截取Doc中"老虎和狮子"的部分(不包括"。") 18 | laohu_he_shizi = ____ 19 | print(laohu_he_shizi.text) 20 | -------------------------------------------------------------------------------- /exercises/zh/exc_01_04.py: -------------------------------------------------------------------------------- 1 | import spacy 2 | 3 | nlp = spacy.blank("zh") 4 | 5 | # 处理文本 6 | doc = nlp( 7 | "在1990年,一份豆腐脑可能只要¥0.5。" 8 | "现在一份豆腐脑可能要¥5左右了。" 9 | ) 10 | 11 | # 遍历doc中的词符 12 | for token in doc: 13 | # 检测词符的文本是否是"¥" 14 | if token.____ == "¥": 15 | # 获取文档中的下一个词符 16 | next_token = ____[____] 17 | # 检测下一个词符是否组成一个数字 18 | if ____.____: 19 | print("Price found:", next_token.text) 20 | -------------------------------------------------------------------------------- /exercises/zh/exc_01_07.py: -------------------------------------------------------------------------------- 1 | import spacy 2 | 3 | # 读取"zh_core_web_sm"流程 4 | nlp = ____ 5 | 6 | text = "写入历史了:苹果是美国第一家市值超过一万亿美元的上市公司。" 7 | 8 | # 处理文本 9 | doc = ____ 10 | 11 | # 打印doc中的文本 12 | print(____.____) 13 | -------------------------------------------------------------------------------- /exercises/zh/exc_01_08_01.py: -------------------------------------------------------------------------------- 1 | import spacy 2 | 3 | nlp = spacy.load("zh_core_web_sm") 4 | 5 | text = "写入历史了:苹果是美国第一家市值超过一万亿美元的上市公司。" 6 | 7 | # 处理文本 8 | doc = ____ 9 | 10 | for token in doc: 11 | # 获取词符文本、词性标注及依存关系标签 12 | token_text = ____.____ 13 | token_pos = ____.____ 14 | token_dep = ____.____ 15 | # 规范化打印的格式 16 | print(f"{token_text:<12}{token_pos:<10}{token_dep:<10}") 17 | -------------------------------------------------------------------------------- /exercises/zh/exc_01_08_02.py: -------------------------------------------------------------------------------- 1 | import spacy 2 | 3 | nlp = spacy.load("zh_core_web_sm") 4 | 5 | text = "写入历史了:苹果是美国第一家市值超过一万亿美元的上市公司。" 6 | 7 | # 处理文本 8 | doc = ____ 9 | 10 | # 对识别出的实体进行遍历 11 | for ent in ____.____: 12 | # 打印实体文本及标注 13 | print(ent.____, ____.____) 14 | -------------------------------------------------------------------------------- /exercises/zh/exc_02_02_01.py: -------------------------------------------------------------------------------- 1 | import spacy 2 | 3 | nlp = spacy.load("zh_core_web_sm") 4 | doc = nlp("我养了一只猫。") 5 | 6 | # 查找词汇"猫"的哈希值 7 | cat_hash = ____.____.____[____] 8 | print(cat_hash) 9 | 10 | # 查找cat_hash来得到字符串 11 | cat_string = ____.____.____[____] 12 | print(cat_string) 13 | -------------------------------------------------------------------------------- /exercises/zh/exc_02_02_02.py: -------------------------------------------------------------------------------- 1 | import spacy 2 | 3 | nlp = spacy.load("zh_core_web_sm") 4 | doc = nlp("周杰伦是一个人物。") 5 | 6 | # 查找标签是"人物"的字符串的哈希值 7 | person_hash = ____.____.____[____] 8 | print(person_hash) 9 | 10 | # 查找person_hash来拿到字符串 11 | person_string = ____.____.____[____] 12 | print(person_string) 13 | -------------------------------------------------------------------------------- /exercises/zh/exc_02_05_01.py: -------------------------------------------------------------------------------- 1 | import spacy 2 | 3 | nlp = spacy.blank("en") 4 
| 5 | # 导入Doc类 6 | from ____ import ____ 7 | 8 | # 目标文本:"spaCy is cool!" 9 | words = ["spaCy", "is", "cool", "!"] 10 | spaces = [True, True, False, False] 11 | 12 | # 用words和spaces创建一个Doc 13 | doc = ____(____, words=words, spaces=spaces) 14 | print(doc.text) 15 | -------------------------------------------------------------------------------- /exercises/zh/exc_02_05_02.py: -------------------------------------------------------------------------------- 1 | import spacy 2 | 3 | nlp = spacy.blank("en") 4 | 5 | # 导入Doc类 6 | from ____ import ____ 7 | 8 | # 目标文本:"Go, get started!" 9 | words = ["Go", ",", "get", "started", "!"] 10 | spaces = [____, ____, ____, ____, ____] 11 | 12 | # 使用words和spaces创建一个Doc 13 | doc = ____(____, ____=____, ____=____) 14 | print(doc.text) 15 | -------------------------------------------------------------------------------- /exercises/zh/exc_02_05_03.py: -------------------------------------------------------------------------------- 1 | import spacy 2 | 3 | nlp = spacy.blank("en") 4 | 5 | # 导入Doc类 6 | from ____ import ____ 7 | 8 | # 目标文本:"Oh, really?!" 9 | words = [____, ____, ____, ____, ____] 10 | spaces = [____, ____, ____, ____, ____] 11 | 12 | # 用words和spaces创建一个Doc 13 | doc = ____(____, ____=____, ____=____) 14 | print(doc.text) 15 | -------------------------------------------------------------------------------- /exercises/zh/exc_02_09.py: -------------------------------------------------------------------------------- 1 | import spacy 2 | 3 | # 读取zh_core_web_md流程 4 | nlp = ____ 5 | 6 | # 处理文本 7 | doc = nlp("两只老虎跑得快") 8 | 9 | for token in doc: 10 | print(token.text) 11 | 12 | # 获取词符"老虎"的向量 13 | laohu_vector = ____.____ 14 | print(laohu_vector) 15 | -------------------------------------------------------------------------------- /exercises/zh/exc_02_10_01.py: -------------------------------------------------------------------------------- 1 | import spacy 2 | 3 | nlp = spacy.load("zh_core_web_md") 4 | 5 | doc1 = nlp("这是一个温暖的夏日") 6 | doc2 = nlp("外面阳光明媚") 7 | 8 | # 获取doc1和doc2的相似度 9 | similarity = ____.____(____) 10 | print(similarity) 11 | -------------------------------------------------------------------------------- /exercises/zh/exc_02_10_02.py: -------------------------------------------------------------------------------- 1 | import spacy 2 | 3 | nlp = spacy.load("zh_core_web_md") 4 | 5 | doc = nlp("电影和音乐") 6 | 7 | for i, token in enumerate(doc): 8 | print(i, token.text) 9 | 10 | token1, token2 = doc[0], doc[2] 11 | 12 | # 获取词符"电影"和"音乐"的相似度 13 | similarity = ____.____(____) 14 | print(similarity) 15 | -------------------------------------------------------------------------------- /exercises/zh/exc_02_10_03.py: -------------------------------------------------------------------------------- 1 | import spacy 2 | 3 | nlp = spacy.load("zh_core_web_md") 4 | 5 | doc = nlp("这是一家不错的餐厅。之后我们又去了一家很好的酒吧。") 6 | 7 | for i, token in enumerate(doc): 8 | print(i, token.text) 9 | 10 | # 给"不错的餐厅"和"很好的酒吧"分别创建span 11 | span1 = ____ 12 | span2 = ____ 13 | 14 | # 获取两个span的相似度 15 | similarity = ____.____(____) 16 | print(similarity) 17 | -------------------------------------------------------------------------------- /exercises/zh/exc_03_03.py: -------------------------------------------------------------------------------- 1 | import spacy 2 | 3 | # 读取zh_core_web_sm流程 4 | nlp = ____ 5 | 6 | # 打印流程组件的名字 7 | print(____.____) 8 | 9 | # 打印完整流程的(name, component)元组 10 | print(____.____) 11 |
-------------------------------------------------------------------------------- /exercises/zh/exc_03_09_01.py: -------------------------------------------------------------------------------- 1 | import spacy 2 | from spacy.tokens import Token 3 | 4 | nlp = spacy.load("zh_core_web_sm") 5 | 6 | # 注册词符的扩展属性"is_country",其默认值是False 7 | ____.____(____, ____=____) 8 | 9 | # 处理文本,将词符"新加坡"的is_country属性设置为True 10 | doc = nlp("我住在新加坡。") 11 | ____ = True 12 | 13 | # 对所有词符打印词符文本及is_country属性 14 | print([(____, ____) for token in doc]) 15 | -------------------------------------------------------------------------------- /exercises/zh/exc_03_09_02.py: -------------------------------------------------------------------------------- 1 | import spacy 2 | from spacy.tokens import Token 3 | 4 | nlp = spacy.blank("zh") 5 | 6 | # 定义取值器函数,读入一个词符并返回其逆序的文本 7 | def get_reversed(token): 8 | return token.text[::-1] 9 | 10 | 11 | # 注册词符的扩展属性"reversed"及其取值器get_reversed 12 | ____.____(____, ____=____) 13 | 14 | # 处理文本,打印每一个词符的逆序属性 15 | doc = nlp("我说的所有话都是假的,包括这一句。") 16 | for ____ in ____: 17 | print("reversed:", ____) 18 | -------------------------------------------------------------------------------- /exercises/zh/exc_03_10_01.py: -------------------------------------------------------------------------------- 1 | import spacy 2 | from spacy.tokens import Doc 3 | 4 | nlp = spacy.blank("zh") 5 | 6 | # 定义取值器函数 7 | def get_has_number(doc): 8 | # 如果doc中有任一词符的token.like_num返回True,则返回True 9 | return any(____ for token in doc) 10 | 11 | 12 | # 注册Doc的扩展属性"has_number"及其取值器get_has_number 13 | ____.____(____, ____=____) 14 | 15 | # 处理文本,检查定制化的has_number属性 16 | doc = nlp("这家博物馆在2012年关了五个月。") 17 | print("has_number:", ____) 18 | -------------------------------------------------------------------------------- /exercises/zh/exc_03_10_02.py: -------------------------------------------------------------------------------- 1 | import spacy 2 | from spacy.tokens import Span 3 | 4 | nlp = spacy.blank("zh") 5 | 6 | # 定义这个方法 7 | def to_html(span, tag): 8 | # 将span文本包在HTML标签中并返回 9 | return f"<{tag}>{span.text}</{tag}>" 10 | 11 | 12 | # 注册这个Span方法扩展名"to_html"及其方法to_html 13 | ____.____(____, ____=____) 14 | 15 | # 处理文本,在span上调用to_html方法并传入标签名"strong" 16 | doc = nlp("大家好,这是一个句子。") 17 | span = doc[0:3] 18 | print(____) 19 | -------------------------------------------------------------------------------- /exercises/zh/exc_03_14_01.py: -------------------------------------------------------------------------------- 1 | import json 2 | import spacy 3 | 4 | nlp = spacy.load("zh_core_web_sm") 5 | 6 | with open("exercises/zh/weibo.json", encoding="utf8") as f: 7 | TEXTS = json.loads(f.read()) 8 | 9 | # 处理文本,打印形容词 10 | for text in TEXTS: 11 | doc = nlp(text) 12 | print([token.text for token in doc if token.pos_ == "ADJ"]) 13 | -------------------------------------------------------------------------------- /exercises/zh/exc_03_14_02.py: -------------------------------------------------------------------------------- 1 | import json 2 | import spacy 3 | 4 | nlp = spacy.load("zh_core_web_sm") 5 | 6 | with open("exercises/zh/weibo.json", encoding="utf8") as f: 7 | TEXTS = json.loads(f.read()) 8 | 9 | # 处理文本,打印实体 10 | docs = [nlp(text) for text in TEXTS] 11 | entities = [doc.ents for doc in docs] 12 | print(*entities) 13 | -------------------------------------------------------------------------------- /exercises/zh/exc_03_14_03.py: -------------------------------------------------------------------------------- 1 | import spacy 2 | 3 | nlp = spacy.blank("zh") 4 |
5 | people = ["周杰伦", "庞麦郎", "诸葛亮"] 6 | 7 | # 为PhraseMatcher创建一个模板列表 8 | patterns = [nlp(person) for person in people] 9 | -------------------------------------------------------------------------------- /exercises/zh/exc_03_16_01.py: -------------------------------------------------------------------------------- 1 | import spacy 2 | 3 | nlp = spacy.load("zh_core_web_sm") 4 | text = ( 5 | "在300多年的风雨历程中,历代同仁堂人始终恪守“炮制虽繁必不敢省人工,品味虽贵必不敢减物力”的古训," 6 | "树立“修合无人见,存心有天知”的自律意识,造就了制药过程中兢兢小心、精益求精的严细精神。" 7 | ) 8 | 9 | # 仅对文本做分词 10 | doc = nlp(text) 11 | print([token.text for token in doc]) 12 | -------------------------------------------------------------------------------- /exercises/zh/exc_03_16_02.py: -------------------------------------------------------------------------------- 1 | import spacy 2 | 3 | nlp = spacy.load("zh_core_web_sm") 4 | text = ( 5 | "在300多年的风雨历程中,历代同仁堂人始终恪守“炮制虽繁必不敢省人工,品味虽贵必不敢减物力”的古训," 6 | "树立“修合无人见,存心有天知”的自律意识,造就了制药过程中兢兢小心、精益求精的严细精神。" 7 | ) 8 | 9 | # 关闭tagger和parser 10 | with ____.____(____): 11 | # 处理文本 12 | doc = ____ 13 | # 打印doc中的实体 14 | print(____) 15 | -------------------------------------------------------------------------------- /exercises/zh/exc_04_07_01.sh: -------------------------------------------------------------------------------- 1 | python -m spacy ____ ____ ____ --___ ____ --____ ____ 2 | -------------------------------------------------------------------------------- /exercises/zh/exc_04_07_02.sh: -------------------------------------------------------------------------------- 1 | cat ./config.cfg 2 | -------------------------------------------------------------------------------- /exercises/zh/exc_04_08.sh: -------------------------------------------------------------------------------- 1 | python -m spacy ____ ____ --output ____ --paths.train ____ --paths.dev ____ 2 | -------------------------------------------------------------------------------- /exercises/zh/gadgets.json: -------------------------------------------------------------------------------- 1 | [ 2 | ["如何预定iPhone X", {"entities": [[4, 12, "GADGET"]] }], 3 | ["iPhone X就要来了", {"entities": [[0, 8, "GADGET"]] }], 4 | ["为买一个iPhone X花上万块钱值得吗?", {"entities": [[4, 12, "GADGET"]] }], 5 | ["iPhone 8的评测出来了", {"entities": [[0, 8, "GADGET"]] }], 6 | ["最新的iPhone已经到第11代了", {"entities": [[3, 9, "GADGET"]] }], 7 | ["我急需一部新手机,给点建议吧!", {"entities": [] }] 8 | ] 9 | 10 | -------------------------------------------------------------------------------- /exercises/zh/iphone.json: -------------------------------------------------------------------------------- 1 | [ 2 | "如何预定iPhone X", 3 | "iPhone X就要来了", 4 | "为买一个iPhone X花上万块钱值得吗?", 5 | "iPhone 8的评测出来了", 6 | "iPhone 11 vs iPhone 8:有哪些升级?", 7 | "我急需一部新手机,给点建议吧!" 
8 | ] 9 | -------------------------------------------------------------------------------- /exercises/zh/solution_01_02_01.py: -------------------------------------------------------------------------------- 1 | # 导入spaCy 2 | import spacy 3 | 4 | # 创建英文nlp对象 5 | nlp = spacy.blank("en") 6 | 7 | # 处理文本 8 | doc = nlp("This is a sentence.") 9 | 10 | # 打印文本 11 | print(doc.text) 12 | -------------------------------------------------------------------------------- /exercises/zh/solution_01_02_02.py: -------------------------------------------------------------------------------- 1 | # 导入spaCy 2 | import spacy 3 | 4 | # 创建德语nlp对象 5 | nlp = spacy.blank("de") 6 | 7 | # 处理文本 (这是德语"Kind regards!"的意思) 8 | doc = nlp("Liebe Grüße!") 9 | 10 | # 打印文本 11 | print(doc.text) 12 | -------------------------------------------------------------------------------- /exercises/zh/solution_01_02_03.py: -------------------------------------------------------------------------------- 1 | # 导入spaCy 2 | import spacy 3 | 4 | # 创建西班牙语nlp对象 5 | nlp = spacy.blank("es") 6 | 7 | # 处理文本 (这是西班牙语"How are you?"的意思) 8 | doc = nlp("¿Cómo estás?") 9 | 10 | # 打印文本 11 | print(doc.text) 12 | -------------------------------------------------------------------------------- /exercises/zh/solution_01_02_04.py: -------------------------------------------------------------------------------- 1 | # 导入spaCy 2 | import spacy 3 | 4 | # 创建中文nlp对象 5 | nlp = spacy.blank("zh") 6 | 7 | # 处理文本 8 | doc = nlp("这是一个句子。") 9 | 10 | # 打印文本 11 | print(doc.text) 12 | -------------------------------------------------------------------------------- /exercises/zh/solution_01_03_01.py: -------------------------------------------------------------------------------- 1 | # 导入spacy并创建中文nlp对象 2 | import spacy 3 | 4 | nlp = spacy.blank("zh") 5 | 6 | # 处理文本 7 | doc = nlp("我喜欢老虎和狮子。") 8 | 9 | # 选择第一个词符 10 | first_token = doc[0] 11 | 12 | # 打印第一个词符的文本 13 | print(first_token.text) 14 | -------------------------------------------------------------------------------- /exercises/zh/solution_01_03_02.py: -------------------------------------------------------------------------------- 1 | # 导入spacy并创建中文nlp对象 2 | import spacy 3 | 4 | nlp = spacy.blank("zh") 5 | 6 | # 处理文本 7 | doc = nlp("我喜欢老虎和狮子。") 8 | 9 | # 遍历打印doc中的内容 10 | for i, token in enumerate(doc): 11 | print(i, token.text) 12 | 13 | # 截取Doc中"老虎"的部分 14 | laohu = doc[2:3] 15 | print(laohu.text) 16 | 17 | # 截取Doc中"老虎和狮子"的部分(不包括"。") 18 | laohu_he_shizi = doc[2:5] 19 | print(laohu_he_shizi.text) 20 | -------------------------------------------------------------------------------- /exercises/zh/solution_01_07.py: -------------------------------------------------------------------------------- 1 | import spacy 2 | 3 | # 读取"zh_core_web_sm"流程 4 | nlp = spacy.load("zh_core_web_sm") 5 | 6 | text = "写入历史了:苹果是美国第一家市值超过一万亿美元的上市公司。" 7 | 8 | # 处理文本 9 | doc = nlp(text) 10 | 11 | # 打印doc中的文本 12 | print(doc.text) 13 | -------------------------------------------------------------------------------- /exercises/zh/solution_01_08_01.py: -------------------------------------------------------------------------------- 1 | import spacy 2 | 3 | nlp = spacy.load("zh_core_web_sm") 4 | 5 | text = "写入历史了:苹果是美国第一家市值超过一万亿美元的上市公司。" 6 | 7 | # 处理文本 8 | doc = nlp(text) 9 | 10 | for token in doc: 11 | # 获取词符文本、词性标注及依存关系标签 12 | token_text = token.text 13 | token_pos = token.pos_ 14 | token_dep = token.dep_ 15 | # 规范化打印的格式 16 | print(f"{token_text:<12}{token_pos:<10}{token_dep:<10}") 17 |
-------------------------------------------------------------------------------- /exercises/zh/solution_01_08_02.py: -------------------------------------------------------------------------------- 1 | import spacy 2 | 3 | nlp = spacy.load("zh_core_web_sm") 4 | 5 | text = "写入历史了:苹果是美国第一家市值超过一万亿美元的上市公司。" 6 | 7 | # 处理文本 8 | doc = nlp(text) 9 | 10 | # 对识别出的实体进行遍历 11 | for ent in doc.ents: 12 | # 打印实体文本及标注 13 | print(ent.text, ent.label_) 14 | -------------------------------------------------------------------------------- /exercises/zh/solution_02_02_01.py: -------------------------------------------------------------------------------- 1 | import spacy 2 | 3 | nlp = spacy.load("zh_core_web_sm") 4 | doc = nlp("我养了一只猫。") 5 | 6 | # 查找词汇"猫"的哈希值 7 | cat_hash = nlp.vocab.strings["猫"] 8 | print(cat_hash) 9 | 10 | # 查找cat_hash来得到字符串 11 | cat_string = nlp.vocab.strings[cat_hash] 12 | print(cat_string) 13 | -------------------------------------------------------------------------------- /exercises/zh/solution_02_02_02.py: -------------------------------------------------------------------------------- 1 | import spacy 2 | 3 | nlp = spacy.load("zh_core_web_sm") 4 | doc = nlp("周杰伦是一个人物。") 5 | 6 | # 查找标签是"人物"的字符串的哈希值 7 | person_hash = nlp.vocab.strings["人物"] 8 | print(person_hash) 9 | 10 | # 查找person_hash来拿到字符串 11 | person_string = nlp.vocab.strings[person_hash] 12 | print(person_string) 13 | -------------------------------------------------------------------------------- /exercises/zh/solution_02_05_01.py: -------------------------------------------------------------------------------- 1 | import spacy 2 | 3 | nlp = spacy.blank("en") 4 | 5 | # 导入Doc类 6 | from spacy.tokens import Doc 7 | 8 | # 目标文本:"spaCy is cool!" 9 | words = ["spaCy", "is", "cool", "!"] 10 | spaces = [True, True, False, False] 11 | 12 | # 用words和spaces创建一个Doc 13 | doc = Doc(nlp.vocab, words=words, spaces=spaces) 14 | print(doc.text) 15 | -------------------------------------------------------------------------------- /exercises/zh/solution_02_05_02.py: -------------------------------------------------------------------------------- 1 | import spacy 2 | 3 | nlp = spacy.blank("en") 4 | 5 | # 导入Doc类 6 | from spacy.tokens import Doc 7 | 8 | # 目标文本:"Go, get started!" 9 | words = ["Go", ",", "get", "started", "!"] 10 | spaces = [False, True, True, False, False] 11 | 12 | # 使用words和spaces创建一个Doc 13 | doc = Doc(nlp.vocab, words=words, spaces=spaces) 14 | print(doc.text) 15 | -------------------------------------------------------------------------------- /exercises/zh/solution_02_05_03.py: -------------------------------------------------------------------------------- 1 | import spacy 2 | 3 | nlp = spacy.blank("en") 4 | 5 | # 导入Doc类 6 | from spacy.tokens import Doc 7 | 8 | # 目标文本:"Oh, really?!" 
9 | words = ["Oh", ",", "really", "?", "!"] 10 | spaces = [False, True, False, False, False] 11 | 12 | # 用words和spaces创建一个Doc 13 | doc = Doc(nlp.vocab, words=words, spaces=spaces) 14 | print(doc.text) 15 | -------------------------------------------------------------------------------- /exercises/zh/solution_02_07.py: -------------------------------------------------------------------------------- 1 | import spacy 2 | 3 | nlp = spacy.load("zh_core_web_sm") 4 | doc = nlp("北京是一座美丽的城市。") 5 | 6 | # 遍历所有的词符 7 | for token in doc: 8 | # 检查当前词符是否是一个专有名词 9 | if token.pos_ == "PROPN": 10 | # 检查下一个词符是否是一个动词 11 | if doc[token.i + 1].pos_ == "VERB": 12 | print("找到了动词前面的一个专有名词:", token.text) 13 | -------------------------------------------------------------------------------- /exercises/zh/solution_02_09.py: -------------------------------------------------------------------------------- 1 | import spacy 2 | 3 | # 读取zh_core_web_md流程 4 | nlp = spacy.load("zh_core_web_md") 5 | 6 | # 处理文本 7 | doc = nlp("两只老虎跑得快") 8 | 9 | for token in doc: 10 | print(token.text) 11 | 12 | # 获取词符"老虎"的向量 13 | laohu_vector = doc[2].vector 14 | print(laohu_vector) 15 | -------------------------------------------------------------------------------- /exercises/zh/solution_02_10_01.py: -------------------------------------------------------------------------------- 1 | import spacy 2 | 3 | nlp = spacy.load("zh_core_web_md") 4 | 5 | doc1 = nlp("这是一个温暖的夏日") 6 | doc2 = nlp("外面阳光明媚") 7 | 8 | # 获取doc1和doc2的相似度 9 | similarity = doc1.similarity(doc2) 10 | print(similarity) 11 | -------------------------------------------------------------------------------- /exercises/zh/solution_02_10_02.py: -------------------------------------------------------------------------------- 1 | import spacy 2 | 3 | nlp = spacy.load("zh_core_web_md") 4 | 5 | doc = nlp("电影和音乐") 6 | 7 | for i, token in enumerate(doc): 8 | print(i, token.text) 9 | 10 | token1, token2 = doc[0], doc[2] 11 | 12 | # 获取词符"电影"和"音乐"的相似度 13 | similarity = token1.similarity(token2) 14 | print(similarity) 15 | -------------------------------------------------------------------------------- /exercises/zh/solution_02_10_03.py: -------------------------------------------------------------------------------- 1 | import spacy 2 | 3 | nlp = spacy.load("zh_core_web_md") 4 | 5 | doc = nlp("这是一家不错的餐厅。之后我们又去了一家很好的酒吧。") 6 | 7 | for i, token in enumerate(doc): 8 | print(i, token.text) 9 | 10 | # 给"不错的餐厅"和"很好的酒吧"分别创建span 11 | span1 = doc[2:5] 12 | span2 = doc[12:15] 13 | 14 | # 获取两个span的相似度 15 | similarity = span1.similarity(span2) 16 | print(similarity) 17 | -------------------------------------------------------------------------------- /exercises/zh/solution_03_03.py: -------------------------------------------------------------------------------- 1 | import spacy 2 | 3 | # 读取zh_core_web_sm流程 4 | nlp = spacy.load("zh_core_web_sm") 5 | 6 | # 打印流程组件的名字 7 | print(nlp.pipe_names) 8 | 9 | # 打印完整流程的(name, component)元组 10 | print(nlp.pipeline) 11 | -------------------------------------------------------------------------------- /exercises/zh/solution_03_09_01.py: -------------------------------------------------------------------------------- 1 | import spacy 2 | from spacy.tokens import Token 3 | 4 | nlp = spacy.load("zh_core_web_sm") 5 | 6 | # 注册词符的扩展属性"is_country",其默认值是False 7 | Token.set_extension("is_country", default=False) 8 | 9 | # 处理文本,将词符"新加坡"的is_country属性设置为True 10 | doc = nlp("我住在新加坡。") 11 | doc[2]._.is_country = True 12 | 13 | # 对所有词符打印词符文本及is_country属性 14 | print([(token.text, 
token._.is_country) for token in doc]) 15 | -------------------------------------------------------------------------------- /exercises/zh/solution_03_14_01.py: -------------------------------------------------------------------------------- 1 | import json 2 | import spacy 3 | 4 | nlp = spacy.load("zh_core_web_sm") 5 | 6 | with open("exercises/zh/weibo.json", encoding="utf8") as f: 7 | TEXTS = json.loads(f.read()) 8 | 9 | # 处理文本,打印形容词 10 | for doc in nlp.pipe(TEXTS): 11 | print([token.text for token in doc if token.pos_ == "ADJ"]) 12 | -------------------------------------------------------------------------------- /exercises/zh/solution_03_14_02.py: -------------------------------------------------------------------------------- 1 | import json 2 | import spacy 3 | 4 | nlp = spacy.load("zh_core_web_sm") 5 | 6 | with open("exercises/zh/weibo.json", encoding="utf8") as f: 7 | TEXTS = json.loads(f.read()) 8 | 9 | # 处理文本,打印实体 10 | docs = list(nlp.pipe(TEXTS)) 11 | entities = [doc.ents for doc in docs] 12 | print(*entities) 13 | -------------------------------------------------------------------------------- /exercises/zh/solution_03_14_03.py: -------------------------------------------------------------------------------- 1 | import spacy 2 | 3 | nlp = spacy.blank("zh") 4 | 5 | people = ["周杰伦", "庞麦郎", "诸葛亮"] 6 | 7 | # 为PhraseMatcher创建一个模板列表 8 | patterns = list(nlp.pipe(people)) 9 | -------------------------------------------------------------------------------- /exercises/zh/solution_03_16_01.py: -------------------------------------------------------------------------------- 1 | import spacy 2 | 3 | nlp = spacy.load("zh_core_web_sm") 4 | text = ( 5 | "在300多年的风雨历程中,历代同仁堂人始终恪守“炮制虽繁必不敢省人工,品味虽贵必不敢减物力”的古训," 6 | "树立“修合无人见,存心有天知”的自律意识,造就了制药过程中兢兢小心、精益求精的严细精神。" 7 | ) 8 | 9 | # 仅对文本做分词 10 | doc = nlp.make_doc(text) 11 | print([token.text for token in doc]) 12 | -------------------------------------------------------------------------------- /exercises/zh/solution_03_16_02.py: -------------------------------------------------------------------------------- 1 | import spacy 2 | 3 | nlp = spacy.load("zh_core_web_sm") 4 | text = ( 5 | "在300多年的风雨历程中,历代同仁堂人始终恪守“炮制虽繁必不敢省人工,品味虽贵必不敢减物力”的古训," 6 | "树立“修合无人见,存心有天知”的自律意识,造就了制药过程中兢兢小心、精益求精的严细精神。" 7 | ) 8 | 9 | # 关闭tagger和parser 10 | with nlp.select_pipes(disable=["tagger", "parser"]): 11 | # 处理文本 12 | doc = nlp(text) 13 | # 打印doc中的实体 14 | print(doc.ents) 15 | -------------------------------------------------------------------------------- /exercises/zh/solution_04_07_01.sh: -------------------------------------------------------------------------------- 1 | python -m spacy init config ./config.cfg --lang zh --pipeline ner 2 | -------------------------------------------------------------------------------- /exercises/zh/solution_04_07_02.sh: -------------------------------------------------------------------------------- 1 | cat ./config.cfg 2 | -------------------------------------------------------------------------------- /exercises/zh/solution_04_08.sh: -------------------------------------------------------------------------------- 1 | python -m spacy train ./exercises/zh/config_gadget.cfg --output ./output --paths.train ./exercises/zh/train_gadget.spacy --paths.dev ./exercises/zh/dev_gadget.spacy 2 | -------------------------------------------------------------------------------- /exercises/zh/test_01_02_01.py: -------------------------------------------------------------------------------- 1 | def test(): 2 | import spacy.tokens 3 | import spacy.lang.en 4 | 
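    # 注:这些测试假设由课程的测试运行器执行:学员提交的代码会先运行,
    # 其中的nlp和doc等变量,连同__solution__(提交的源码字符串)和
    # __msg__(反馈工具),都会被注入到这里的命名空间中。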
5 | assert isinstance( 6 | nlp, spacy.lang.en.English 7 | ), "nlp应该是英文类的一个实例。" 8 | assert isinstance( 9 | doc, spacy.tokens.Doc 10 | ), "你用nlp实例处理过文本并且创建了一个doc吗?" 11 | assert "print(doc.text)" in __solution__, "你打印doc.text了吗?" 12 | 13 | __msg__.good("干得漂亮!") 14 | -------------------------------------------------------------------------------- /exercises/zh/test_01_02_02.py: -------------------------------------------------------------------------------- 1 | def test(): 2 | import spacy.tokens 3 | import spacy.lang.de 4 | 5 | assert isinstance( 6 | nlp, spacy.lang.de.German 7 | ), "nlp应该是德文类的一个实例。" 8 | assert isinstance( 9 | doc, spacy.tokens.Doc 10 | ), "你用nlp实例处理过文本并且创建了一个doc吗?" 11 | assert "print(doc.text)" in __solution__, "你打印doc.text了吗?" 12 | 13 | __msg__.good("Sehr gut! :)") 14 | -------------------------------------------------------------------------------- /exercises/zh/test_01_02_03.py: -------------------------------------------------------------------------------- 1 | def test(): 2 | import spacy.tokens 3 | import spacy.lang.es 4 | 5 | assert isinstance( 6 | nlp, spacy.lang.es.Spanish 7 | ), "nlp应该是西班牙语类的一个实例。" 8 | assert isinstance( 9 | doc, spacy.tokens.Doc 10 | ), "你用nlp实例处理过文本并且创建了一个doc吗?" 11 | assert "print(doc.text)" in __solution__, "你打印doc.text了吗?" 12 | 13 | __msg__.good("Perfecto! 我们现在继续试试documents,spans和tokens.") 14 | -------------------------------------------------------------------------------- /exercises/zh/test_01_02_04.py: -------------------------------------------------------------------------------- 1 | def test(): 2 | import spacy.tokens 3 | import spacy.lang.zh 4 | 5 | assert isinstance( 6 | nlp, spacy.lang.zh.Chinese 7 | ), "nlp应该是中文类的一个实例。" 8 | assert isinstance( 9 | doc, spacy.tokens.Doc 10 | ), "你用nlp实例处理过文本并且创建了一个doc吗?" 11 | assert "print(doc.text)" in __solution__, "你打印doc.text了吗?" 12 | 13 | __msg__.good("完美!我们现在继续试试documents,spans和tokens.") 14 | -------------------------------------------------------------------------------- /exercises/zh/test_01_03_01.py: -------------------------------------------------------------------------------- 1 | def test(): 2 | assert ( 3 | doc.text == "我喜欢老虎和狮子。" 4 | ), "你确定你正确处理了文本吗?" 5 | assert first_token == doc[0], "你确定你选择了第一个词符吗?" 6 | assert "print(first_token.text)" in __solution__, "你打印了词符的文本吗?" 7 | assert 'spacy.blank("zh")' in __solution__, '你将spacy.blank设置为正确的语言了吗?' 8 | __msg__.good("干得漂亮!") 9 | -------------------------------------------------------------------------------- /exercises/zh/test_01_03_02.py: -------------------------------------------------------------------------------- 1 | def test(): 2 | assert ( 3 | doc.text == "我喜欢老虎和狮子。" 4 | ), "你确定你正确处理文本了吗?" 5 | assert ( 6 | laohu == doc[2:3] 7 | ), "你确定你选择了老虎的正确跨度吗?" 8 | assert ( 9 | laohu_he_shizi == doc[2:5] 10 | ), "你确定你选择了老虎和狮子的正确跨度吗?" 11 | assert 'spacy.blank("zh")' in __solution__, '你将spacy.blank设置为正确的语言了吗?' 12 | __msg__.good("好样的!") 13 | -------------------------------------------------------------------------------- /exercises/zh/test_01_07.py: -------------------------------------------------------------------------------- 1 | def test(): 2 | assert "spacy.load" in __solution__, "你有调用spacy.load吗?" 3 | assert nlp.meta["lang"] == "zh", "你有调用正确的流程吗?" 4 | assert nlp.meta["name"] == "core_web_sm", "你有调用正确的流程吗?" 5 | assert "nlp(text)" in __solution__, "你有正确处理文本吗?" 6 | assert "print(doc.text)" in __solution__, "你有打印Doc的文本吗?" 
7 | 8 | __msg__.good( 9 | "好极了!现在你已经练习过读取模型,我们来看看模型的一些预测方法。" 10 | ) 11 | -------------------------------------------------------------------------------- /exercises/zh/test_01_08_01.py: -------------------------------------------------------------------------------- 1 | def test(): 2 | assert ( 3 | "token_text = token.text" in __solution__ 4 | ), "你有正确拿到词符的文本吗?" 5 | assert ( 6 | "token_pos = token.pos_" in __solution__ 7 | ), "你有正确拿到词符的词性标注了吗?记着要用带下划线的属性。" 8 | assert ( 9 | "token_dep = token.dep_" in __solution__ 10 | ), "你有正确拿到词符的依存关系标签了吗?记着要用带下划线的属性。" 11 | __msg__.good("完美!") 12 | -------------------------------------------------------------------------------- /exercises/zh/test_01_08_02.py: -------------------------------------------------------------------------------- 1 | def test(): 2 | assert "for ent in doc.ents" in __solution__, "你有遍历所有实体吗?" 3 | assert ( 4 | "print(ent.text, ent.label_)" in __solution__ 5 | ), "你有打印文本和标注吗?" 6 | 7 | __msg__.good( 8 | "太棒啦!到现在为止,每一次模型都是正确的。" 9 | "下一个练习我们看看模型错了会怎么样," 10 | "以及如何调整模型。" 11 | ) 12 | -------------------------------------------------------------------------------- /exercises/zh/test_01_09.py: -------------------------------------------------------------------------------- 1 | def test(): 2 | assert "for ent in doc.ents" in __solution__, "你有遍历实体吗?" 3 | assert iphone_x.text == "iPhone X", "你确定iphone_x包含了所有正确的词符吗?" 4 | 5 | __msg__.good( 6 | "完美!当然你也不用一定要这么手动来做。" 7 | "下一个练习我们来学习spaCy的基于规则的matcher," 8 | "使用它我们就可以在文本中寻找到特定的词语和短语了。" 9 | ) 10 | -------------------------------------------------------------------------------- /exercises/zh/test_02_02_01.py: -------------------------------------------------------------------------------- 1 | def test(): 2 | assert cat_hash == nlp.vocab.strings["猫"], "你有使用正确的哈希值吗?" 3 | assert 'nlp.vocab.strings["猫"]' in __solution__, "你有使用正确的字符串吗?" 4 | assert cat_string == "猫", "你有获得正确的字符串吗?" 5 | assert ( 6 | "nlp.vocab.strings[cat_hash]" in __solution__ 7 | ), "你有从哈希值中获得字符串吗?" 8 | 9 | __msg__.good("干得漂亮!") 10 | -------------------------------------------------------------------------------- /exercises/zh/test_02_02_02.py: -------------------------------------------------------------------------------- 1 | def test(): 2 | assert ( 3 | person_hash == nlp.vocab.strings["人物"] 4 | ), "你有使用正确的哈希值吗?" 5 | assert ( 6 | 'nlp.vocab.strings["人物"]' in __solution__ 7 | ), "你有使用正确的哈希值吗?" 8 | assert person_string == "人物", "你有获得正确的字符串吗?" 9 | assert ( 10 | "nlp.vocab.strings[person_hash]" in __solution__ 11 | ), "你有从哈希值中获得字符串吗?" 12 | 13 | __msg__.good("干得漂亮!") 14 | -------------------------------------------------------------------------------- /exercises/zh/test_02_05_01.py: -------------------------------------------------------------------------------- 1 | def test(): 2 | assert ( 3 | "from spacy.tokens import Doc" in __solution__ 4 | ), "你有正确导入Doc类吗?" 5 | assert doc.text == "spaCy is cool!", "你有正确创建Doc吗?" 6 | assert "print(doc.text)" in __solution__, "你有打印Doc的文字吗?" 7 | __msg__.good("好极了!") 8 | -------------------------------------------------------------------------------- /exercises/zh/test_02_09.py: -------------------------------------------------------------------------------- 1 | def test(): 2 | assert ( 3 | 'spacy.load("zh_core_web_md")' in __solution__ 4 | ), "你有正确读入中等规模的流程吗?" 5 | assert "doc[2].vector" in __solution__, "你有得到正确的向量吗?" 
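    # 注:只有自带词向量的流程(例如zh_core_web_md)才能给出有意义的.vector值,
    # 小型(sm)流程不包含词向量。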
6 | __msg__.good( 7 | "干得漂亮!下一个练习,我们会用spaCy来通过这些词向量计算document、span、和token" 8 | "之间的相似度。" 9 | ) 10 | -------------------------------------------------------------------------------- /exercises/zh/test_02_10_01.py: -------------------------------------------------------------------------------- 1 | def test(): 2 | assert ( 3 | "doc1.similarity(doc2)" in __solution__ or "doc2.similarity(doc1)" in __solution__ 4 | ), "你有计算两个doc之间的相似度吗?" 5 | assert ( 6 | 0 <= float(similarity) <= 1 7 | ), "相似度分数是一个浮点数。你确定你计算正确了吗?" 8 | __msg__.good("棒棒哒!") 9 | -------------------------------------------------------------------------------- /exercises/zh/test_02_10_02.py: -------------------------------------------------------------------------------- 1 | def test(): 2 | assert ( 3 | "token1.similarity(token2)" in __solution__ or "token2.similarity(token1)" in __solution__ 4 | ), "你有计算两个token之间的相似度吗?" 5 | assert ( 6 | 0 <= float(similarity) <= 1 7 | ), "相似度分数是一个浮点数。你确定你计算正确了吗?" 8 | __msg__.good("厉害!") 9 | -------------------------------------------------------------------------------- /exercises/zh/test_02_14.py: -------------------------------------------------------------------------------- 1 | def test(): 2 | assert ( 3 | "from spacy.matcher import PhraseMatcher" in __solution__ 4 | ), "你有正确导入PhraseMatcher吗?" 5 | assert ( 6 | "PhraseMatcher(nlp.vocab)" in __solution__ 7 | ), "你有正确初始化PhraseMatcher吗?" 8 | assert "matcher(doc)" in __solution__, "你有在doc上调用matcher吗?" 9 | assert len(matches) == 2, "匹配结果数目不对,应该是2个。" 10 | __msg__.good("棒极了!我们来用这个matcher添加一些定制化的实体。") 11 | -------------------------------------------------------------------------------- /exercises/zh/test_03_03.py: -------------------------------------------------------------------------------- 1 | def test(): 2 | assert nlp.meta["name"] == "core_web_sm", "你有读取正确的流程吗?" 3 | assert nlp.meta["lang"] == "zh", "你有读取正确的流程吗?" 4 | assert "print(nlp.pipe_names)" in __solution__, "你有打印组件名字了吗?" 5 | assert "print(nlp.pipeline)" in __solution__, "你有打印流程了吗?" 6 | 7 | __msg__.good( 8 | "干得漂亮!当你不确定当前流程的时候,你可以随时打印nlp.pipe_names或者" 9 | "nlp.pipeline来检查下。" 10 | ) 11 | -------------------------------------------------------------------------------- /exercises/zh/test_03_06.py: -------------------------------------------------------------------------------- 1 | def test(): 2 | assert "len(doc)" in __solution__, "你有拿到doc的长度了吗?" 3 | assert "return doc" in __solution__, "你有返回这个doc吗?" 4 | assert "nlp.add_pipe" in __solution__, "你有添加这个组件吗?" 5 | assert ( 6 | "first=True" in __solution__ 7 | ), "你有把组件加到流程的最前面吗?" 8 | assert nlp.pipe_names[0] == "length_component", "组件名字好像不太对?" 9 | __msg__.good("完美!现在我们来看看再复杂一点的组件!") 10 | -------------------------------------------------------------------------------- /exercises/zh/test_03_14_01.py: -------------------------------------------------------------------------------- 1 | def test(): 2 | assert ( 3 | "for doc in nlp.pipe(TEXTS)" in __solution__ 4 | ), "你有遍历nlp.pipe生成的那些doc吗?" 5 | __msg__.good("好样的!") 6 | -------------------------------------------------------------------------------- /exercises/zh/test_03_14_02.py: -------------------------------------------------------------------------------- 1 | def test(): 2 | assert ( 3 | "docs = list(nlp.pipe(TEXTS))" in __solution__ 4 | ), "你有用list将nlp.pipe的结果变为列表吗?" 
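    # 注:nlp.pipe返回的是一个按批次惰性处理文本的生成器,
    # 用list()把结果实例化之后,得到的doc对象才能被反复索引和遍历。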
5 | __msg__.good("美美哒!") 6 | -------------------------------------------------------------------------------- /exercises/zh/test_03_14_03.py: -------------------------------------------------------------------------------- 1 | def test(): 2 | assert ( 3 | "patterns = list(nlp.pipe(people))" in __solution__ 4 | ), "你有用list将nlp.pipe的结果变为列表吗?" 5 | 6 | __msg__.good( 7 | "干得漂亮!接下来我们看一个实际例子,用nlp.pipe来处理文档生成更多的元数据。" 8 | ) 9 | -------------------------------------------------------------------------------- /exercises/zh/test_03_16_01.py: -------------------------------------------------------------------------------- 1 | def test(): 2 | assert ( 3 | "doc = nlp.make_doc(text)" in __solution__ 4 | or "doc = nlp.tokenizer(text)" in __solution__ 5 | ), "你是否仅是对文本做了分词?" 6 | 7 | __msg__.good("棒棒哒!") 8 | -------------------------------------------------------------------------------- /exercises/zh/test_03_16_02.py: -------------------------------------------------------------------------------- 1 | def test(): 2 | assert ( 3 | 'with nlp.select_pipes(disable=["tagger", "parser"])' in __solution__ 4 | or 'with nlp.select_pipes(disable=["parser", "tagger"])' in __solution__ 5 | ), "你是否在nlp.select_pipes中调用了正确的组件?" 6 | 7 | __msg__.good( 8 | "完美!现在我们已经练习了一些技巧来提高性能,我们可以学习下一个章节," 9 | "训练一些spaCy的神经网络模型了。" 10 | ) 11 | -------------------------------------------------------------------------------- /exercises/zh/test_04_04.py: -------------------------------------------------------------------------------- 1 | def test(): 2 | assert ( 3 | 'spacy.blank("zh")' in __solution__ 4 | ), "你有创建空的中文模型了吗?" 5 | assert ( 6 | "DocBin(docs=docs)" in __solution__ 7 | ), "你有正确创建DocBin对象吗?" 8 | assert "doc_bin.to_disk(" in __solution__, "你有使用方法to_disk吗?" 9 | assert "train.spacy" in __solution__, "你确定文件名是正确的吗?" 10 | 11 | __msg__.good( 12 | "好极了!流程现在没问题了,我们要开始进行训练了。" 13 | ) 14 | -------------------------------------------------------------------------------- /exercises/zh/train_gadget.spacy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/explosion/spacy-course/77d8ee1630788b1fb0df1f80f999f7355941a37f/exercises/zh/train_gadget.spacy -------------------------------------------------------------------------------- /exercises/zh/weibo.json: -------------------------------------------------------------------------------- 1 | [ 2 | "我最喜欢吃性价比高的麦当劳了!", 3 | "我以为麦当劳只有预处理的汉堡,现在我才发现他们家还有生的汉堡??", 4 | "为什么各位还在吃麦当劳 :(", 5 | "中国的麦当劳有老北京鸡肉卷,这也太爽了!", 6 | "作为一个帅帅的男人,去麦当劳我只吃巨无霸:P", 7 | "今天早上决定去吃麦当劳套餐,现在胃里涨了一整天了。" 8 | ] 9 | -------------------------------------------------------------------------------- /gatsby-browser.js: -------------------------------------------------------------------------------- 1 | // This doesn't have to be here – but if we do import Juniper here, it's already 2 | // preloaded and cached when we dynamically import it in code.js. 
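// A sketch of the dynamic import this refers to (the exact call lives in
// code.js and may differ; the path below is illustrative): once Juniper has
// been loaded eagerly here, `import('./juniper')` resolves from webpack's
// module cache instead of triggering a second network request.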
3 | import Juniper from './src/components/juniper' // eslint-disable-line no-unused-vars 4 | -------------------------------------------------------------------------------- /src/context.js: -------------------------------------------------------------------------------- 1 | import React from 'react' 2 | 3 | export const ChapterContext = React.createContext() 4 | export const LocaleContext = React.createContext() 5 | -------------------------------------------------------------------------------- /src/pages/de.js: -------------------------------------------------------------------------------- 1 | import React from 'react' 2 | 3 | import Home from '../components/home' 4 | 5 | export default () => <Home lang="de" /> 6 | -------------------------------------------------------------------------------- /src/pages/en.js: -------------------------------------------------------------------------------- 1 | import React from 'react' 2 | 3 | import Home from '../components/home' 4 | 5 | export default () => ( 6 |     <Home lang="en" /> 7 | ) 8 | -------------------------------------------------------------------------------- /src/pages/es.js: -------------------------------------------------------------------------------- 1 | import React from 'react' 2 | 3 | import Home from '../components/home' 4 | 5 | export default () => ( 6 |     <Home lang="es" /> 7 | ) 8 | -------------------------------------------------------------------------------- /src/pages/fr.js: -------------------------------------------------------------------------------- 1 | import React from 'react' 2 | 3 | import Home from '../components/home' 4 | 5 | export default () => <Home lang="fr" /> 6 | -------------------------------------------------------------------------------- /src/pages/index.js: -------------------------------------------------------------------------------- 1 | import React from 'react' 2 | 3 | import Home from '../components/home' 4 | 5 | export default () => ( 6 |     <Home lang="en" /> 7 | ) 8 | -------------------------------------------------------------------------------- /src/pages/ja.js: -------------------------------------------------------------------------------- 1 | import React from 'react' 2 | 3 | import Home from '../components/home' 4 | 5 | export default () => ( 6 |     <Home lang="ja" /> 7 | ) 8 | -------------------------------------------------------------------------------- /src/pages/pt.js: -------------------------------------------------------------------------------- 1 | import React from 'react' 2 | 3 | import Home from '../components/home' 4 | 5 | export default () => <Home lang="pt" /> 6 | -------------------------------------------------------------------------------- /src/pages/zh.js: -------------------------------------------------------------------------------- 1 | import React from 'react' 2 | 3 | import Home from '../components/home' 4 | 5 | export default () => ( 6 |     <Home lang="zh" /> 7 | ) 8 | -------------------------------------------------------------------------------- /src/styles/chapter.module.sass: -------------------------------------------------------------------------------- 1 | .pagination 2 | max-width: 100% 3 | width: var(--width-container) 4 | margin: 4rem auto 0 5 | display: flex 6 | justify-content: space-between 7 | -------------------------------------------------------------------------------- /src/styles/link.module.sass: -------------------------------------------------------------------------------- 1 | .root 2 | cursor: pointer 3 | border-bottom: 1px solid var(--color-theme) 4 | 5 | &:hover 6 | border-bottom-color: var(--color-front) 7 | 8 | .secondary 9 | border-bottom-color: var(--color-subtle-dark) 10 | 11 | 
&:hover 12 | border-bottom-color: var(--color-front) 13 | 14 | .hidden 15 | border-bottom: 0 16 | -------------------------------------------------------------------------------- /static/dep_example.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/explosion/spacy-course/77d8ee1630788b1fb0df1f80f999f7355941a37f/static/dep_example.png -------------------------------------------------------------------------------- /static/dep_example_de.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/explosion/spacy-course/77d8ee1630788b1fb0df1f80f999f7355941a37f/static/dep_example_de.png -------------------------------------------------------------------------------- /static/dep_example_es.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/explosion/spacy-course/77d8ee1630788b1fb0df1f80f999f7355941a37f/static/dep_example_es.png -------------------------------------------------------------------------------- /static/dep_example_fr.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/explosion/spacy-course/77d8ee1630788b1fb0df1f80f999f7355941a37f/static/dep_example_fr.png -------------------------------------------------------------------------------- /static/dep_example_ja.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/explosion/spacy-course/77d8ee1630788b1fb0df1f80f999f7355941a37f/static/dep_example_ja.png -------------------------------------------------------------------------------- /static/dep_example_zh.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/explosion/spacy-course/77d8ee1630788b1fb0df1f80f999f7355941a37f/static/dep_example_zh.png -------------------------------------------------------------------------------- /static/doc.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/explosion/spacy-course/77d8ee1630788b1fb0df1f80f999f7355941a37f/static/doc.png -------------------------------------------------------------------------------- /static/doc_span.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/explosion/spacy-course/77d8ee1630788b1fb0df1f80f999f7355941a37f/static/doc_span.png -------------------------------------------------------------------------------- /static/icon.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/explosion/spacy-course/77d8ee1630788b1fb0df1f80f999f7355941a37f/static/icon.png -------------------------------------------------------------------------------- /static/icon_check.svg: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | -------------------------------------------------------------------------------- /static/ner_example.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/explosion/spacy-course/77d8ee1630788b1fb0df1f80f999f7355941a37f/static/ner_example.png -------------------------------------------------------------------------------- /static/ner_example_de.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/explosion/spacy-course/77d8ee1630788b1fb0df1f80f999f7355941a37f/static/ner_example_de.png -------------------------------------------------------------------------------- /static/ner_example_es.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/explosion/spacy-course/77d8ee1630788b1fb0df1f80f999f7355941a37f/static/ner_example_es.png -------------------------------------------------------------------------------- /static/ner_example_fr.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/explosion/spacy-course/77d8ee1630788b1fb0df1f80f999f7355941a37f/static/ner_example_fr.png -------------------------------------------------------------------------------- /static/ner_example_ja.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/explosion/spacy-course/77d8ee1630788b1fb0df1f80f999f7355941a37f/static/ner_example_ja.png -------------------------------------------------------------------------------- /static/ner_example_zh.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/explosion/spacy-course/77d8ee1630788b1fb0df1f80f999f7355941a37f/static/ner_example_zh.png -------------------------------------------------------------------------------- /static/package.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/explosion/spacy-course/77d8ee1630788b1fb0df1f80f999f7355941a37f/static/package.png -------------------------------------------------------------------------------- /static/package_de.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/explosion/spacy-course/77d8ee1630788b1fb0df1f80f999f7355941a37f/static/package_de.png -------------------------------------------------------------------------------- /static/package_es.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/explosion/spacy-course/77d8ee1630788b1fb0df1f80f999f7355941a37f/static/package_es.png -------------------------------------------------------------------------------- /static/package_fr.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/explosion/spacy-course/77d8ee1630788b1fb0df1f80f999f7355941a37f/static/package_fr.png -------------------------------------------------------------------------------- /static/package_ja.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/explosion/spacy-course/77d8ee1630788b1fb0df1f80f999f7355941a37f/static/package_ja.png -------------------------------------------------------------------------------- /static/package_meta.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/explosion/spacy-course/77d8ee1630788b1fb0df1f80f999f7355941a37f/static/package_meta.png -------------------------------------------------------------------------------- /static/package_meta_de.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/explosion/spacy-course/77d8ee1630788b1fb0df1f80f999f7355941a37f/static/package_meta_de.png -------------------------------------------------------------------------------- /static/package_meta_es.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/explosion/spacy-course/77d8ee1630788b1fb0df1f80f999f7355941a37f/static/package_meta_es.png -------------------------------------------------------------------------------- /static/package_meta_fr.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/explosion/spacy-course/77d8ee1630788b1fb0df1f80f999f7355941a37f/static/package_meta_fr.png -------------------------------------------------------------------------------- /static/package_meta_zh.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/explosion/spacy-course/77d8ee1630788b1fb0df1f80f999f7355941a37f/static/package_meta_zh.png -------------------------------------------------------------------------------- /static/package_zh.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/explosion/spacy-course/77d8ee1630788b1fb0df1f80f999f7355941a37f/static/package_zh.png -------------------------------------------------------------------------------- /static/pipeline.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/explosion/spacy-course/77d8ee1630788b1fb0df1f80f999f7355941a37f/static/pipeline.png -------------------------------------------------------------------------------- /static/profile.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/explosion/spacy-course/77d8ee1630788b1fb0df1f80f999f7355941a37f/static/profile.jpg -------------------------------------------------------------------------------- /static/social.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/explosion/spacy-course/77d8ee1630788b1fb0df1f80f999f7355941a37f/static/social.jpg -------------------------------------------------------------------------------- /static/social_de.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/explosion/spacy-course/77d8ee1630788b1fb0df1f80f999f7355941a37f/static/social_de.jpg -------------------------------------------------------------------------------- /static/social_es.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/explosion/spacy-course/77d8ee1630788b1fb0df1f80f999f7355941a37f/static/social_es.jpg -------------------------------------------------------------------------------- /static/social_fr.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/explosion/spacy-course/77d8ee1630788b1fb0df1f80f999f7355941a37f/static/social_fr.jpg -------------------------------------------------------------------------------- /static/social_ja.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/explosion/spacy-course/77d8ee1630788b1fb0df1f80f999f7355941a37f/static/social_ja.jpg 
-------------------------------------------------------------------------------- /static/social_pt.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/explosion/spacy-course/77d8ee1630788b1fb0df1f80f999f7355941a37f/static/social_pt.jpg -------------------------------------------------------------------------------- /static/social_zh.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/explosion/spacy-course/77d8ee1630788b1fb0df1f80f999f7355941a37f/static/social_zh.jpg -------------------------------------------------------------------------------- /static/span_indices.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/explosion/spacy-course/77d8ee1630788b1fb0df1f80f999f7355941a37f/static/span_indices.png -------------------------------------------------------------------------------- /static/training.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/explosion/spacy-course/77d8ee1630788b1fb0df1f80f999f7355941a37f/static/training.png -------------------------------------------------------------------------------- /static/training_de.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/explosion/spacy-course/77d8ee1630788b1fb0df1f80f999f7355941a37f/static/training_de.png -------------------------------------------------------------------------------- /static/training_es.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/explosion/spacy-course/77d8ee1630788b1fb0df1f80f999f7355941a37f/static/training_es.png -------------------------------------------------------------------------------- /static/training_fr.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/explosion/spacy-course/77d8ee1630788b1fb0df1f80f999f7355941a37f/static/training_fr.png -------------------------------------------------------------------------------- /static/training_zh.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/explosion/spacy-course/77d8ee1630788b1fb0df1f80f999f7355941a37f/static/training_zh.png -------------------------------------------------------------------------------- /static/vocab_stringstore.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/explosion/spacy-course/77d8ee1630788b1fb0df1f80f999f7355941a37f/static/vocab_stringstore.png -------------------------------------------------------------------------------- /static/vocab_stringstore_de.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/explosion/spacy-course/77d8ee1630788b1fb0df1f80f999f7355941a37f/static/vocab_stringstore_de.png -------------------------------------------------------------------------------- /static/vocab_stringstore_es.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/explosion/spacy-course/77d8ee1630788b1fb0df1f80f999f7355941a37f/static/vocab_stringstore_es.png -------------------------------------------------------------------------------- /static/vocab_stringstore_fr.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/explosion/spacy-course/77d8ee1630788b1fb0df1f80f999f7355941a37f/static/vocab_stringstore_fr.png -------------------------------------------------------------------------------- /static/vocab_stringstore_zh.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/explosion/spacy-course/77d8ee1630788b1fb0df1f80f999f7355941a37f/static/vocab_stringstore_zh.png -------------------------------------------------------------------------------- /static/website.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/explosion/spacy-course/77d8ee1630788b1fb0df1f80f999f7355941a37f/static/website.png --------------------------------------------------------------------------------