├── .dockerignore ├── .eslintrc.js ├── .gitignore ├── .prettierrc ├── LICENSE ├── Makefile ├── README.md ├── azure-pipelines.yml ├── binder └── requirements.txt ├── chapters ├── LICENSE ├── de │ ├── chapter1.md │ ├── chapter2.md │ ├── chapter3.md │ ├── chapter4.md │ └── slides │ │ ├── chapter1_01_introduction-to-spacy.md │ │ ├── chapter1_02_statistical-models.md │ │ ├── chapter1_03_rule-based-matching.md │ │ ├── chapter2_01_data-structures-1.md │ │ ├── chapter2_02_data-structures-2.md │ │ ├── chapter2_03_word-vectors-similarity.md │ │ ├── chapter2_04_models-rules.md │ │ ├── chapter3_01_processing-pipelines.md │ │ ├── chapter3_02_custom-pipeline-components.md │ │ ├── chapter3_03_extension-attributes.md │ │ ├── chapter3_04_scaling-performance.md │ │ ├── chapter4_01_training-updating-models.md │ │ ├── chapter4_02_running-training.md │ │ ├── chapter4_03_training-best-practices.md │ │ └── chapter4_04_wrapping-up.md ├── en │ ├── chapter1.md │ ├── chapter2.md │ ├── chapter3.md │ ├── chapter4.md │ └── slides │ │ ├── chapter1_01_introduction-to-spacy.md │ │ ├── chapter1_02_statistical-models.md │ │ ├── chapter1_03_rule-based-matching.md │ │ ├── chapter2_01_data-structures-1.md │ │ ├── chapter2_02_data-structures-2.md │ │ ├── chapter2_03_word-vectors-similarity.md │ │ ├── chapter2_04_models-rules.md │ │ ├── chapter3_01_processing-pipelines.md │ │ ├── chapter3_02_custom-pipeline-components.md │ │ ├── chapter3_03_extension-attributes.md │ │ ├── chapter3_04_scaling-performance.md │ │ ├── chapter4_01_training-updating-models.md │ │ ├── chapter4_02_running-training.md │ │ ├── chapter4_03_training-best-practices.md │ │ └── chapter4_04_wrapping-up.md ├── es │ ├── chapter1.md │ ├── chapter2.md │ ├── chapter3.md │ ├── chapter4.md │ └── slides │ │ ├── chapter1_01_introduction-to-spacy.md │ │ ├── chapter1_02_statistical-models.md │ │ ├── chapter1_03_rule-based-matching.md │ │ ├── chapter2_01_data-structures-1.md │ │ ├── chapter2_02_data-structures-2.md │ │ ├── chapter2_03_word-vectors-similarity.md │ │ ├── chapter2_04_models-rules.md │ │ ├── chapter3_01_processing-pipelines.md │ │ ├── chapter3_02_custom-pipeline-components.md │ │ ├── chapter3_03_extension-attributes.md │ │ ├── chapter3_04_scaling-performance.md │ │ ├── chapter4_01_training-updating-models.md │ │ ├── chapter4_02_running-training.md │ │ ├── chapter4_03_training-best-practices.md │ │ └── chapter4_04_wrapping-up.md ├── fr │ ├── chapter1.md │ ├── chapter2.md │ ├── chapter3.md │ ├── chapter4.md │ └── slides │ │ ├── chapter1_01_introduction-to-spacy.md │ │ ├── chapter1_02_statistical-models.md │ │ ├── chapter1_03_rule-based-matching.md │ │ ├── chapter2_01_data-structures-1.md │ │ ├── chapter2_02_data-structures-2.md │ │ ├── chapter2_03_word-vectors-similarity.md │ │ ├── chapter2_04_models-rules.md │ │ ├── chapter3_01_processing-pipelines.md │ │ ├── chapter3_02_custom-pipeline-components.md │ │ ├── chapter3_03_extension-attributes.md │ │ ├── chapter3_04_scaling-performance.md │ │ ├── chapter4_01_training-updating-models.md │ │ ├── chapter4_02_running-training.md │ │ ├── chapter4_03_training-best-practices.md │ │ └── chapter4_04_wrapping-up.md ├── ja │ ├── README.txt │ ├── chapter1.md │ ├── chapter2.md │ ├── chapter3.md │ ├── chapter4.md │ └── slides │ │ ├── chapter1_01_introduction-to-spacy.md │ │ ├── chapter1_02_statistical-models.md │ │ ├── chapter1_03_rule-based-matching.md │ │ ├── chapter2_01_data-structures-1.md │ │ ├── chapter2_02_data-structures-2.md │ │ ├── chapter2_03_word-vectors-similarity.md │ │ ├── chapter2_04_models-rules.md │ │ ├── 
chapter3_01_processing-pipelines.md │ │ ├── chapter3_02_custom-pipeline-components.md │ │ ├── chapter3_03_extension-attributes.md │ │ ├── chapter3_04_scaling-performance.md │ │ ├── chapter4_01_training-updating-models.md │ │ ├── chapter4_02_running-training.md │ │ ├── chapter4_03_training-best-practices.md │ │ └── chapter4_04_wrapping-up.md ├── pt │ ├── chapter1.md │ ├── chapter2.md │ ├── chapter3.md │ ├── chapter4.md │ └── slides │ │ ├── chapter1_01_introduction-to-spacy.md │ │ ├── chapter1_02_statistical-models.md │ │ ├── chapter1_03_rule-based-matching.md │ │ ├── chapter2_01_data-structures-1.md │ │ ├── chapter2_02_data-structures-2.md │ │ ├── chapter2_03_word-vectors-similarity.md │ │ ├── chapter2_04_models-rules.md │ │ ├── chapter3_01_processing-pipelines.md │ │ ├── chapter3_02_custom-pipeline-components.md │ │ ├── chapter3_03_extension-attributes.md │ │ ├── chapter3_04_scaling-performance.md │ │ ├── chapter4_01_training-updating-models.md │ │ ├── chapter4_02_running-training.md │ │ ├── chapter4_02_training-loop.md │ │ ├── chapter4_03_training-best-practices.md │ │ └── chapter4_04_wrapping-up.md └── zh │ ├── chapter1.md │ ├── chapter2.md │ ├── chapter3.md │ ├── chapter4.md │ └── slides │ ├── chapter1_01_introduction-to-spacy.md │ ├── chapter1_02_statistical-models.md │ ├── chapter1_03_rule-based-matching.md │ ├── chapter2_01_data-structures-1.md │ ├── chapter2_02_data-structures-2.md │ ├── chapter2_03_word-vectors-similarity.md │ ├── chapter2_04_models-rules.md │ ├── chapter3_01_processing-pipelines.md │ ├── chapter3_02_custom-pipeline-components.md │ ├── chapter3_03_extension-attributes.md │ ├── chapter3_04_scaling-performance.md │ ├── chapter4_01_training-updating-models.md │ ├── chapter4_02_running-training.md │ ├── chapter4_02_training-loop.md │ ├── chapter4_03_training-best-practices.md │ └── chapter4_04_wrapping-up.md ├── conftest.py ├── docker └── Dockerfile ├── exercises ├── de │ ├── bookquotes.json │ ├── capitals.json │ ├── config_gadget.cfg │ ├── countries.json │ ├── country_text.txt │ ├── dev_gadget.spacy │ ├── exc_01_02_01.py │ ├── exc_01_02_02.py │ ├── exc_01_02_03.py │ ├── exc_01_03_01.py │ ├── exc_01_03_02.py │ ├── exc_01_04.py │ ├── exc_01_07.py │ ├── exc_01_08_01.py │ ├── exc_01_08_02.py │ ├── exc_01_09.py │ ├── exc_01_11.py │ ├── exc_01_12_01.py │ ├── exc_01_12_02.py │ ├── exc_01_12_03.py │ ├── exc_02_02_01.py │ ├── exc_02_02_02.py │ ├── exc_02_05_01.py │ ├── exc_02_05_02.py │ ├── exc_02_05_03.py │ ├── exc_02_06.py │ ├── exc_02_07.py │ ├── exc_02_09.py │ ├── exc_02_10_01.py │ ├── exc_02_10_02.py │ ├── exc_02_10_03.py │ ├── exc_02_13.py │ ├── exc_02_14.py │ ├── exc_02_15.py │ ├── exc_03_03.py │ ├── exc_03_06.py │ ├── exc_03_07.py │ ├── exc_03_09_01.py │ ├── exc_03_09_02.py │ ├── exc_03_10_01.py │ ├── exc_03_10_02.py │ ├── exc_03_11.py │ ├── exc_03_12.py │ ├── exc_03_14_01.py │ ├── exc_03_14_02.py │ ├── exc_03_14_03.py │ ├── exc_03_15.py │ ├── exc_03_16_01.py │ ├── exc_03_16_02.py │ ├── exc_04_03.py │ ├── exc_04_04.py │ ├── exc_04_07_01.sh │ ├── exc_04_07_02.sh │ ├── exc_04_08.sh │ ├── exc_04_11.py │ ├── exc_04_12_01.py │ ├── exc_04_12_02.py │ ├── gadgets.json │ ├── iphone.json │ ├── solution_01_02_01.py │ ├── solution_01_02_02.py │ ├── solution_01_02_03.py │ ├── solution_01_03_01.py │ ├── solution_01_03_02.py │ ├── solution_01_04.py │ ├── solution_01_07.py │ ├── solution_01_08_01.py │ ├── solution_01_08_02.py │ ├── solution_01_09.py │ ├── solution_01_11.py │ ├── solution_01_12_01.py │ ├── solution_01_12_02.py │ ├── solution_01_12_03.py │ ├── solution_02_02_01.py │ ├── 
solution_02_02_02.py │ ├── solution_02_05_01.py │ ├── solution_02_05_02.py │ ├── solution_02_05_03.py │ ├── solution_02_06.py │ ├── solution_02_07.py │ ├── solution_02_09.py │ ├── solution_02_10_01.py │ ├── solution_02_10_02.py │ ├── solution_02_10_03.py │ ├── solution_02_13.py │ ├── solution_02_14.py │ ├── solution_02_15.py │ ├── solution_03_03.py │ ├── solution_03_06.py │ ├── solution_03_07.py │ ├── solution_03_09_01.py │ ├── solution_03_09_02.py │ ├── solution_03_10_01.py │ ├── solution_03_10_02.py │ ├── solution_03_11.py │ ├── solution_03_12.py │ ├── solution_03_14_01.py │ ├── solution_03_14_02.py │ ├── solution_03_14_03.py │ ├── solution_03_15.py │ ├── solution_03_16_01.py │ ├── solution_03_16_02.py │ ├── solution_04_03.py │ ├── solution_04_04.py │ ├── solution_04_07_01.sh │ ├── solution_04_07_02.sh │ ├── solution_04_08.sh │ ├── solution_04_11.py │ ├── solution_04_12_01.py │ ├── solution_04_12_02.py │ ├── test_01_02_01.py │ ├── test_01_02_02.py │ ├── test_01_02_03.py │ ├── test_01_03_01.py │ ├── test_01_03_02.py │ ├── test_01_04.py │ ├── test_01_07.py │ ├── test_01_08_01.py │ ├── test_01_08_02.py │ ├── test_01_09.py │ ├── test_01_11.py │ ├── test_01_12_01.py │ ├── test_01_12_02.py │ ├── test_01_12_03.py │ ├── test_02_02_01.py │ ├── test_02_02_02.py │ ├── test_02_05_01.py │ ├── test_02_05_02.py │ ├── test_02_05_03.py │ ├── test_02_06.py │ ├── test_02_07.py │ ├── test_02_09.py │ ├── test_02_10_01.py │ ├── test_02_10_02.py │ ├── test_02_10_03.py │ ├── test_02_13.py │ ├── test_02_14.py │ ├── test_02_15.py │ ├── test_03_03.py │ ├── test_03_06.py │ ├── test_03_07.py │ ├── test_03_09_01.py │ ├── test_03_09_02.py │ ├── test_03_10_01.py │ ├── test_03_10_02.py │ ├── test_03_11.py │ ├── test_03_12.py │ ├── test_03_14_01.py │ ├── test_03_14_02.py │ ├── test_03_14_03.py │ ├── test_03_15.py │ ├── test_03_16_01.py │ ├── test_03_16_02.py │ ├── test_04_03.py │ ├── test_04_04.py │ ├── test_04_11.py │ ├── test_04_12_01.py │ ├── test_04_12_02.py │ ├── test_general.py │ ├── train_gadget.spacy │ └── tweets.json ├── en │ ├── bookquotes.json │ ├── capitals.json │ ├── config_gadget.cfg │ ├── countries.json │ ├── country_text.txt │ ├── dev_gadget.spacy │ ├── exc_01_02_01.py │ ├── exc_01_02_02.py │ ├── exc_01_02_03.py │ ├── exc_01_03_01.py │ ├── exc_01_03_02.py │ ├── exc_01_04.py │ ├── exc_01_07.py │ ├── exc_01_08_01.py │ ├── exc_01_08_02.py │ ├── exc_01_09.py │ ├── exc_01_11.py │ ├── exc_01_12_01.py │ ├── exc_01_12_02.py │ ├── exc_01_12_03.py │ ├── exc_02_02_01.py │ ├── exc_02_02_02.py │ ├── exc_02_05_01.py │ ├── exc_02_05_02.py │ ├── exc_02_05_03.py │ ├── exc_02_06.py │ ├── exc_02_07.py │ ├── exc_02_09.py │ ├── exc_02_10_01.py │ ├── exc_02_10_02.py │ ├── exc_02_10_03.py │ ├── exc_02_13.py │ ├── exc_02_14.py │ ├── exc_02_15.py │ ├── exc_03_03.py │ ├── exc_03_06.py │ ├── exc_03_07.py │ ├── exc_03_09_01.py │ ├── exc_03_09_02.py │ ├── exc_03_10_01.py │ ├── exc_03_10_02.py │ ├── exc_03_11.py │ ├── exc_03_12.py │ ├── exc_03_14_01.py │ ├── exc_03_14_02.py │ ├── exc_03_14_03.py │ ├── exc_03_15.py │ ├── exc_03_16_01.py │ ├── exc_03_16_02.py │ ├── exc_04_03.py │ ├── exc_04_04.py │ ├── exc_04_07_01.sh │ ├── exc_04_07_02.sh │ ├── exc_04_08.sh │ ├── exc_04_11.py │ ├── exc_04_12_01.py │ ├── exc_04_12_02.py │ ├── gadgets.json │ ├── iphone.json │ ├── solution_01_02_01.py │ ├── solution_01_02_02.py │ ├── solution_01_02_03.py │ ├── solution_01_03_01.py │ ├── solution_01_03_02.py │ ├── solution_01_04.py │ ├── solution_01_07.py │ ├── solution_01_08_01.py │ ├── solution_01_08_02.py │ ├── solution_01_09.py │ ├── solution_01_11.py 
│ ├── solution_01_12_01.py │ ├── solution_01_12_02.py │ ├── solution_01_12_03.py │ ├── solution_02_02_01.py │ ├── solution_02_02_02.py │ ├── solution_02_05_01.py │ ├── solution_02_05_02.py │ ├── solution_02_05_03.py │ ├── solution_02_06.py │ ├── solution_02_07.py │ ├── solution_02_09.py │ ├── solution_02_10_01.py │ ├── solution_02_10_02.py │ ├── solution_02_10_03.py │ ├── solution_02_13.py │ ├── solution_02_14.py │ ├── solution_02_15.py │ ├── solution_03_03.py │ ├── solution_03_06.py │ ├── solution_03_07.py │ ├── solution_03_09_01.py │ ├── solution_03_09_02.py │ ├── solution_03_10_01.py │ ├── solution_03_10_02.py │ ├── solution_03_11.py │ ├── solution_03_12.py │ ├── solution_03_14_01.py │ ├── solution_03_14_02.py │ ├── solution_03_14_03.py │ ├── solution_03_15.py │ ├── solution_03_16_01.py │ ├── solution_03_16_02.py │ ├── solution_04_03.py │ ├── solution_04_04.py │ ├── solution_04_07_01.sh │ ├── solution_04_07_02.sh │ ├── solution_04_08.sh │ ├── solution_04_11.py │ ├── solution_04_12_01.py │ ├── solution_04_12_02.py │ ├── test_01_02_01.py │ ├── test_01_02_02.py │ ├── test_01_02_03.py │ ├── test_01_03_01.py │ ├── test_01_03_02.py │ ├── test_01_04.py │ ├── test_01_07.py │ ├── test_01_08_01.py │ ├── test_01_08_02.py │ ├── test_01_09.py │ ├── test_01_11.py │ ├── test_01_12_01.py │ ├── test_01_12_02.py │ ├── test_01_12_03.py │ ├── test_02_02_01.py │ ├── test_02_02_02.py │ ├── test_02_05_01.py │ ├── test_02_05_02.py │ ├── test_02_05_03.py │ ├── test_02_06.py │ ├── test_02_07.py │ ├── test_02_09.py │ ├── test_02_10_01.py │ ├── test_02_10_02.py │ ├── test_02_10_03.py │ ├── test_02_13.py │ ├── test_02_14.py │ ├── test_02_15.py │ ├── test_03_03.py │ ├── test_03_06.py │ ├── test_03_07.py │ ├── test_03_09_01.py │ ├── test_03_09_02.py │ ├── test_03_10_01.py │ ├── test_03_10_02.py │ ├── test_03_11.py │ ├── test_03_12.py │ ├── test_03_14_01.py │ ├── test_03_14_02.py │ ├── test_03_14_03.py │ ├── test_03_15.py │ ├── test_03_16_01.py │ ├── test_03_16_02.py │ ├── test_04_03.py │ ├── test_04_04.py │ ├── test_04_11.py │ ├── test_04_12_01.py │ ├── test_04_12_02.py │ ├── test_general.py │ ├── train_gadget.spacy │ └── tweets.json ├── es │ ├── adidas.json │ ├── bookquotes.json │ ├── capitals.json │ ├── config_gadget.cfg │ ├── countries.json │ ├── country_text.txt │ ├── dev_gadget.spacy │ ├── exc_01_02_01.py │ ├── exc_01_02_02.py │ ├── exc_01_02_03.py │ ├── exc_01_03_01.py │ ├── exc_01_03_02.py │ ├── exc_01_04.py │ ├── exc_01_07.py │ ├── exc_01_08_01.py │ ├── exc_01_08_02.py │ ├── exc_01_09.py │ ├── exc_01_11.py │ ├── exc_01_12_01.py │ ├── exc_01_12_02.py │ ├── exc_01_12_03.py │ ├── exc_02_02_01.py │ ├── exc_02_02_02.py │ ├── exc_02_05_01.py │ ├── exc_02_05_02.py │ ├── exc_02_05_03.py │ ├── exc_02_06.py │ ├── exc_02_07.py │ ├── exc_02_09.py │ ├── exc_02_10_01.py │ ├── exc_02_10_02.py │ ├── exc_02_10_03.py │ ├── exc_02_13.py │ ├── exc_02_14.py │ ├── exc_02_15.py │ ├── exc_03_03.py │ ├── exc_03_06.py │ ├── exc_03_07.py │ ├── exc_03_09_01.py │ ├── exc_03_09_02.py │ ├── exc_03_10_01.py │ ├── exc_03_10_02.py │ ├── exc_03_11.py │ ├── exc_03_12.py │ ├── exc_03_14_01.py │ ├── exc_03_14_02.py │ ├── exc_03_14_03.py │ ├── exc_03_15.py │ ├── exc_03_16_01.py │ ├── exc_03_16_02.py │ ├── exc_04_03.py │ ├── exc_04_04.py │ ├── exc_04_07_01.sh │ ├── exc_04_07_02.sh │ ├── exc_04_08.sh │ ├── exc_04_11.py │ ├── exc_04_12_01.py │ ├── exc_04_12_02.py │ ├── gadgets.json │ ├── ropa.json │ ├── solution_01_02_01.py │ ├── solution_01_02_02.py │ ├── solution_01_02_03.py │ ├── solution_01_03_01.py │ ├── solution_01_03_02.py │ ├── 
solution_01_04.py │ ├── solution_01_07.py │ ├── solution_01_08_01.py │ ├── solution_01_08_02.py │ ├── solution_01_09.py │ ├── solution_01_11.py │ ├── solution_01_12_01.py │ ├── solution_01_12_02.py │ ├── solution_01_12_03.py │ ├── solution_02_02_01.py │ ├── solution_02_02_02.py │ ├── solution_02_05_01.py │ ├── solution_02_05_02.py │ ├── solution_02_05_03.py │ ├── solution_02_06.py │ ├── solution_02_07.py │ ├── solution_02_09.py │ ├── solution_02_10_01.py │ ├── solution_02_10_02.py │ ├── solution_02_10_03.py │ ├── solution_02_13.py │ ├── solution_02_14.py │ ├── solution_02_15.py │ ├── solution_03_03.py │ ├── solution_03_06.py │ ├── solution_03_07.py │ ├── solution_03_09_01.py │ ├── solution_03_09_02.py │ ├── solution_03_10_01.py │ ├── solution_03_10_02.py │ ├── solution_03_11.py │ ├── solution_03_12.py │ ├── solution_03_14_01.py │ ├── solution_03_14_02.py │ ├── solution_03_14_03.py │ ├── solution_03_15.py │ ├── solution_03_16_01.py │ ├── solution_03_16_02.py │ ├── solution_04_03.py │ ├── solution_04_04.py │ ├── solution_04_07_01.sh │ ├── solution_04_07_02.sh │ ├── solution_04_08.sh │ ├── solution_04_11.py │ ├── solution_04_12_01.py │ ├── solution_04_12_02.py │ ├── test_01_02_01.py │ ├── test_01_02_02.py │ ├── test_01_02_03.py │ ├── test_01_03_01.py │ ├── test_01_03_02.py │ ├── test_01_04.py │ ├── test_01_07.py │ ├── test_01_08_01.py │ ├── test_01_08_02.py │ ├── test_01_09.py │ ├── test_01_11.py │ ├── test_01_12_01.py │ ├── test_01_12_02.py │ ├── test_01_12_03.py │ ├── test_02_02_01.py │ ├── test_02_02_02.py │ ├── test_02_05_01.py │ ├── test_02_05_02.py │ ├── test_02_05_03.py │ ├── test_02_06.py │ ├── test_02_07.py │ ├── test_02_09.py │ ├── test_02_10_01.py │ ├── test_02_10_02.py │ ├── test_02_10_03.py │ ├── test_02_13.py │ ├── test_02_14.py │ ├── test_02_15.py │ ├── test_03_03.py │ ├── test_03_06.py │ ├── test_03_07.py │ ├── test_03_09_01.py │ ├── test_03_09_02.py │ ├── test_03_10_01.py │ ├── test_03_10_02.py │ ├── test_03_11.py │ ├── test_03_12.py │ ├── test_03_14_01.py │ ├── test_03_14_02.py │ ├── test_03_14_03.py │ ├── test_03_15.py │ ├── test_03_16_01.py │ ├── test_03_16_02.py │ ├── test_04_03.py │ ├── test_04_04.py │ ├── test_04_11.py │ ├── test_04_12_01.py │ ├── test_04_12_02.py │ ├── test_general.py │ ├── train_gadget.spacy │ └── tweets.json ├── fr │ ├── bookquotes.json │ ├── capitals.json │ ├── config_gadget.cfg │ ├── countries.json │ ├── country_text.txt │ ├── dev_gadget.spacy │ ├── exc_01_02_01.py │ ├── exc_01_02_02.py │ ├── exc_01_02_03.py │ ├── exc_01_03_01.py │ ├── exc_01_03_02.py │ ├── exc_01_04.py │ ├── exc_01_07.py │ ├── exc_01_08_01.py │ ├── exc_01_08_02.py │ ├── exc_01_09.py │ ├── exc_01_11.py │ ├── exc_01_12_01.py │ ├── exc_01_12_02.py │ ├── exc_01_12_03.py │ ├── exc_02_02_01.py │ ├── exc_02_02_02.py │ ├── exc_02_05_01.py │ ├── exc_02_05_02.py │ ├── exc_02_05_03.py │ ├── exc_02_06.py │ ├── exc_02_07.py │ ├── exc_02_09.py │ ├── exc_02_10_01.py │ ├── exc_02_10_02.py │ ├── exc_02_10_03.py │ ├── exc_02_13.py │ ├── exc_02_14.py │ ├── exc_02_15.py │ ├── exc_03_03.py │ ├── exc_03_06.py │ ├── exc_03_07.py │ ├── exc_03_09_01.py │ ├── exc_03_09_02.py │ ├── exc_03_10_01.py │ ├── exc_03_10_02.py │ ├── exc_03_11.py │ ├── exc_03_12.py │ ├── exc_03_14_01.py │ ├── exc_03_14_02.py │ ├── exc_03_14_03.py │ ├── exc_03_15.py │ ├── exc_03_16_01.py │ ├── exc_03_16_02.py │ ├── exc_04_03.py │ ├── exc_04_04.py │ ├── exc_04_07.py │ ├── exc_04_07_01.sh │ ├── exc_04_07_02.sh │ ├── exc_04_08.sh │ ├── exc_04_11.py │ ├── exc_04_12_01.py │ ├── exc_04_12_02.py │ ├── gadgets.json │ ├── iphone.json │ ├── 
solution_01_02_01.py │ ├── solution_01_02_02.py │ ├── solution_01_02_03.py │ ├── solution_01_03_01.py │ ├── solution_01_03_02.py │ ├── solution_01_04.py │ ├── solution_01_07.py │ ├── solution_01_08_01.py │ ├── solution_01_08_02.py │ ├── solution_01_09.py │ ├── solution_01_11.py │ ├── solution_01_12_01.py │ ├── solution_01_12_02.py │ ├── solution_01_12_03.py │ ├── solution_02_02_01.py │ ├── solution_02_02_02.py │ ├── solution_02_05_01.py │ ├── solution_02_05_02.py │ ├── solution_02_05_03.py │ ├── solution_02_06.py │ ├── solution_02_07.py │ ├── solution_02_09.py │ ├── solution_02_10_01.py │ ├── solution_02_10_02.py │ ├── solution_02_10_03.py │ ├── solution_02_13.py │ ├── solution_02_14.py │ ├── solution_02_15.py │ ├── solution_03_03.py │ ├── solution_03_06.py │ ├── solution_03_07.py │ ├── solution_03_09_01.py │ ├── solution_03_09_02.py │ ├── solution_03_10_01.py │ ├── solution_03_10_02.py │ ├── solution_03_11.py │ ├── solution_03_12.py │ ├── solution_03_14_01.py │ ├── solution_03_14_02.py │ ├── solution_03_14_03.py │ ├── solution_03_15.py │ ├── solution_03_16_01.py │ ├── solution_03_16_02.py │ ├── solution_04_03.py │ ├── solution_04_04.py │ ├── solution_04_07_01.sh │ ├── solution_04_07_02.sh │ ├── solution_04_08.sh │ ├── solution_04_11.py │ ├── solution_04_12_01.py │ ├── solution_04_12_02.py │ ├── test_01_02_01.py │ ├── test_01_02_02.py │ ├── test_01_02_03.py │ ├── test_01_03_01.py │ ├── test_01_03_02.py │ ├── test_01_04.py │ ├── test_01_07.py │ ├── test_01_08_01.py │ ├── test_01_08_02.py │ ├── test_01_09.py │ ├── test_01_11.py │ ├── test_01_12_01.py │ ├── test_01_12_02.py │ ├── test_01_12_03.py │ ├── test_02_02_01.py │ ├── test_02_02_02.py │ ├── test_02_05_01.py │ ├── test_02_05_02.py │ ├── test_02_05_03.py │ ├── test_02_06.py │ ├── test_02_07.py │ ├── test_02_09.py │ ├── test_02_10_01.py │ ├── test_02_10_02.py │ ├── test_02_10_03.py │ ├── test_02_13.py │ ├── test_02_14.py │ ├── test_02_15.py │ ├── test_03_03.py │ ├── test_03_06.py │ ├── test_03_07.py │ ├── test_03_09_01.py │ ├── test_03_09_02.py │ ├── test_03_10_01.py │ ├── test_03_10_02.py │ ├── test_03_11.py │ ├── test_03_12.py │ ├── test_03_14_01.py │ ├── test_03_14_02.py │ ├── test_03_14_03.py │ ├── test_03_15.py │ ├── test_03_16_01.py │ ├── test_03_16_02.py │ ├── test_04_03.py │ ├── test_04_04.py │ ├── test_04_07.py │ ├── test_04_11.py │ ├── test_04_12_01.py │ ├── test_04_12_02.py │ ├── test_general.py │ ├── train_gadget.spacy │ └── tweets.json ├── ja │ ├── bookquotes.json │ ├── capitals.json │ ├── config_gadget.cfg │ ├── countries.json │ ├── country_text.txt │ ├── dev_gadget.spacy │ ├── exc_01_02_01.py │ ├── exc_01_02_02.py │ ├── exc_01_02_03.py │ ├── exc_01_02_04.py │ ├── exc_01_03_01.py │ ├── exc_01_03_02.py │ ├── exc_01_04.py │ ├── exc_01_07.py │ ├── exc_01_08_01.py │ ├── exc_01_08_02.py │ ├── exc_01_09.py │ ├── exc_01_11.py │ ├── exc_01_12_01.py │ ├── exc_01_12_02.py │ ├── exc_01_12_03.py │ ├── exc_02_02_01.py │ ├── exc_02_02_02.py │ ├── exc_02_05_01.py │ ├── exc_02_05_02.py │ ├── exc_02_05_03.py │ ├── exc_02_06.py │ ├── exc_02_07.py │ ├── exc_02_09.py │ ├── exc_02_10_01.py │ ├── exc_02_10_02.py │ ├── exc_02_10_03.py │ ├── exc_02_13.py │ ├── exc_02_14.py │ ├── exc_02_15.py │ ├── exc_03_03.py │ ├── exc_03_06.py │ ├── exc_03_07.py │ ├── exc_03_09_01.py │ ├── exc_03_09_02.py │ ├── exc_03_10_01.py │ ├── exc_03_10_02.py │ ├── exc_03_11.py │ ├── exc_03_12.py │ ├── exc_03_14_01.py │ ├── exc_03_14_02.py │ ├── exc_03_14_03.py │ ├── exc_03_15.py │ ├── exc_03_16_01.py │ ├── exc_03_16_02.py │ ├── exc_04_03.py │ ├── exc_04_04.py │ ├── 
exc_04_06.py │ ├── exc_04_07_01.sh │ ├── exc_04_07_02.sh │ ├── exc_04_08.sh │ ├── exc_04_11.py │ ├── exc_04_12_01.py │ ├── exc_04_12_02.py │ ├── gadgets.json │ ├── iphone.json │ ├── solution_01_02_01.py │ ├── solution_01_02_02.py │ ├── solution_01_02_03.py │ ├── solution_01_02_04.py │ ├── solution_01_03_01.py │ ├── solution_01_03_02.py │ ├── solution_01_04.py │ ├── solution_01_07.py │ ├── solution_01_08_01.py │ ├── solution_01_08_02.py │ ├── solution_01_09.py │ ├── solution_01_11.py │ ├── solution_01_12_01.py │ ├── solution_01_12_02.py │ ├── solution_01_12_03.py │ ├── solution_02_02_01.py │ ├── solution_02_02_02.py │ ├── solution_02_05_01.py │ ├── solution_02_05_02.py │ ├── solution_02_05_03.py │ ├── solution_02_06.py │ ├── solution_02_07.py │ ├── solution_02_09.py │ ├── solution_02_10_01.py │ ├── solution_02_10_02.py │ ├── solution_02_10_03.py │ ├── solution_02_13.py │ ├── solution_02_14.py │ ├── solution_02_15.py │ ├── solution_03_03.py │ ├── solution_03_06.py │ ├── solution_03_07.py │ ├── solution_03_09_01.py │ ├── solution_03_09_02.py │ ├── solution_03_10_01.py │ ├── solution_03_10_02.py │ ├── solution_03_11.py │ ├── solution_03_12.py │ ├── solution_03_14_01.py │ ├── solution_03_14_02.py │ ├── solution_03_14_03.py │ ├── solution_03_15.py │ ├── solution_03_16_01.py │ ├── solution_03_16_02.py │ ├── solution_04_03.py │ ├── solution_04_04.py │ ├── solution_04_06.py │ ├── solution_04_07_01.sh │ ├── solution_04_07_02.sh │ ├── solution_04_08.sh │ ├── solution_04_11.py │ ├── solution_04_12_01.py │ ├── solution_04_12_02.py │ ├── test_01_02_01.py │ ├── test_01_02_02.py │ ├── test_01_02_03.py │ ├── test_01_02_04.py │ ├── test_01_03_01.py │ ├── test_01_03_02.py │ ├── test_01_04.py │ ├── test_01_07.py │ ├── test_01_08_01.py │ ├── test_01_08_02.py │ ├── test_01_09.py │ ├── test_01_11.py │ ├── test_01_12_01.py │ ├── test_01_12_02.py │ ├── test_01_12_03.py │ ├── test_02_02_01.py │ ├── test_02_02_02.py │ ├── test_02_05_01.py │ ├── test_02_05_02.py │ ├── test_02_05_03.py │ ├── test_02_06.py │ ├── test_02_07.py │ ├── test_02_09.py │ ├── test_02_10_01.py │ ├── test_02_10_02.py │ ├── test_02_10_03.py │ ├── test_02_13.py │ ├── test_02_14.py │ ├── test_02_15.py │ ├── test_03_03.py │ ├── test_03_06.py │ ├── test_03_07.py │ ├── test_03_09_01.py │ ├── test_03_09_02.py │ ├── test_03_10_01.py │ ├── test_03_10_02.py │ ├── test_03_11.py │ ├── test_03_12.py │ ├── test_03_14_01.py │ ├── test_03_14_02.py │ ├── test_03_14_03.py │ ├── test_03_15.py │ ├── test_03_16_01.py │ ├── test_03_16_02.py │ ├── test_04_03.py │ ├── test_04_04.py │ ├── test_04_06.py │ ├── test_04_07.py │ ├── test_04_10.py │ ├── test_04_11.py │ ├── test_04_12_01.py │ ├── test_04_12_02.py │ ├── train_gadget.spacy │ └── tweets.json ├── pt │ ├── bookquotes.json │ ├── capitals.json │ ├── config_gadget.cfg │ ├── countries.json │ ├── country_text.txt │ ├── dev_gadget.spacy │ ├── exc_01_02_01.py │ ├── exc_01_02_02.py │ ├── exc_01_02_03.py │ ├── exc_01_03_01.py │ ├── exc_01_03_02.py │ ├── exc_01_04.py │ ├── exc_01_07.py │ ├── exc_01_08_01.py │ ├── exc_01_08_02.py │ ├── exc_01_09.py │ ├── exc_01_11.py │ ├── exc_01_12_01.py │ ├── exc_01_12_02.py │ ├── exc_01_12_03.py │ ├── exc_02_02_01.py │ ├── exc_02_02_02.py │ ├── exc_02_05_01.py │ ├── exc_02_05_02.py │ ├── exc_02_05_03.py │ ├── exc_02_06.py │ ├── exc_02_07.py │ ├── exc_02_09.py │ ├── exc_02_10_01.py │ ├── exc_02_10_02.py │ ├── exc_02_10_03.py │ ├── exc_02_13.py │ ├── exc_02_14.py │ ├── exc_02_15.py │ ├── exc_03_03.py │ ├── exc_03_06.py │ ├── exc_03_07.py │ ├── exc_03_09_01.py │ ├── exc_03_09_02.py │ ├── 
exc_03_10_01.py │ ├── exc_03_10_02.py │ ├── exc_03_11.py │ ├── exc_03_12.py │ ├── exc_03_14_01.py │ ├── exc_03_14_02.py │ ├── exc_03_14_03.py │ ├── exc_03_15.py │ ├── exc_03_16_01.py │ ├── exc_03_16_02.py │ ├── exc_04_03.py │ ├── exc_04_04.py │ ├── exc_04_07_01.sh │ ├── exc_04_07_02.sh │ ├── exc_04_08.sh │ ├── exc_04_11.py │ ├── exc_04_12_01.py │ ├── exc_04_12_02.py │ ├── gadgets.json │ ├── iphone.json │ ├── solution_01_02_01.py │ ├── solution_01_02_02.py │ ├── solution_01_02_03.py │ ├── solution_01_03_01.py │ ├── solution_01_03_02.py │ ├── solution_01_04.py │ ├── solution_01_07.py │ ├── solution_01_08_01.py │ ├── solution_01_08_02.py │ ├── solution_01_09.py │ ├── solution_01_11.py │ ├── solution_01_12_01.py │ ├── solution_01_12_02.py │ ├── solution_01_12_03.py │ ├── solution_02_02_01.py │ ├── solution_02_02_02.py │ ├── solution_02_05_01.py │ ├── solution_02_05_02.py │ ├── solution_02_05_03.py │ ├── solution_02_06.py │ ├── solution_02_07.py │ ├── solution_02_09.py │ ├── solution_02_10_01.py │ ├── solution_02_10_02.py │ ├── solution_02_10_03.py │ ├── solution_02_13.py │ ├── solution_02_14.py │ ├── solution_02_15.py │ ├── solution_03_03.py │ ├── solution_03_06.py │ ├── solution_03_07.py │ ├── solution_03_09_01.py │ ├── solution_03_09_02.py │ ├── solution_03_10_01.py │ ├── solution_03_10_02.py │ ├── solution_03_11.py │ ├── solution_03_12.py │ ├── solution_03_14_01.py │ ├── solution_03_14_02.py │ ├── solution_03_14_03.py │ ├── solution_03_15.py │ ├── solution_03_16_01.py │ ├── solution_03_16_02.py │ ├── solution_04_03.py │ ├── solution_04_04.py │ ├── solution_04_07_01.sh │ ├── solution_04_07_02.sh │ ├── solution_04_08.sh │ ├── solution_04_11.py │ ├── solution_04_12_01.py │ ├── solution_04_12_02.py │ ├── test_01_02_01.py │ ├── test_01_02_02.py │ ├── test_01_02_03.py │ ├── test_01_03_01.py │ ├── test_01_03_02.py │ ├── test_01_04.py │ ├── test_01_07.py │ ├── test_01_08_01.py │ ├── test_01_08_02.py │ ├── test_01_09.py │ ├── test_01_11.py │ ├── test_01_12_01.py │ ├── test_01_12_02.py │ ├── test_01_12_03.py │ ├── test_02_02_01.py │ ├── test_02_02_02.py │ ├── test_02_05_01.py │ ├── test_02_05_02.py │ ├── test_02_05_03.py │ ├── test_02_06.py │ ├── test_02_07.py │ ├── test_02_09.py │ ├── test_02_10_01.py │ ├── test_02_10_02.py │ ├── test_02_10_03.py │ ├── test_02_13.py │ ├── test_02_14.py │ ├── test_02_15.py │ ├── test_03_03.py │ ├── test_03_06.py │ ├── test_03_07.py │ ├── test_03_09_01.py │ ├── test_03_09_02.py │ ├── test_03_10_01.py │ ├── test_03_10_02.py │ ├── test_03_11.py │ ├── test_03_12.py │ ├── test_03_14_01.py │ ├── test_03_14_02.py │ ├── test_03_14_03.py │ ├── test_03_15.py │ ├── test_03_16_01.py │ ├── test_03_16_02.py │ ├── test_04_03.py │ ├── test_04_04.py │ ├── test_04_11.py │ ├── test_04_12_01.py │ ├── test_04_12_02.py │ ├── train_gadget.spacy │ └── tweets.json └── zh │ ├── bookquotes.json │ ├── capitals.json │ ├── config_gadget.cfg │ ├── countries.json │ ├── country_text.txt │ ├── dev_gadget.spacy │ ├── exc_01_02_01.py │ ├── exc_01_02_02.py │ ├── exc_01_02_03.py │ ├── exc_01_02_04.py │ ├── exc_01_03_01.py │ ├── exc_01_03_02.py │ ├── exc_01_04.py │ ├── exc_01_07.py │ ├── exc_01_08_01.py │ ├── exc_01_08_02.py │ ├── exc_01_09.py │ ├── exc_01_11.py │ ├── exc_01_12_01.py │ ├── exc_01_12_02.py │ ├── exc_01_12_03.py │ ├── exc_02_02_01.py │ ├── exc_02_02_02.py │ ├── exc_02_05_01.py │ ├── exc_02_05_02.py │ ├── exc_02_05_03.py │ ├── exc_02_06.py │ ├── exc_02_07.py │ ├── exc_02_09.py │ ├── exc_02_10_01.py │ ├── exc_02_10_02.py │ ├── exc_02_10_03.py │ ├── exc_02_13.py │ ├── exc_02_14.py │ ├── 
exc_02_15.py │ ├── exc_03_03.py │ ├── exc_03_06.py │ ├── exc_03_07.py │ ├── exc_03_09_01.py │ ├── exc_03_09_02.py │ ├── exc_03_10_01.py │ ├── exc_03_10_02.py │ ├── exc_03_11.py │ ├── exc_03_12.py │ ├── exc_03_14_01.py │ ├── exc_03_14_02.py │ ├── exc_03_14_03.py │ ├── exc_03_15.py │ ├── exc_03_16_01.py │ ├── exc_03_16_02.py │ ├── exc_04_03.py │ ├── exc_04_04.py │ ├── exc_04_07_01.sh │ ├── exc_04_07_02.sh │ ├── exc_04_08.sh │ ├── exc_04_11.py │ ├── exc_04_12_01.py │ ├── exc_04_12_02.py │ ├── gadgets.json │ ├── iphone.json │ ├── solution_01_02_01.py │ ├── solution_01_02_02.py │ ├── solution_01_02_03.py │ ├── solution_01_02_04.py │ ├── solution_01_03_01.py │ ├── solution_01_03_02.py │ ├── solution_01_04.py │ ├── solution_01_07.py │ ├── solution_01_08_01.py │ ├── solution_01_08_02.py │ ├── solution_01_09.py │ ├── solution_01_11.py │ ├── solution_01_12_01.py │ ├── solution_01_12_02.py │ ├── solution_01_12_03.py │ ├── solution_02_02_01.py │ ├── solution_02_02_02.py │ ├── solution_02_05_01.py │ ├── solution_02_05_02.py │ ├── solution_02_05_03.py │ ├── solution_02_06.py │ ├── solution_02_07.py │ ├── solution_02_09.py │ ├── solution_02_10_01.py │ ├── solution_02_10_02.py │ ├── solution_02_10_03.py │ ├── solution_02_13.py │ ├── solution_02_14.py │ ├── solution_02_15.py │ ├── solution_03_03.py │ ├── solution_03_06.py │ ├── solution_03_07.py │ ├── solution_03_09_01.py │ ├── solution_03_09_02.py │ ├── solution_03_10_01.py │ ├── solution_03_10_02.py │ ├── solution_03_11.py │ ├── solution_03_12.py │ ├── solution_03_14_01.py │ ├── solution_03_14_02.py │ ├── solution_03_14_03.py │ ├── solution_03_15.py │ ├── solution_03_16_01.py │ ├── solution_03_16_02.py │ ├── solution_04_03.py │ ├── solution_04_04.py │ ├── solution_04_07_01.sh │ ├── solution_04_07_02.sh │ ├── solution_04_08.sh │ ├── solution_04_11.py │ ├── solution_04_12_01.py │ ├── solution_04_12_02.py │ ├── test_01_02_01.py │ ├── test_01_02_02.py │ ├── test_01_02_03.py │ ├── test_01_02_04.py │ ├── test_01_03_01.py │ ├── test_01_03_02.py │ ├── test_01_04.py │ ├── test_01_07.py │ ├── test_01_08_01.py │ ├── test_01_08_02.py │ ├── test_01_09.py │ ├── test_01_11.py │ ├── test_01_12_01.py │ ├── test_01_12_02.py │ ├── test_01_12_03.py │ ├── test_02_02_01.py │ ├── test_02_02_02.py │ ├── test_02_05_01.py │ ├── test_02_05_02.py │ ├── test_02_05_03.py │ ├── test_02_06.py │ ├── test_02_07.py │ ├── test_02_09.py │ ├── test_02_10_01.py │ ├── test_02_10_02.py │ ├── test_02_10_03.py │ ├── test_02_13.py │ ├── test_02_14.py │ ├── test_02_15.py │ ├── test_03_03.py │ ├── test_03_06.py │ ├── test_03_07.py │ ├── test_03_09_01.py │ ├── test_03_09_02.py │ ├── test_03_10_01.py │ ├── test_03_10_02.py │ ├── test_03_11.py │ ├── test_03_12.py │ ├── test_03_14_01.py │ ├── test_03_14_02.py │ ├── test_03_14_03.py │ ├── test_03_15.py │ ├── test_03_16_01.py │ ├── test_03_16_02.py │ ├── test_04_03.py │ ├── test_04_04.py │ ├── test_04_11.py │ ├── test_04_12_01.py │ ├── test_04_12_02.py │ ├── test_general.py │ ├── train_gadget.spacy │ └── weibo.json ├── gatsby-browser.js ├── gatsby-config.js ├── gatsby-node.js ├── locale.json ├── meta.json ├── netlify.toml ├── package-lock.json ├── package.json ├── src ├── components │ ├── button.js │ ├── choice.js │ ├── code.js │ ├── exercise.js │ ├── hint.js │ ├── home.js │ ├── juniper.js │ ├── layout.js │ ├── link.js │ ├── logo.js │ ├── seo.js │ ├── slides.js │ └── typography.js ├── context.js ├── markdown.js ├── pages │ ├── de.js │ ├── en.js │ ├── es.js │ ├── fr.js │ ├── index.js │ ├── ja.js │ ├── pt.js │ └── zh.js ├── styles │ ├── button.module.sass │ 
│   │   ├── chapter.module.sass
│   │   ├── choice.module.sass
│   │   ├── code.module.sass
│   │   ├── exercise.module.sass
│   │   ├── hint.module.sass
│   │   ├── index.module.sass
│   │   ├── index.sass
│   │   ├── layout.module.sass
│   │   ├── link.module.sass
│   │   ├── plyr.css
│   │   ├── reveal.css
│   │   ├── slides.module.sass
│   │   └── typography.module.sass
│   └── templates
│       └── chapter.js
├── static
│   ├── dep_example.png
│   ├── dep_example_de.png
│   ├── dep_example_es.png
│   ├── dep_example_fr.png
│   ├── dep_example_ja.png
│   ├── dep_example_zh.png
│   ├── doc.png
│   ├── doc_span.png
│   ├── icon.png
│   ├── icon_check.svg
│   ├── icon_slides.svg
│   ├── icon_video.svg
│   ├── logos.svg
│   ├── ner_example.png
│   ├── ner_example_de.png
│   ├── ner_example_es.png
│   ├── ner_example_fr.png
│   ├── ner_example_ja.png
│   ├── ner_example_zh.png
│   ├── package.png
│   ├── package_de.png
│   ├── package_es.png
│   ├── package_fr.png
│   ├── package_ja.png
│   ├── package_meta.png
│   ├── package_meta_de.png
│   ├── package_meta_es.png
│   ├── package_meta_fr.png
│   ├── package_meta_zh.png
│   ├── package_zh.png
│   ├── pipeline.png
│   ├── profile.jpg
│   ├── social.jpg
│   ├── social_de.jpg
│   ├── social_es.jpg
│   ├── social_fr.jpg
│   ├── social_ja.jpg
│   ├── social_pt.jpg
│   ├── social_zh.jpg
│   ├── span_indices.png
│   ├── training.png
│   ├── training_de.png
│   ├── training_es.png
│   ├── training_fr.png
│   ├── training_zh.png
│   ├── vocab_stringstore.png
│   ├── vocab_stringstore_de.png
│   ├── vocab_stringstore_es.png
│   ├── vocab_stringstore_fr.png
│   ├── vocab_stringstore_zh.png
│   └── website.png
└── theme.sass

--------------------------------------------------------------------------------
/.dockerignore:
--------------------------------------------------------------------------------
# Ignore everything
*

# Allow files and directories
!/docker/**
!gatsby*
!package*.json
!binder/requirements.txt

--------------------------------------------------------------------------------
/.eslintrc.js:
--------------------------------------------------------------------------------
module.exports = {
  globals: {
    __PATH_PREFIX__: true,
  },
  extends: `react-app`,
}

--------------------------------------------------------------------------------
/exercises/de/dev_gadget.spacy:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/explosion/spacy-course/77d8ee1630788b1fb0df1f80f999f7355941a37f/exercises/de/dev_gadget.spacy

--------------------------------------------------------------------------------
/exercises/de/exc_01_02_01.py:
--------------------------------------------------------------------------------
# Importiere spaCy
import ____

# Erstelle ein deutsches nlp-Objekt
nlp = ____

# Verarbeite einen Text
doc = nlp("Liebe Grüße!")

# Drucke den Text des Dokuments
print(____.text)

--------------------------------------------------------------------------------
/exercises/de/exc_01_02_02.py:
--------------------------------------------------------------------------------
# Importiere spaCy
import ____

# Erstelle ein englisches nlp-Objekt
nlp = ____

# Verarbeite einen Text
doc = nlp("This is a sentence.")

# Drucke den Text des Dokuments
print(____.text)

--------------------------------------------------------------------------------
/exercises/de/exc_01_02_03.py:
--------------------------------------------------------------------------------
# Importiere spaCy
import ____

# Erstelle ein spanisches nlp-Objekt
nlp = ____

# Verarbeite einen Text
doc = nlp("¿Cómo estás?")

# Drucke den Text des Dokuments
print(____.text)
--------------------------------------------------------------------------------
/exercises/de/exc_01_03_01.py:
--------------------------------------------------------------------------------
# Importiere spaCy und erstelle ein deutsches nlp-Objekt
import ____

nlp = ____

# Verarbeite den Text
doc = ____("Ich mag niedliche Katzen und Faultiere.")

# Wähle den ersten Token aus
erster_token = doc[____]

# Drucke den Text des ersten Tokens
print(erster_token.____)

--------------------------------------------------------------------------------
/exercises/de/exc_01_07.py:
--------------------------------------------------------------------------------
import spacy

# Lade die Pipeline "de_core_news_sm"
nlp = ____

text = "Apple wurde 1976 von Steve Wozniak, Steve Jobs und Ron Wayne gegründet."

# Verarbeite den Text
doc = ____

# Drucke den Text des Dokuments
print(____.____)

--------------------------------------------------------------------------------
/exercises/de/exc_01_08_02.py:
--------------------------------------------------------------------------------
import spacy

nlp = spacy.load("de_core_news_sm")

text = "Apple wurde 1976 von Steve Wozniak, Steve Jobs und Ron Wayne gegründet."

# Verarbeite den Text
doc = ____

# Iteriere über die vorhergesagten Entitäten
for ent in ____.____:
    # Drucke den Text und das Label der Entität
    print(ent.____, ____.____)

--------------------------------------------------------------------------------
/exercises/de/exc_02_02_01.py:
--------------------------------------------------------------------------------
import spacy

nlp = spacy.blank("de")
doc = nlp("Ich habe eine Katze")

# Schlage den Hash für das Wort "Katze" nach
katze_hash = ____.____.____[____]
print(katze_hash)

# Schlage katze_hash nach, um den String zu erhalten
katze_string = ____.____.____[____]
print(katze_string)

--------------------------------------------------------------------------------
/exercises/de/exc_02_02_02.py:
--------------------------------------------------------------------------------
import spacy

nlp = spacy.blank("de")
doc = nlp("David Bowie hat das Label PER")

# Schlage den Hash für das String-Label "PER" nach
person_hash = ____.____.____[____]
print(person_hash)

# Schlage person_hash nach, um den String zu erhalten
person_string = ____.____.____[____]
print(person_string)

--------------------------------------------------------------------------------
/exercises/de/exc_02_05_01.py:
--------------------------------------------------------------------------------
import spacy

nlp = spacy.blank("de")

# Importiere die Klasse Doc
from ____ import ____

# Erwarteter Text: "spaCy ist cool!"
words = ["spaCy", "ist", "cool", "!"]
spaces = [True, True, False, False]

# Erstelle ein Doc mit den Wörtern und Leerzeichen
doc = ____(____, words=words, spaces=spaces)
print(doc.text)

--------------------------------------------------------------------------------
/exercises/de/exc_02_05_02.py:
--------------------------------------------------------------------------------
import spacy

nlp = spacy.blank("de")

# Importiere die Klasse Doc
from ____ import ____

# Erwarteter Text: "Na, alles klar?"
words = ["Na", ",", "alles", "klar", "?"]
spaces = [____, ____, ____, ____, ____]

# Erstelle ein Doc mit den Wörtern und Leerzeichen
doc = ____(____, ____=____, ____=____)
print(doc.text)

--------------------------------------------------------------------------------
/exercises/de/exc_02_05_03.py:
--------------------------------------------------------------------------------
import spacy

nlp = spacy.blank("de")

# Importiere die Klasse Doc
from ____ import ____

# Erwarteter Text: "Was, echt?!"
words = [____, ____, ____, ____, ____]
spaces = [____, ____, ____, ____, ____]

# Erstelle ein Doc mit den Wörtern und Leerzeichen
doc = ____(____, ____=____, ____=____)
print(doc.text)

--------------------------------------------------------------------------------
/exercises/de/exc_02_09.py:
--------------------------------------------------------------------------------
import spacy

# Lade die Pipeline "en_core_web_md"
nlp = ____

# Verarbeite einen Text
doc = nlp("Two bananas in pyjamas")

# Wähle den Vector des Tokens "bananas" aus
bananas_vector = ____.____
print(bananas_vector)

--------------------------------------------------------------------------------
/exercises/de/exc_02_10_01.py:
--------------------------------------------------------------------------------
import spacy

nlp = spacy.load("en_core_web_md")

doc1 = nlp("It's a warm summer day")
doc2 = nlp("It's sunny outside")

# Berechne die Ähnlichkeit von doc1 und doc2
similarity = ____.____(____)
print(similarity)

--------------------------------------------------------------------------------
/exercises/de/exc_02_10_02.py:
--------------------------------------------------------------------------------
import spacy

nlp = spacy.load("en_core_web_md")

doc = nlp("TV and books")
token1, token2 = doc[0], doc[2]

# Berechne die Ähnlichkeit der Tokens "TV" und "books"
similarity = ____.____(____)
print(similarity)

--------------------------------------------------------------------------------
/exercises/de/exc_02_10_03.py:
--------------------------------------------------------------------------------
import spacy

nlp = spacy.load("en_core_web_md")

doc = nlp("This was a great restaurant. Afterwards, we went to a really nice bar.")
# Erstelle Spans für "great restaurant" und "really nice bar"
span1 = ____
span2 = ____

# Berechne die Ähnlichkeit der beiden Spans
similarity = ____.____(____)
print(similarity)

--------------------------------------------------------------------------------
/exercises/de/exc_03_03.py:
--------------------------------------------------------------------------------
import spacy

# Lade die Pipeline "de_core_news_sm"
nlp = ____

# Drucke die Namen der Pipeline-Komponenten
print(____.____)

# Drucke die komplette Pipeline mit (name, component) Tuples
print(____.____)

--------------------------------------------------------------------------------
/exercises/de/exc_03_14_01.py:
--------------------------------------------------------------------------------
import json
import spacy

nlp = spacy.load("de_core_news_sm")

with open("exercises/de/tweets.json", encoding="utf8") as f:
    TEXTS = json.loads(f.read())

# Verarbeite den Text und drucke die Nomen
for text in TEXTS:
    doc = nlp(text)
    print([token.text for token in doc if token.pos_ == "NOUN"])

--------------------------------------------------------------------------------
/exercises/de/exc_03_14_02.py:
--------------------------------------------------------------------------------
import json
import spacy

nlp = spacy.load("de_core_news_sm")

with open("exercises/de/tweets.json", encoding="utf8") as f:
    TEXTS = json.loads(f.read())

# Verarbeite den Text und drucke die Entitäten
docs = [nlp(text) for text in TEXTS]
entities = [doc.ents for doc in docs]
print(*entities)

--------------------------------------------------------------------------------
/exercises/de/exc_03_14_03.py:
--------------------------------------------------------------------------------
import spacy

nlp = spacy.blank("de")

people = ["David Bowie", "Angela Merkel", "Lady Gaga"]

# Erstelle eine Liste von Patterns für den PhraseMatcher
patterns = [nlp(person) for person in people]

--------------------------------------------------------------------------------
/exercises/de/exc_03_16_01.py:
--------------------------------------------------------------------------------
import spacy

nlp = spacy.load("de_core_news_sm")
text = (
    "Chick-fil-A, ein Wortspiel mit der amerikanischen Aussprache von „Filet“, "
    "ist der Name einer 1946 gegründeten amerikanischen Schnellrestaurantkette, "
    "die sich auf den Verkauf von Hühnerfleischprodukten spezialisiert hat."
)

# Wende nur den Tokenizer an
doc = nlp(text)
print([token.text for token in doc])

--------------------------------------------------------------------------------
/exercises/de/exc_03_16_02.py:
--------------------------------------------------------------------------------
import spacy

nlp = spacy.load("de_core_news_sm")
text = (
    "Die McDonald’s Corporation ist ein Betreiber und Franchisegeber von "
    "weltweit vertretenen Schnellrestaurants."
)

# Deaktiviere den Tagger und den Lemmatizer
with ____.____(____):
    # Verarbeite den Text
    doc = ____
    # Drucke die Entitäten im Doc
    print(____)

--------------------------------------------------------------------------------
/exercises/de/exc_04_07_01.sh:
--------------------------------------------------------------------------------
python -m spacy ____ ____ ____ --____ ____ --____ ____

--------------------------------------------------------------------------------
/exercises/de/exc_04_07_02.sh:
--------------------------------------------------------------------------------
cat ./config.cfg

--------------------------------------------------------------------------------
/exercises/de/exc_04_08.sh:
--------------------------------------------------------------------------------
python -m spacy ____ ____ --output ____ --paths.train ____ --paths.dev ____

--------------------------------------------------------------------------------
/exercises/de/iphone.json:
--------------------------------------------------------------------------------
[
  "iPhone X vorbestellen: So geht's",
  "Das iPhone X kommt bald",
  "soll ich 1000 € für das neue iphone x ausgeben?",
  "Die Testberichte des iPhone 8 sind da",
  "iPhone 11 vs iPhone 8: Ein Quantensprung",
  "Ich brauche ein neues Smartphone. Hat jemand Tipps?"
]

--------------------------------------------------------------------------------
/exercises/de/solution_01_02_01.py:
--------------------------------------------------------------------------------
# Importiere spaCy
import spacy

# Erstelle ein deutsches nlp-Objekt
nlp = spacy.blank("de")

# Verarbeite einen Text
doc = nlp("Liebe Grüße!")

# Drucke den Text des Dokuments
print(doc.text)

--------------------------------------------------------------------------------
/exercises/de/solution_01_02_02.py:
--------------------------------------------------------------------------------
# Importiere spaCy
import spacy

# Erstelle ein englisches nlp-Objekt
nlp = spacy.blank("en")

# Verarbeite einen Text
doc = nlp("This is a sentence.")

# Drucke den Text des Dokuments
print(doc.text)

--------------------------------------------------------------------------------
/exercises/de/solution_01_02_03.py:
--------------------------------------------------------------------------------
# Importiere spaCy
import spacy

# Erstelle ein spanisches nlp-Objekt
nlp = spacy.blank("es")

# Verarbeite einen Text
doc = nlp("¿Cómo estás?")

# Drucke den Text des Dokuments
print(doc.text)

--------------------------------------------------------------------------------
/exercises/de/solution_01_03_01.py:
--------------------------------------------------------------------------------
# Importiere spaCy und erstelle ein deutsches nlp-Objekt
import spacy

nlp = spacy.blank("de")

# Verarbeite den Text
doc = nlp("Ich mag niedliche Katzen und Faultiere.")

# Wähle den ersten Token aus
erster_token = doc[0]

# Drucke den Text des ersten Tokens
print(erster_token.text)

--------------------------------------------------------------------------------
/exercises/de/solution_01_07.py:
--------------------------------------------------------------------------------
import spacy

# Lade die Pipeline "de_core_news_sm"
nlp = spacy.load("de_core_news_sm")
text = "Apple wurde 1976 von Steve Wozniak, Steve Jobs und Ron Wayne gegründet."

# Verarbeite den Text
doc = nlp(text)

# Drucke den Text des Dokuments
print(doc.text)

--------------------------------------------------------------------------------
/exercises/de/solution_01_08_02.py:
--------------------------------------------------------------------------------
import spacy

nlp = spacy.load("de_core_news_sm")

text = "Apple wurde 1976 von Steve Wozniak, Steve Jobs und Ron Wayne gegründet."

# Verarbeite den Text
doc = nlp(text)

# Iteriere über die vorhergesagten Entitäten
for ent in doc.ents:
    # Drucke den Text und das Label der Entität
    print(ent.text, ent.label_)

--------------------------------------------------------------------------------
/exercises/de/solution_02_02_01.py:
--------------------------------------------------------------------------------
import spacy

nlp = spacy.blank("de")
doc = nlp("Ich habe eine Katze")

# Schlage den Hash für das Wort "Katze" nach
katze_hash = nlp.vocab.strings["Katze"]
print(katze_hash)

# Schlage katze_hash nach, um den String zu erhalten
katze_string = nlp.vocab.strings[katze_hash]
print(katze_string)

--------------------------------------------------------------------------------
/exercises/de/solution_02_02_02.py:
--------------------------------------------------------------------------------
import spacy

nlp = spacy.blank("de")
doc = nlp("David Bowie hat das Label PER")

# Schlage den Hash für das String-Label "PER" nach
person_hash = nlp.vocab.strings["PER"]
print(person_hash)

# Schlage person_hash nach, um den String zu erhalten
person_string = nlp.vocab.strings[person_hash]
print(person_string)

--------------------------------------------------------------------------------
/exercises/de/solution_02_05_01.py:
--------------------------------------------------------------------------------
import spacy

nlp = spacy.blank("de")

# Importiere die Klasse Doc
from spacy.tokens import Doc

# Erwarteter Text: "spaCy ist cool!"
words = ["spaCy", "ist", "cool", "!"]
spaces = [True, True, False, False]

# Erstelle ein Doc mit den Wörtern und Leerzeichen
doc = Doc(nlp.vocab, words=words, spaces=spaces)
print(doc.text)

--------------------------------------------------------------------------------
/exercises/de/solution_02_05_02.py:
--------------------------------------------------------------------------------
import spacy

nlp = spacy.blank("de")

# Importiere die Klasse Doc
from spacy.tokens import Doc

# Erwarteter Text: "Na, alles klar?"
words = ["Na", ",", "alles", "klar", "?"]
spaces = [False, True, True, False, False]

# Erstelle ein Doc mit den Wörtern und Leerzeichen
doc = Doc(nlp.vocab, words=words, spaces=spaces)
print(doc.text)

--------------------------------------------------------------------------------
/exercises/de/solution_02_05_03.py:
--------------------------------------------------------------------------------
import spacy

nlp = spacy.blank("de")

# Importiere die Klasse Doc
from spacy.tokens import Doc

# Erwarteter Text: "Was, echt?!"
words = ["Was", ",", "echt", "?", "!"]
spaces = [False, True, False, False, False]

# Erstelle ein Doc mit den Wörtern und Leerzeichen
doc = Doc(nlp.vocab, words=words, spaces=spaces)
print(doc.text)

--------------------------------------------------------------------------------
/exercises/de/solution_02_07.py:
--------------------------------------------------------------------------------
import spacy

nlp = spacy.load("de_core_news_sm")
doc = nlp("Berlin gefällt mir sehr gut")

# Iteriere über die Tokens
for token in doc:
    # Teste, ob der aktuelle Token ein Eigenname ist
    if token.pos_ == "PROPN":
        # Teste, ob der nächste Token ein Verb ist
        if doc[token.i + 1].pos_ == "VERB":
            print("Eigenname vor Verb gefunden:", token.text)

--------------------------------------------------------------------------------
/exercises/de/solution_02_09.py:
--------------------------------------------------------------------------------
import spacy

# Lade die Pipeline "en_core_web_md"
nlp = spacy.load("en_core_web_md")

# Verarbeite einen Text
doc = nlp("Two bananas in pyjamas")

# Wähle den Vector des Tokens "bananas" aus
bananas_vector = doc[1].vector
print(bananas_vector)

--------------------------------------------------------------------------------
/exercises/de/solution_02_10_01.py:
--------------------------------------------------------------------------------
import spacy

nlp = spacy.load("en_core_web_md")

doc1 = nlp("It's a warm summer day")
doc2 = nlp("It's sunny outside")

# Berechne die Ähnlichkeit von doc1 und doc2
similarity = doc1.similarity(doc2)
print(similarity)

--------------------------------------------------------------------------------
/exercises/de/solution_02_10_02.py:
--------------------------------------------------------------------------------
import spacy

nlp = spacy.load("en_core_web_md")

doc = nlp("TV and books")
token1, token2 = doc[0], doc[2]

# Berechne die Ähnlichkeit der Tokens "TV" und "books"
similarity = token1.similarity(token2)
print(similarity)

--------------------------------------------------------------------------------
/exercises/de/solution_02_10_03.py:
--------------------------------------------------------------------------------
import spacy

nlp = spacy.load("en_core_web_md")

doc = nlp("This was a great restaurant. Afterwards, we went to a really nice bar.")
# Erstelle Spans für "great restaurant" und "really nice bar"
span1 = doc[3:5]
span2 = doc[12:15]

# Berechne die Ähnlichkeit der beiden Spans
similarity = span1.similarity(span2)
print(similarity)

--------------------------------------------------------------------------------
/exercises/de/solution_03_03.py:
--------------------------------------------------------------------------------
import spacy

# Lade die Pipeline "de_core_news_sm"
nlp = spacy.load("de_core_news_sm")

# Drucke die Namen der Pipeline-Komponenten
print(nlp.pipe_names)

# Drucke die komplette Pipeline mit (name, component) Tuples
print(nlp.pipeline)

--------------------------------------------------------------------------------
/exercises/de/solution_03_14_01.py:
--------------------------------------------------------------------------------
import json
import spacy

nlp = spacy.load("de_core_news_sm")

with open("exercises/de/tweets.json", encoding="utf8") as f:
    TEXTS = json.loads(f.read())

# Verarbeite den Text und drucke die Nomen
for doc in nlp.pipe(TEXTS):
    print([token.text for token in doc if token.pos_ == "NOUN"])

--------------------------------------------------------------------------------
/exercises/de/solution_03_14_02.py:
--------------------------------------------------------------------------------
import json
import spacy

nlp = spacy.load("de_core_news_sm")

with open("exercises/de/tweets.json", encoding="utf8") as f:
    TEXTS = json.loads(f.read())

# Verarbeite den Text und drucke die Entitäten
docs = list(nlp.pipe(TEXTS))
entities = [doc.ents for doc in docs]
print(*entities)

--------------------------------------------------------------------------------
/exercises/de/solution_03_14_03.py:
--------------------------------------------------------------------------------
import spacy

nlp = spacy.blank("de")

people = ["David Bowie", "Angela Merkel", "Lady Gaga"]

# Erstelle eine Liste von Patterns für den PhraseMatcher
patterns = list(nlp.pipe(people))

--------------------------------------------------------------------------------
/exercises/de/solution_04_07_01.sh:
--------------------------------------------------------------------------------
python -m spacy init config ./config.cfg --lang de --pipeline ner

--------------------------------------------------------------------------------
/exercises/de/solution_04_07_02.sh:
--------------------------------------------------------------------------------
cat ./config.cfg

--------------------------------------------------------------------------------
/exercises/de/solution_04_08.sh:
--------------------------------------------------------------------------------
python -m spacy train ./exercises/de/config_gadget.cfg --output ./output --paths.train ./exercises/de/train_gadget.spacy --paths.dev ./exercises/de/dev_gadget.spacy

--------------------------------------------------------------------------------
/exercises/de/test_02_05_01.py:
--------------------------------------------------------------------------------
def test():
    assert (
        "from spacy.tokens import Doc" in __solution__
    ), "Importierst du die Klasse Doc?"
    assert (
        doc.text == "spaCy ist cool!"
    ), "Bist du dir sicher, dass du das Doc richtig erstellt hast?"
8 | assert "print(doc.text)" in __solution__, "Druckst du den Text des Docs?" 9 | __msg__.good("Super!") 10 | -------------------------------------------------------------------------------- /exercises/de/test_02_09.py: -------------------------------------------------------------------------------- 1 | def test(): 2 | assert ( 3 | 'spacy.load("en_core_web_md")' in __solution__ 4 | ), "Lädst du die mittelgroße Pipeline?" 5 | assert "doc[1].vector" in __solution__, "Greifst du auf den richtigen Vector zu?" 6 | __msg__.good( 7 | "Bravo! In der nächsten Übung wirst du spaCy benutzen, um mithilfe von " 8 | "Wortvektoren Ähnlichkeiten von Dokumenten, Spans und Tokens zu berechnen." 9 | ) 10 | -------------------------------------------------------------------------------- /exercises/de/test_02_10_01.py: -------------------------------------------------------------------------------- 1 | def test(): 2 | assert ( 3 | "doc1.similarity(doc2)" in __solution__ or "doc2.similarity(doc1)" in __solution__ 4 | ), "Vergleichst du die Ähnlichkeit der zwei Docs?" 5 | assert ( 6 | 0 <= float(similarity) <= 1 7 | ), "Der Ähnlichkeitswert muss eine Zahl zwischen 0 und 1 sein. Hast du ihn korrekt berechnet?" 8 | __msg__.good("Gut gemacht!") 9 | -------------------------------------------------------------------------------- /exercises/de/test_02_10_02.py: -------------------------------------------------------------------------------- 1 | def test(): 2 | assert ( 3 | "token1.similarity(token2)" in __solution__ or "token2.similarity(token1)" in __solution__ 4 | ), "Vergleichst du die Ähnlichkeit der zwei Tokens?" 5 | assert ( 6 | 0 <= float(similarity) <= 1 7 | ), "Der Ähnlichkeitswert muss eine Zahl zwischen 0 und 1 sein. Hast du ihn korrekt berechnet?" 8 | __msg__.good("Prima!") 9 | -------------------------------------------------------------------------------- /exercises/de/test_03_14_01.py: -------------------------------------------------------------------------------- 1 | def test(): 2 | assert ( 3 | "for doc in nlp.pipe(TEXTS)" in __solution__ 4 | ), "Iterierst du über die Docs, die per yield von nlp.pipe ausgegeben werden?" 5 | __msg__.good("Super!") 6 | -------------------------------------------------------------------------------- /exercises/de/test_03_14_02.py: -------------------------------------------------------------------------------- 1 | def test(): 2 | assert ( 3 | "docs = list(nlp.pipe(TEXTS))" in __solution__ 4 | ), "Verwendest du nlp.pipe in einer Liste?" 5 | __msg__.good("Gute Arbeit!") 6 | -------------------------------------------------------------------------------- /exercises/de/test_03_14_03.py: -------------------------------------------------------------------------------- 1 | def test(): 2 | assert ( 3 | "patterns = list(nlp.pipe(people))" in __solution__ 4 | ), "Verwendest du nlp.pipe in einer Liste?" 5 | 6 | __msg__.good( 7 | "Gut gemacht! Als nächstes schauen wir uns ein praktisches Beispiel " 8 | "an, das nlp.pipe verwendet, um Dokumente mit zusätzlichen Metadaten " 9 | "zu verarbeiten." 10 | ) 11 | -------------------------------------------------------------------------------- /exercises/de/test_03_16_01.py: -------------------------------------------------------------------------------- 1 | def test(): 2 | assert ( 3 | "doc = nlp.make_doc(text)" in __solution__ 4 | or "doc = nlp.tokenizer(text)" in __solution__ 5 | ), "Verwendest du tatsächlich nur den Tokenizer?" 
6 | 7 | __msg__.good("Sehr schön!") 8 | -------------------------------------------------------------------------------- /exercises/de/train_gadget.spacy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/explosion/spacy-course/77d8ee1630788b1fb0df1f80f999f7355941a37f/exercises/de/train_gadget.spacy -------------------------------------------------------------------------------- /exercises/en/dev_gadget.spacy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/explosion/spacy-course/77d8ee1630788b1fb0df1f80f999f7355941a37f/exercises/en/dev_gadget.spacy -------------------------------------------------------------------------------- /exercises/en/exc_01_02_01.py: -------------------------------------------------------------------------------- 1 | # Import spaCy 2 | import ____ 3 | 4 | # Create the English nlp object 5 | nlp = ____ 6 | 7 | # Process a text 8 | doc = nlp("This is a sentence.") 9 | 10 | # Print the document text 11 | print(____.text) 12 | -------------------------------------------------------------------------------- /exercises/en/exc_01_02_02.py: -------------------------------------------------------------------------------- 1 | # Import spaCy 2 | import ____ 3 | 4 | # Create the German nlp object 5 | nlp = ____ 6 | 7 | # Process a text (this is German for: "Kind regards!") 8 | doc = nlp("Liebe Grüße!") 9 | 10 | # Print the document text 11 | print(____.text) 12 | -------------------------------------------------------------------------------- /exercises/en/exc_01_02_03.py: -------------------------------------------------------------------------------- 1 | # Import spaCy 2 | import ____ 3 | 4 | # Create the Spanish nlp object 5 | nlp = ____ 6 | 7 | # Process a text (this is Spanish for: "How are you?") 8 | doc = nlp("¿Cómo estás?") 9 | 10 | # Print the document text 11 | print(____.text) 12 | -------------------------------------------------------------------------------- /exercises/en/exc_01_03_01.py: -------------------------------------------------------------------------------- 1 | # Import spaCy and create the English nlp object 2 | import ____ 3 | 4 | nlp = ____ 5 | 6 | # Process the text 7 | doc = ____("I like tree kangaroos and narwhals.") 8 | 9 | # Select the first token 10 | first_token = doc[____] 11 | 12 | # Print the first token's text 13 | print(first_token.____) 14 | -------------------------------------------------------------------------------- /exercises/en/exc_01_07.py: -------------------------------------------------------------------------------- 1 | import spacy 2 | 3 | # Load the "en_core_web_sm" pipeline 4 | nlp = ____ 5 | 6 | text = "It’s official: Apple is the first U.S. public company to reach a $1 trillion market value" 7 | 8 | # Process the text 9 | doc = ____ 10 | 11 | # Print the document text 12 | print(____.____) 13 | -------------------------------------------------------------------------------- /exercises/en/exc_01_08_02.py: -------------------------------------------------------------------------------- 1 | import spacy 2 | 3 | nlp = spacy.load("en_core_web_sm") 4 | 5 | text = "It’s official: Apple is the first U.S. 
public company to reach a $1 trillion market value" 6 | 7 | # Process the text 8 | doc = ____ 9 | 10 | # Iterate over the predicted entities 11 | for ent in ____.____: 12 | # Print the entity text and its label 13 | print(ent.____, ____.____) 14 | -------------------------------------------------------------------------------- /exercises/en/exc_02_02_01.py: -------------------------------------------------------------------------------- 1 | import spacy 2 | 3 | nlp = spacy.blank("en") 4 | doc = nlp("I have a cat") 5 | 6 | # Look up the hash for the word "cat" 7 | cat_hash = ____.____.____[____] 8 | print(cat_hash) 9 | 10 | # Look up the cat_hash to get the string 11 | cat_string = ____.____.____[____] 12 | print(cat_string) 13 | -------------------------------------------------------------------------------- /exercises/en/exc_02_02_02.py: -------------------------------------------------------------------------------- 1 | import spacy 2 | 3 | nlp = spacy.blank("en") 4 | doc = nlp("David Bowie is a PERSON") 5 | 6 | # Look up the hash for the string label "PERSON" 7 | person_hash = ____.____.____[____] 8 | print(person_hash) 9 | 10 | # Look up the person_hash to get the string 11 | person_string = ____.____.____[____] 12 | print(person_string) 13 | -------------------------------------------------------------------------------- /exercises/en/exc_02_05_01.py: -------------------------------------------------------------------------------- 1 | import spacy 2 | 3 | nlp = spacy.blank("en") 4 | 5 | # Import the Doc class 6 | from ____ import ____ 7 | 8 | # Desired text: "spaCy is cool!" 9 | words = ["spaCy", "is", "cool", "!"] 10 | spaces = [True, True, False, False] 11 | 12 | # Create a Doc from the words and spaces 13 | doc = ____(____, words=words, spaces=spaces) 14 | print(doc.text) 15 | -------------------------------------------------------------------------------- /exercises/en/exc_02_05_02.py: -------------------------------------------------------------------------------- 1 | import spacy 2 | 3 | nlp = spacy.blank("en") 4 | 5 | # Import the Doc class 6 | from ____ import ____ 7 | 8 | # Desired text: "Go, get started!" 9 | words = ["Go", ",", "get", "started", "!"] 10 | spaces = [____, ____, ____, ____, ____] 11 | 12 | # Create a Doc from the words and spaces 13 | doc = ____(____, ____=____, ____=____) 14 | print(doc.text) 15 | -------------------------------------------------------------------------------- /exercises/en/exc_02_05_03.py: -------------------------------------------------------------------------------- 1 | import spacy 2 | 3 | nlp = spacy.blank("en") 4 | 5 | # Import the Doc class 6 | from ____ import ____ 7 | 8 | # Desired text: "Oh, really?!" 
9 | words = [____, ____, ____, ____, ____] 10 | spaces = [____, ____, ____, ____, ____] 11 | 12 | # Create a Doc from the words and spaces 13 | doc = ____(____, ____=____, ____=____) 14 | print(doc.text) 15 | -------------------------------------------------------------------------------- /exercises/en/exc_02_09.py: -------------------------------------------------------------------------------- 1 | import spacy 2 | 3 | # Load the en_core_web_md pipeline 4 | nlp = ____ 5 | 6 | # Process a text 7 | doc = nlp("Two bananas in pyjamas") 8 | 9 | # Get the vector for the token "bananas" 10 | bananas_vector = ____.____ 11 | print(bananas_vector) 12 | -------------------------------------------------------------------------------- /exercises/en/exc_02_10_01.py: -------------------------------------------------------------------------------- 1 | import spacy 2 | 3 | nlp = spacy.load("en_core_web_md") 4 | 5 | doc1 = nlp("It's a warm summer day") 6 | doc2 = nlp("It's sunny outside") 7 | 8 | # Get the similarity of doc1 and doc2 9 | similarity = ____.____(____) 10 | print(similarity) 11 | -------------------------------------------------------------------------------- /exercises/en/exc_02_10_02.py: -------------------------------------------------------------------------------- 1 | import spacy 2 | 3 | nlp = spacy.load("en_core_web_md") 4 | 5 | doc = nlp("TV and books") 6 | token1, token2 = doc[0], doc[2] 7 | 8 | # Get the similarity of the tokens "TV" and "books" 9 | similarity = ____.____(____) 10 | print(similarity) 11 | -------------------------------------------------------------------------------- /exercises/en/exc_02_10_03.py: -------------------------------------------------------------------------------- 1 | import spacy 2 | 3 | nlp = spacy.load("en_core_web_md") 4 | 5 | doc = nlp("This was a great restaurant. 
Afterwards, we went to a really nice bar.") 6 | 7 | # Create spans for "great restaurant" and "really nice bar" 8 | span1 = ____ 9 | span2 = ____ 10 | 11 | # Get the similarity of the spans 12 | similarity = ____.____(____) 13 | print(similarity) 14 | -------------------------------------------------------------------------------- /exercises/en/exc_03_03.py: -------------------------------------------------------------------------------- 1 | import spacy 2 | 3 | # Load the en_core_web_sm pipeline 4 | nlp = ____ 5 | 6 | # Print the names of the pipeline components 7 | print(____.____) 8 | 9 | # Print the full pipeline of (name, component) tuples 10 | print(____.____) 11 | -------------------------------------------------------------------------------- /exercises/en/exc_03_14_01.py: -------------------------------------------------------------------------------- 1 | import json 2 | import spacy 3 | 4 | nlp = spacy.load("en_core_web_sm") 5 | 6 | with open("exercises/en/tweets.json", encoding="utf8") as f: 7 | TEXTS = json.loads(f.read()) 8 | 9 | # Process the texts and print the adjectives 10 | for text in TEXTS: 11 | doc = nlp(text) 12 | print([token.text for token in doc if token.pos_ == "ADJ"]) 13 | -------------------------------------------------------------------------------- /exercises/en/exc_03_14_02.py: -------------------------------------------------------------------------------- 1 | import json 2 | import spacy 3 | 4 | nlp = spacy.load("en_core_web_sm") 5 | 6 | with open("exercises/en/tweets.json", encoding="utf8") as f: 7 | TEXTS = json.loads(f.read()) 8 | 9 | # Process the texts and print the entities 10 | docs = [nlp(text) for text in TEXTS] 11 | entities = [doc.ents for doc in docs] 12 | print(*entities) 13 | -------------------------------------------------------------------------------- /exercises/en/exc_03_14_03.py: -------------------------------------------------------------------------------- 1 | import spacy 2 | 3 | nlp = spacy.blank("en") 4 | 5 | people = ["David Bowie", "Angela Merkel", "Lady Gaga"] 6 | 7 | # Create a list of patterns for the PhraseMatcher 8 | patterns = [nlp(person) for person in people] 9 | -------------------------------------------------------------------------------- /exercises/en/exc_03_16_01.py: -------------------------------------------------------------------------------- 1 | import spacy 2 | 3 | nlp = spacy.load("en_core_web_sm") 4 | text = ( 5 | "Chick-fil-A is an American fast food restaurant chain headquartered in " 6 | "the city of College Park, Georgia, specializing in chicken sandwiches." 7 | ) 8 | 9 | # Only tokenize the text 10 | doc = nlp(text) 11 | print([token.text for token in doc]) 12 | -------------------------------------------------------------------------------- /exercises/en/exc_03_16_02.py: -------------------------------------------------------------------------------- 1 | import spacy 2 | 3 | nlp = spacy.load("en_core_web_sm") 4 | text = ( 5 | "Chick-fil-A is an American fast food restaurant chain headquartered in " 6 | "the city of College Park, Georgia, specializing in chicken sandwiches." 
7 | ) 8 | 9 | # Disable the tagger and lemmatizer 10 | with ____.____(____): 11 | # Process the text 12 | doc = ____ 13 | # Print the entities in the doc 14 | print(____) 15 | -------------------------------------------------------------------------------- /exercises/en/exc_04_07_01.sh: -------------------------------------------------------------------------------- 1 | python -m spacy ____ ____ ____ --___ ____ --____ ____ 2 | -------------------------------------------------------------------------------- /exercises/en/exc_04_07_02.sh: -------------------------------------------------------------------------------- 1 | cat ./config.cfg 2 | -------------------------------------------------------------------------------- /exercises/en/exc_04_08.sh: -------------------------------------------------------------------------------- 1 | python -m spacy ____ ____ --output ____ --paths.train ____ --paths.dev ____ 2 | -------------------------------------------------------------------------------- /exercises/en/iphone.json: -------------------------------------------------------------------------------- 1 | [ 2 | "How to preorder the iPhone X", 3 | "iPhone X is coming", 4 | "Should I pay $1,000 for the iPhone X?", 5 | "The iPhone 8 reviews are here", 6 | "iPhone 11 vs iPhone 8: What's the difference?", 7 | "I need a new phone! Any tips?" 8 | ] 9 | -------------------------------------------------------------------------------- /exercises/en/solution_01_02_01.py: -------------------------------------------------------------------------------- 1 | # Import spaCy 2 | import spacy 3 | 4 | # Create the English nlp object 5 | nlp = spacy.blank("en") 6 | 7 | # Process a text 8 | doc = nlp("This is a sentence.") 9 | 10 | # Print the document text 11 | print(doc.text) 12 | 13 | -------------------------------------------------------------------------------- /exercises/en/solution_01_02_02.py: -------------------------------------------------------------------------------- 1 | # Import spaCy 2 | import spacy 3 | 4 | # Create the German nlp object 5 | nlp = spacy.blank("de") 6 | 7 | # Process a text (this is German for: "Kind regards!") 8 | doc = nlp("Liebe Grüße!") 9 | 10 | # Print the document text 11 | print(doc.text) 12 | -------------------------------------------------------------------------------- /exercises/en/solution_01_02_03.py: -------------------------------------------------------------------------------- 1 | # Import spaCy 2 | import spacy 3 | 4 | # Create the Spanish nlp object 5 | nlp = spacy.blank("es") 6 | 7 | # Process a text (this is Spanish for: "How are you?") 8 | doc = nlp("¿Cómo estás?") 9 | 10 | # Print the document text 11 | print(doc.text) 12 | -------------------------------------------------------------------------------- /exercises/en/solution_01_03_01.py: -------------------------------------------------------------------------------- 1 | # Import spaCy and create the English nlp object 2 | import spacy 3 | 4 | nlp = spacy.blank("en") 5 | 6 | # Process the text 7 | doc = nlp("I like tree kangaroos and narwhals.") 8 | 9 | # Select the first token 10 | first_token = doc[0] 11 | 12 | # Print the first token's text 13 | print(first_token.text) 14 | -------------------------------------------------------------------------------- /exercises/en/solution_01_07.py: -------------------------------------------------------------------------------- 1 | import spacy 2 | 3 | # Load the "en_core_web_sm" pipeline 4 | nlp = spacy.load("en_core_web_sm") 5 | 6 | text = "It’s official: Apple is 
the first U.S. public company to reach a $1 trillion market value" 7 | 8 | # Process the text 9 | doc = nlp(text) 10 | 11 | # Print the document text 12 | print(doc.text) 13 | -------------------------------------------------------------------------------- /exercises/en/solution_01_08_02.py: -------------------------------------------------------------------------------- 1 | import spacy 2 | 3 | nlp = spacy.load("en_core_web_sm") 4 | 5 | text = "It’s official: Apple is the first U.S. public company to reach a $1 trillion market value" 6 | 7 | # Process the text 8 | doc = nlp(text) 9 | 10 | # Iterate over the predicted entities 11 | for ent in doc.ents: 12 | # Print the entity text and its label 13 | print(ent.text, ent.label_) 14 | -------------------------------------------------------------------------------- /exercises/en/solution_02_02_01.py: -------------------------------------------------------------------------------- 1 | import spacy 2 | 3 | nlp = spacy.blank("en") 4 | doc = nlp("I have a cat") 5 | 6 | # Look up the hash for the word "cat" 7 | cat_hash = nlp.vocab.strings["cat"] 8 | print(cat_hash) 9 | 10 | # Look up the cat_hash to get the string 11 | cat_string = nlp.vocab.strings[cat_hash] 12 | print(cat_string) 13 | -------------------------------------------------------------------------------- /exercises/en/solution_02_02_02.py: -------------------------------------------------------------------------------- 1 | import spacy 2 | 3 | nlp = spacy.blank("en") 4 | doc = nlp("David Bowie is a PERSON") 5 | 6 | # Look up the hash for the string label "PERSON" 7 | person_hash = nlp.vocab.strings["PERSON"] 8 | print(person_hash) 9 | 10 | # Look up the person_hash to get the string 11 | person_string = nlp.vocab.strings[person_hash] 12 | print(person_string) 13 | -------------------------------------------------------------------------------- /exercises/en/solution_02_05_01.py: -------------------------------------------------------------------------------- 1 | import spacy 2 | 3 | nlp = spacy.blank("en") 4 | 5 | # Import the Doc class 6 | from spacy.tokens import Doc 7 | 8 | # Desired text: "spaCy is cool!" 9 | words = ["spaCy", "is", "cool", "!"] 10 | spaces = [True, True, False, False] 11 | 12 | # Create a Doc from the words and spaces 13 | doc = Doc(nlp.vocab, words=words, spaces=spaces) 14 | print(doc.text) 15 | -------------------------------------------------------------------------------- /exercises/en/solution_02_05_02.py: -------------------------------------------------------------------------------- 1 | import spacy 2 | 3 | nlp = spacy.blank("en") 4 | 5 | # Import the Doc class 6 | from spacy.tokens import Doc 7 | 8 | # Desired text: "Go, get started!" 9 | words = ["Go", ",", "get", "started", "!"] 10 | spaces = [False, True, True, False, False] 11 | 12 | # Create a Doc from the words and spaces 13 | doc = Doc(nlp.vocab, words=words, spaces=spaces) 14 | print(doc.text) 15 | -------------------------------------------------------------------------------- /exercises/en/solution_02_05_03.py: -------------------------------------------------------------------------------- 1 | import spacy 2 | 3 | nlp = spacy.blank("en") 4 | 5 | # Import the Doc class 6 | from spacy.tokens import Doc 7 | 8 | # Desired text: "Oh, really?!" 
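# Note: words and spaces must have the same length; spaces[i] is True if the token at index i is followed by a space in doc.text.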
9 | words = ["Oh", ",", "really", "?", "!"] 10 | spaces = [False, True, False, False, False] 11 | 12 | # Create a Doc from the words and spaces 13 | doc = Doc(nlp.vocab, words=words, spaces=spaces) 14 | print(doc.text) 15 | -------------------------------------------------------------------------------- /exercises/en/solution_02_07.py: -------------------------------------------------------------------------------- 1 | import spacy 2 | 3 | nlp = spacy.load("en_core_web_sm") 4 | doc = nlp("Berlin looks like a nice city") 5 | 6 | # Iterate over the tokens 7 | for token in doc: 8 | # Check if the current token is a proper noun 9 | if token.pos_ == "PROPN": 10 | # Check if the next token is a verb 11 | if doc[token.i + 1].pos_ == "VERB": 12 | print("Found proper noun before a verb:", token.text) 13 | -------------------------------------------------------------------------------- /exercises/en/solution_02_09.py: -------------------------------------------------------------------------------- 1 | import spacy 2 | 3 | # Load the en_core_web_md pipeline 4 | nlp = spacy.load("en_core_web_md") 5 | 6 | # Process a text 7 | doc = nlp("Two bananas in pyjamas") 8 | 9 | # Get the vector for the token "bananas" 10 | bananas_vector = doc[1].vector 11 | print(bananas_vector) 12 | -------------------------------------------------------------------------------- /exercises/en/solution_02_10_01.py: -------------------------------------------------------------------------------- 1 | import spacy 2 | 3 | nlp = spacy.load("en_core_web_md") 4 | 5 | doc1 = nlp("It's a warm summer day") 6 | doc2 = nlp("It's sunny outside") 7 | 8 | # Get the similarity of doc1 and doc2 9 | similarity = doc1.similarity(doc2) 10 | print(similarity) 11 | -------------------------------------------------------------------------------- /exercises/en/solution_02_10_02.py: -------------------------------------------------------------------------------- 1 | import spacy 2 | 3 | nlp = spacy.load("en_core_web_md") 4 | 5 | doc = nlp("TV and books") 6 | token1, token2 = doc[0], doc[2] 7 | 8 | # Get the similarity of the tokens "TV" and "books" 9 | similarity = token1.similarity(token2) 10 | print(similarity) 11 | -------------------------------------------------------------------------------- /exercises/en/solution_02_10_03.py: -------------------------------------------------------------------------------- 1 | import spacy 2 | 3 | nlp = spacy.load("en_core_web_md") 4 | 5 | doc = nlp("This was a great restaurant. 
Afterwards, we went to a really nice bar.") 6 | 7 | # Create spans for "great restaurant" and "really nice bar" 8 | span1 = doc[3:5] 9 | span2 = doc[12:15] 10 | 11 | # Get the similarity of the spans 12 | similarity = span1.similarity(span2) 13 | print(similarity) 14 | -------------------------------------------------------------------------------- /exercises/en/solution_03_03.py: -------------------------------------------------------------------------------- 1 | import spacy 2 | 3 | # Load the en_core_web_sm pipeline 4 | nlp = spacy.load("en_core_web_sm") 5 | 6 | # Print the names of the pipeline components 7 | print(nlp.pipe_names) 8 | 9 | # Print the full pipeline of (name, component) tuples 10 | print(nlp.pipeline) 11 | -------------------------------------------------------------------------------- /exercises/en/solution_03_14_01.py: -------------------------------------------------------------------------------- 1 | import json 2 | import spacy 3 | 4 | nlp = spacy.load("en_core_web_sm") 5 | 6 | with open("exercises/en/tweets.json", encoding="utf8") as f: 7 | TEXTS = json.loads(f.read()) 8 | 9 | # Process the texts and print the adjectives 10 | for doc in nlp.pipe(TEXTS): 11 | print([token.text for token in doc if token.pos_ == "ADJ"]) 12 | -------------------------------------------------------------------------------- /exercises/en/solution_03_14_02.py: -------------------------------------------------------------------------------- 1 | import json 2 | import spacy 3 | 4 | nlp = spacy.load("en_core_web_sm") 5 | 6 | with open("exercises/en/tweets.json", encoding="utf8") as f: 7 | TEXTS = json.loads(f.read()) 8 | 9 | # Process the texts and print the entities 10 | docs = list(nlp.pipe(TEXTS)) 11 | entities = [doc.ents for doc in docs] 12 | print(*entities) 13 | -------------------------------------------------------------------------------- /exercises/en/solution_03_14_03.py: -------------------------------------------------------------------------------- 1 | import spacy 2 | 3 | nlp = spacy.blank("en") 4 | 5 | people = ["David Bowie", "Angela Merkel", "Lady Gaga"] 6 | 7 | # Create a list of patterns for the PhraseMatcher 8 | patterns = list(nlp.pipe(people)) 9 | -------------------------------------------------------------------------------- /exercises/en/solution_03_16_01.py: -------------------------------------------------------------------------------- 1 | import spacy 2 | 3 | nlp = spacy.load("en_core_web_sm") 4 | text = ( 5 | "Chick-fil-A is an American fast food restaurant chain headquartered in " 6 | "the city of College Park, Georgia, specializing in chicken sandwiches." 
7 | ) 8 | 9 | # Only tokenize the text 10 | doc = nlp.make_doc(text) 11 | print([token.text for token in doc]) 12 | -------------------------------------------------------------------------------- /exercises/en/solution_04_07_01.sh: -------------------------------------------------------------------------------- 1 | python -m spacy init config ./config.cfg --lang en --pipeline ner 2 | -------------------------------------------------------------------------------- /exercises/en/solution_04_07_02.sh: -------------------------------------------------------------------------------- 1 | cat ./config.cfg 2 | -------------------------------------------------------------------------------- /exercises/en/solution_04_08.sh: -------------------------------------------------------------------------------- 1 | python -m spacy train ./exercises/en/config_gadget.cfg --output ./output --paths.train ./exercises/en/train_gadget.spacy --paths.dev ./exercises/en/dev_gadget.spacy 2 | -------------------------------------------------------------------------------- /exercises/en/test_02_05_01.py: -------------------------------------------------------------------------------- 1 | def test(): 2 | assert ( 3 | "from spacy.tokens import Doc" in __solution__ 4 | ), "Are you importing the Doc class correctly?" 5 | assert doc.text == "spaCy is cool!", "Are you sure you created the Doc correctly?" 6 | assert "print(doc.text)" in __solution__, "Are you printing the Doc's text?" 7 | __msg__.good("Well done!") 8 | -------------------------------------------------------------------------------- /exercises/en/test_02_10_01.py: -------------------------------------------------------------------------------- 1 | def test(): 2 | assert ( 3 | "doc1.similarity(doc2)" in __solution__ or "doc2.similarity(doc1)" in __solution__ 4 | ), "Are you comparing the similarity of the two docs?" 5 | assert ( 6 | 0 <= float(similarity) <= 1 7 | ), "The value of similarity needs to be a number between 0 and 1. Did you calculate it correctly?" 8 | __msg__.good("Well done!") 9 | -------------------------------------------------------------------------------- /exercises/en/test_02_10_02.py: -------------------------------------------------------------------------------- 1 | def test(): 2 | assert ( 3 | "token1.similarity(token2)" in __solution__ or "token2.similarity(token1)" in __solution__ 4 | ), "Are you comparing the similarity of the two tokens?" 5 | assert ( 6 | 0 <= float(similarity) <= 1 7 | ), "The value of similarity needs to be a number between 0 and 1. Did you calculate it correctly?" 8 | __msg__.good("Nicely done!") 9 | -------------------------------------------------------------------------------- /exercises/en/test_03_14_01.py: -------------------------------------------------------------------------------- 1 | def test(): 2 | assert ( 3 | "for doc in nlp.pipe(TEXTS)" in __solution__ 4 | ), "Are you iterating over docs yielded by nlp.pipe?" 5 | __msg__.good("Nice!") 6 | -------------------------------------------------------------------------------- /exercises/en/test_03_14_02.py: -------------------------------------------------------------------------------- 1 | def test(): 2 | assert ( 3 | "docs = list(nlp.pipe(TEXTS))" in __solution__ 4 | ), "Are you using nlp.pipe wrapped in a list?"
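# nlp.pipe returns a generator, so it has to be wrapped in list() to get all the Doc objects at once.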
5 | __msg__.good("Great work!") 6 | -------------------------------------------------------------------------------- /exercises/en/test_03_14_03.py: -------------------------------------------------------------------------------- 1 | def test(): 2 | assert ( 3 | "patterns = list(nlp.pipe(people))" in __solution__ 4 | ), "Are you using nlp.pipe wrapped in a list?" 5 | 6 | __msg__.good( 7 | "Good job! Let's move on to a practical example that uses nlp.pipe " 8 | "to process documents with additional metadata." 9 | ) 10 | -------------------------------------------------------------------------------- /exercises/en/test_03_16_01.py: -------------------------------------------------------------------------------- 1 | def test(): 2 | assert ( 3 | "doc = nlp.make_doc(text)" in __solution__ 4 | or "doc = nlp.tokenizer(text)" in __solution__ 5 | ), "Are you only tokenizing the text?" 6 | 7 | __msg__.good("Nicely done!") 8 | -------------------------------------------------------------------------------- /exercises/en/train_gadget.spacy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/explosion/spacy-course/77d8ee1630788b1fb0df1f80f999f7355941a37f/exercises/en/train_gadget.spacy -------------------------------------------------------------------------------- /exercises/es/adidas.json: -------------------------------------------------------------------------------- 1 | [ 2 | "Cómo pre-ordenar los adidas ZX", 3 | "Los nuevos adidas ZX vienen en camino", 4 | "¿Debería pagar €200 por un par de adidas ZX?", 5 | "¿Cuál es la diferencia entre los adidas 8000 y los adidas 4000?", 6 | "¡Necesito nuevas zapatillas! ¿Qué me recomiendan?" 7 | ] 8 | -------------------------------------------------------------------------------- /exercises/es/dev_gadget.spacy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/explosion/spacy-course/77d8ee1630788b1fb0df1f80f999f7355941a37f/exercises/es/dev_gadget.spacy -------------------------------------------------------------------------------- /exercises/es/exc_01_02_01.py: -------------------------------------------------------------------------------- 1 | # Importa spaCy 2 | import ____ 3 | 4 | # Crea el objeto nlp para procesar inglés 5 | nlp = ____ 6 | 7 | # Procesa un texto (aquí dice "Esta es una oración" en inglés) 8 | doc = nlp("This is a sentence.") 9 | 10 | # Imprime en pantalla el texto del documento 11 | print(____.text) 12 | -------------------------------------------------------------------------------- /exercises/es/exc_01_02_02.py: -------------------------------------------------------------------------------- 1 | # Importa spaCy 2 | import ____ 3 | 4 | # Crea el objeto nlp para procesar alemán 5 | nlp = ____ 6 | 7 | # Procesa un texto (aquí dice "Saludos cordiales!"
en alemán) 8 | doc = nlp("Liebe Grüße!") 9 | 10 | # Imprime en pantalla el texto del documento 11 | print(____.text) 12 | -------------------------------------------------------------------------------- /exercises/es/exc_01_02_03.py: -------------------------------------------------------------------------------- 1 | # Importa spaCy 2 | import ____ 3 | 4 | # Crea el objeto nlp para procesar español 5 | nlp = ____ 6 | 7 | # Procesa un texto 8 | doc = nlp("¿Cómo estás?") 9 | 10 | # Imprime en pantalla el texto del documento 11 | print(____.text) 12 | -------------------------------------------------------------------------------- /exercises/es/exc_01_03_01.py: -------------------------------------------------------------------------------- 1 | # Importa spaCy y crea el objeto nlp para procesar español 2 | import ____ 3 | 4 | nlp = ____ 5 | 6 | # Procesa el texto 7 | doc = ____("Me gustan las panteras negras y los leones.") 8 | 9 | # Selecciona el primer token 10 | first_token = doc[____] 11 | 12 | # Imprime en pantalla el texto del token 13 | print(first_token.____) 14 | -------------------------------------------------------------------------------- /exercises/es/exc_01_07.py: -------------------------------------------------------------------------------- 1 | import spacy 2 | 3 | # Carga el modelo "es_core_news_sm" 4 | nlp = ____ 5 | 6 | text = ( 7 | "De acuerdo con la revista global de negocios Fortune, Apple fue " 8 | "la empresa más admirada en el mundo entre 2008 y 2012." 9 | ) 10 | 11 | # Procesa el texto 12 | doc = ____ 13 | 14 | # Imprime en pantalla el texto del documento 15 | print(____.____) 16 | -------------------------------------------------------------------------------- /exercises/es/exc_02_02_01.py: -------------------------------------------------------------------------------- 1 | import spacy 2 | 3 | nlp = spacy.load("es_core_news_sm") 4 | doc = nlp("Yo tengo un gato") 5 | 6 | # Busca el hash para la palabra "gato" 7 | gato_hash = ____.____.____[____] 8 | print(gato_hash) 9 | 10 | # Busca el gato_hash para obtener el string 11 | gato_string = ____.____.____[____] 12 | print(gato_string) 13 | -------------------------------------------------------------------------------- /exercises/es/exc_02_02_02.py: -------------------------------------------------------------------------------- 1 | import spacy 2 | 3 | nlp = spacy.load("es_core_news_sm") 4 | doc = nlp("David Bowie tiene el label PER") 5 | 6 | # Busca el hash para el label del string "PER" 7 | person_hash = ____.____.____[____] 8 | print(person_hash) 9 | 10 | # Busca el person_hash para obtener el string 11 | person_string = ____.____.____[____] 12 | print(person_string) 13 | -------------------------------------------------------------------------------- /exercises/es/exc_02_05_01.py: -------------------------------------------------------------------------------- 1 | import spacy 2 | 3 | nlp = spacy.load("es_core_news_sm") 4 | 5 | # Importa la clase Doc 6 | from ____ import ____ 7 | 8 | # El texto deseado: "spaCy es divertido!" 
9 | words = ["spaCy", "es", "divertido", "!"] 10 | spaces = [True, True, False, False] 11 | 12 | # Crea un Doc a partir de las palabras y los espacios 13 | doc = ____(____, words=words, spaces=spaces) 14 | print(doc.text) 15 | -------------------------------------------------------------------------------- /exercises/es/exc_02_05_02.py: -------------------------------------------------------------------------------- 1 | import spacy 2 | 3 | nlp = spacy.load("es_core_news_sm") 4 | 5 | # Importa la clase Doc 6 | from ____ import ____ 7 | 8 | # El texto deseado: "¡Vamos, empieza!" 9 | words = ["¡", "Vamos", ",", "empieza", "!"] 10 | spaces = [____, ____, ____, ____, ____] 11 | 12 | # Crea un Doc a partir de las palabras y los espacios 13 | doc = ____(____, ____=____, ____=____) 14 | print(doc.text) 15 | -------------------------------------------------------------------------------- /exercises/es/exc_02_05_03.py: -------------------------------------------------------------------------------- 1 | import spacy 2 | 3 | nlp = spacy.load("es_core_news_sm") 4 | 5 | # Importa la clase Doc 6 | from ____ import ____ 7 | 8 | # El texto deseado: "¡¿En serio?!" 9 | words = [____, ____, ____, ____, ____, ____] 10 | spaces = [____, ____, ____, ____, ____, ____] 11 | 12 | # Crea un Doc a partir de las palabras y los espacios 13 | doc = ____(____, ____=____, ____=____) 14 | print(doc.text) 15 | -------------------------------------------------------------------------------- /exercises/es/exc_02_09.py: -------------------------------------------------------------------------------- 1 | import spacy 2 | 3 | # Carga el modelo es_core_news_md 4 | nlp = ____ 5 | 6 | # Procesa un texto 7 | doc = nlp("Hoy hice pan de banano") 8 | 9 | # Obtén el vector para el token "banano" 10 | banano_vector = ____.____ 11 | print(banano_vector) 12 | -------------------------------------------------------------------------------- /exercises/es/exc_02_10_01.py: -------------------------------------------------------------------------------- 1 | import spacy 2 | 3 | nlp = spacy.load("es_core_news_md") 4 | 5 | doc1 = nlp("Es un cálido día de verano") 6 | doc2 = nlp("Hay sol afuera") 7 | 8 | # Obtén la similitud entre el doc1 y el doc2 9 | similarity = ____.____(____) 10 | print(similarity) 11 | -------------------------------------------------------------------------------- /exercises/es/exc_02_10_02.py: -------------------------------------------------------------------------------- 1 | import spacy 2 | 3 | nlp = spacy.load("es_core_news_md") 4 | 5 | doc = nlp("TV y libros") 6 | token1, token2 = doc[0], doc[2] 7 | 8 | # Obtén la similitud entre los tokens "TV" y "libros" 9 | similarity = ____.____(____) 10 | print(similarity) 11 | -------------------------------------------------------------------------------- /exercises/es/exc_02_10_03.py: -------------------------------------------------------------------------------- 1 | import spacy 2 | 3 | nlp = spacy.load("es_core_news_md") 4 | 5 | doc = nlp( 6 | "Estuvimos en un restaurante genial. Luego, fuimos a un bar muy divertido." 
7 | ) 8 | 9 | # Crea los spans para "restaurante genial" y "bar muy divertido" 10 | span1 = ____ 11 | span2 = ____ 12 | 13 | # Obtén la similitud entre los dos spans 14 | similarity = ____.____(____) 15 | print(similarity) 16 | -------------------------------------------------------------------------------- /exercises/es/exc_03_03.py: -------------------------------------------------------------------------------- 1 | import spacy 2 | 3 | # Carga el modelo es_core_news_sm 4 | nlp = ____ 5 | 6 | # Imprime en pantalla los nombres de los componentes del pipeline 7 | print(____.____) 8 | 9 | # Imprime en pantalla el pipeline entero de tuples (name, component) 10 | print(____.____) 11 | -------------------------------------------------------------------------------- /exercises/es/exc_03_14_01.py: -------------------------------------------------------------------------------- 1 | import json 2 | import spacy 3 | 4 | nlp = spacy.load("es_core_news_sm") 5 | 6 | with open("exercises/es/tweets.json", encoding="utf8") as f: 7 | TEXTS = json.loads(f.read()) 8 | 9 | # Procesa los textos e imprime los verbos en pantalla 10 | for text in TEXTS: 11 | doc = nlp(text) 12 | print([token.text for token in doc if token.pos_ == "VERB"]) 13 | -------------------------------------------------------------------------------- /exercises/es/exc_03_14_02.py: -------------------------------------------------------------------------------- 1 | import json 2 | import spacy 3 | 4 | nlp = spacy.load("es_core_news_sm") 5 | 6 | with open("exercises/es/tweets.json", encoding="utf8") as f: 7 | TEXTS = json.loads(f.read()) 8 | 9 | # Procesa los textos e imprime las entidades en pantalla 10 | docs = [nlp(text) for text in TEXTS] 11 | entities = [doc.ents for doc in docs] 12 | print(*entities) 13 | -------------------------------------------------------------------------------- /exercises/es/exc_03_14_03.py: -------------------------------------------------------------------------------- 1 | import spacy 2 | 3 | nlp = spacy.blank("es") 4 | 5 | people = ["David Bowie", "Angela Merkel", "Lady Gaga"] 6 | 7 | # Crea una lista de patrones para el PhraseMatcher 8 | patterns = [nlp(person) for person in people] 9 | -------------------------------------------------------------------------------- /exercises/es/exc_03_16_01.py: -------------------------------------------------------------------------------- 1 | import spacy 2 | 3 | nlp = spacy.load("es_core_news_sm") 4 | text = ( 5 | "Chick-fil-A es una cadena de restaurantes de comida rápida " 6 | "americana con sede en la ciudad de College Park, Georgia, " 7 | "especializada en sándwiches de pollo." 
8 | ) 9 | 10 | # Únicamente convierte el texto en tokens 11 | doc = nlp(text) 12 | print([token.text for token in doc]) 13 | -------------------------------------------------------------------------------- /exercises/es/exc_04_07_01.sh: -------------------------------------------------------------------------------- 1 | python -m spacy ____ ____ ____ --___ ____ --____ ____ 2 | -------------------------------------------------------------------------------- /exercises/es/exc_04_07_02.sh: -------------------------------------------------------------------------------- 1 | cat ./config.cfg 2 | -------------------------------------------------------------------------------- /exercises/es/exc_04_08.sh: -------------------------------------------------------------------------------- 1 | python -m spacy ____ ____ --output ____ --paths.train ____ --paths.dev ____ 2 | -------------------------------------------------------------------------------- /exercises/es/solution_01_02_01.py: -------------------------------------------------------------------------------- 1 | # Importa spaCy 2 | import spacy 3 | 4 | # Crea el objeto nlp para procesar inglés 5 | nlp = spacy.blank("en") 6 | 7 | # Procesa un texto (aquí dice "Esta es una oración" en inglés) 8 | doc = nlp("This is a sentence.") 9 | 10 | # Imprime en pantalla el texto del documento 11 | print(doc.text) 12 | 13 | -------------------------------------------------------------------------------- /exercises/es/solution_01_02_02.py: -------------------------------------------------------------------------------- 1 | # Importa spaCy 2 | import spacy 3 | 4 | # Crea el objeto nlp para procesar alemán 5 | nlp = spacy.blank("de") 6 | 7 | # Procesa un texto (aquí dice "Saludos cordiales!" en alemán) 8 | doc = nlp("Liebe Grüße!") 9 | 10 | # Imprime en pantalla el texto del documento 11 | print(doc.text) 12 | 13 | -------------------------------------------------------------------------------- /exercises/es/solution_01_02_03.py: -------------------------------------------------------------------------------- 1 | # Importa spaCy 2 | import spacy 3 | 4 | # Crea el objeto nlp para procesar español 5 | nlp = spacy.blank("es") 6 | 7 | # Procesa un texto 8 | doc = nlp("¿Cómo estás?") 9 | 10 | # Imprime en pantalla el texto del documento 11 | print(doc.text) 12 | -------------------------------------------------------------------------------- /exercises/es/solution_01_03_01.py: -------------------------------------------------------------------------------- 1 | # Importa spaCy y crea el objeto nlp para procesar español 2 | import spacy 3 | 4 | nlp = spacy.blank("es") 5 | 6 | # Procesa el texto 7 | doc = nlp("Me gustan las panteras negras y los leones.") 8 | 9 | # Selecciona el primer token 10 | first_token = doc[0] 11 | 12 | # Imprime en pantalla el texto del token 13 | print(first_token.text) 14 | -------------------------------------------------------------------------------- /exercises/es/solution_01_07.py: -------------------------------------------------------------------------------- 1 | import spacy 2 | 3 | # Carga el modelo "es_core_news_sm" 4 | nlp = spacy.load("es_core_news_sm") 5 | 6 | text = ( 7 | "De acuerdo con la revista global de negocios Fortune, Apple fue " 8 | "la empresa más admirada en el mundo entre 2008 y 2012." 
9 | ) 10 | 11 | # Procesa el texto 12 | doc = nlp(text) 13 | 14 | # Imprime en pantalla el texto del documento 15 | print(doc.text) 16 | -------------------------------------------------------------------------------- /exercises/es/solution_02_02_01.py: -------------------------------------------------------------------------------- 1 | import spacy 2 | 3 | nlp = spacy.load("es_core_news_sm") 4 | doc = nlp("Yo tengo un gato") 5 | 6 | # Busca el hash para la palabra "gato" 7 | gato_hash = nlp.vocab.strings["gato"] 8 | print(gato_hash) 9 | 10 | # Busca el gato_hash para obtener el string 11 | gato_string = nlp.vocab.strings[gato_hash] 12 | print(gato_string) 13 | -------------------------------------------------------------------------------- /exercises/es/solution_02_02_02.py: -------------------------------------------------------------------------------- 1 | import spacy 2 | 3 | nlp = spacy.load("es_core_news_sm") 4 | doc = nlp("David Bowie tiene el label PER") 5 | 6 | # Busca el hash para el label del string "PER" 7 | person_hash = nlp.vocab.strings["PER"] 8 | print(person_hash) 9 | 10 | # Busca el person_hash para obtener el string 11 | person_string = nlp.vocab.strings[person_hash] 12 | print(person_string) 13 | -------------------------------------------------------------------------------- /exercises/es/solution_02_05_01.py: -------------------------------------------------------------------------------- 1 | import spacy 2 | 3 | nlp = spacy.load("es_core_news_sm") 4 | 5 | # Importa la clase Doc 6 | from spacy.tokens import Doc 7 | 8 | # El texto deseado: "spaCy es divertido!" 9 | words = ["spaCy", "es", "divertido", "!"] 10 | spaces = [True, True, False, False] 11 | 12 | # Crea un Doc a partir de las palabras y los espacios 13 | doc = Doc(nlp.vocab, words=words, spaces=spaces) 14 | print(doc.text) 15 | -------------------------------------------------------------------------------- /exercises/es/solution_02_05_02.py: -------------------------------------------------------------------------------- 1 | import spacy 2 | 3 | nlp = spacy.load("es_core_news_sm") 4 | 5 | # Importa la clase Doc 6 | from spacy.tokens import Doc 7 | 8 | # El texto deseado: "¡Vamos, empieza!" 9 | words = ["¡", "Vamos", ",", "empieza", "!"] 10 | spaces = [False, False, True, False, False] 11 | 12 | # Crea un Doc a partir de las palabras y los espacios 13 | doc = Doc(nlp.vocab, words=words, spaces=spaces) 14 | print(doc.text) 15 | -------------------------------------------------------------------------------- /exercises/es/solution_02_05_03.py: -------------------------------------------------------------------------------- 1 | import spacy 2 | 3 | nlp = spacy.load("es_core_news_sm") 4 | 5 | # Importa la clase Doc 6 | from spacy.tokens import Doc 7 | 8 | # El texto deseado: "¡¿En serio?!" 
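# Nota: "¡" y "¿" son tokens separados, por eso words y spaces tienen seis elementos.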
9 | words = ["¡", "¿", "En", "serio", "?", "!"] 10 | spaces = [False, False, True, False, False, False] 11 | 12 | # Crea un Doc a partir de las palabras y los espacios 13 | doc = Doc(nlp.vocab, words=words, spaces=spaces) 14 | print(doc.text) 15 | -------------------------------------------------------------------------------- /exercises/es/solution_02_09.py: -------------------------------------------------------------------------------- 1 | import spacy 2 | 3 | # Carga el modelo es_core_news_md 4 | nlp = spacy.load("es_core_news_md") 5 | 6 | # Procesa un texto 7 | doc = nlp("Hoy hice pan de banano") 8 | 9 | # Obtén el vector para el token "banano" 10 | banano_vector = doc[4].vector 11 | print(banano_vector) 12 | -------------------------------------------------------------------------------- /exercises/es/solution_02_10_01.py: -------------------------------------------------------------------------------- 1 | import spacy 2 | 3 | nlp = spacy.load("es_core_news_md") 4 | 5 | doc1 = nlp("Es un cálido día de verano") 6 | doc2 = nlp("Hay sol afuera") 7 | 8 | # Obtén la similitud entre el doc1 y el doc2 9 | similarity = doc1.similarity(doc2) 10 | print(similarity) 11 | -------------------------------------------------------------------------------- /exercises/es/solution_02_10_02.py: -------------------------------------------------------------------------------- 1 | import spacy 2 | 3 | nlp = spacy.load("es_core_news_md") 4 | 5 | doc = nlp("TV y libros") 6 | token1, token2 = doc[0], doc[2] 7 | 8 | # Obtén la similitud entre los tokens "TV" y "libros" 9 | similarity = token1.similarity(token2) 10 | print(similarity) 11 | -------------------------------------------------------------------------------- /exercises/es/solution_02_10_03.py: -------------------------------------------------------------------------------- 1 | import spacy 2 | 3 | nlp = spacy.load("es_core_news_md") 4 | 5 | doc = nlp( 6 | "Estuvimos en un restaurante genial. Luego, fuimos a un bar muy divertido." 
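# "restaurante genial" corresponde a los tokens 3-4 y "bar muy divertido" a los tokens 11-13, de ahí los índices de los spans más abajo.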
7 | ) 8 | 9 | # Crea los spans para "restaurante genial" y "bar muy divertido" 10 | span1 = doc[3:5] 11 | span2 = doc[11:14] 12 | 13 | # Obtén la similitud entre los dos spans 14 | similarity = span1.similarity(span2) 15 | print(similarity) 16 | -------------------------------------------------------------------------------- /exercises/es/solution_03_03.py: -------------------------------------------------------------------------------- 1 | import spacy 2 | 3 | # Carga el modelo es_core_news_sm 4 | nlp = spacy.load("es_core_news_sm") 5 | 6 | # Imprime en pantalla los nombres de los componentes del pipeline 7 | print(nlp.pipe_names) 8 | 9 | # Imprime en pantalla el pipeline entero de tuples (name, component) 10 | print(nlp.pipeline) 11 | -------------------------------------------------------------------------------- /exercises/es/solution_03_14_01.py: -------------------------------------------------------------------------------- 1 | import json 2 | import spacy 3 | 4 | nlp = spacy.load("es_core_news_sm") 5 | 6 | with open("exercises/es/tweets.json", encoding="utf8") as f: 7 | TEXTS = json.loads(f.read()) 8 | 9 | # Procesa los textos e imprime los verbos en pantalla 10 | for doc in nlp.pipe(TEXTS): 11 | print([token.text for token in doc if token.pos_ == "VERB"]) 12 | -------------------------------------------------------------------------------- /exercises/es/solution_03_14_02.py: -------------------------------------------------------------------------------- 1 | import json 2 | import spacy 3 | 4 | nlp = spacy.load("es_core_news_sm") 5 | 6 | with open("exercises/es/tweets.json", encoding="utf8") as f: 7 | TEXTS = json.loads(f.read()) 8 | 9 | # Procesa los textos e imprime las entidades en pantalla 10 | docs = list(nlp.pipe(TEXTS)) 11 | entities = [doc.ents for doc in docs] 12 | print(*entities) 13 | -------------------------------------------------------------------------------- /exercises/es/solution_03_14_03.py: -------------------------------------------------------------------------------- 1 | from spacy.lang.es import Spanish 2 | 3 | nlp = Spanish() 4 | 5 | people = ["David Bowie", "Angela Merkel", "Lady Gaga"] 6 | 7 | # Crea una lista de patrones para el PhraseMatcher 8 | patterns = list(nlp.pipe(people)) 9 | -------------------------------------------------------------------------------- /exercises/es/solution_03_16_01.py: -------------------------------------------------------------------------------- 1 | import spacy 2 | 3 | nlp = spacy.load("es_core_news_sm") 4 | text = ( 5 | "Chick-fil-A es una cadena de restaurantes de comida rápida " 6 | "americana con sede en la ciudad de College Park, Georgia, " 7 | "especializada en sándwiches de pollo." 
8 | ) 9 | 10 | # Únicamente convierte el texto en tokens 11 | doc = nlp.make_doc(text) 12 | print([token.text for token in doc]) 13 | -------------------------------------------------------------------------------- /exercises/es/solution_04_07_01.sh: -------------------------------------------------------------------------------- 1 | python -m spacy init config ./config.cfg --lang es --pipeline ner 2 | -------------------------------------------------------------------------------- /exercises/es/solution_04_07_02.sh: -------------------------------------------------------------------------------- 1 | cat ./config.cfg 2 | -------------------------------------------------------------------------------- /exercises/es/solution_04_08.sh: -------------------------------------------------------------------------------- 1 | python -m spacy train ./exercises/es/config_gadget.cfg --output ./output --paths.train ./exercises/es/train_gadget.spacy --paths.dev ./exercises/es/dev_gadget.spacy 2 | -------------------------------------------------------------------------------- /exercises/es/test_02_02_01.py: -------------------------------------------------------------------------------- 1 | def test(): 2 | assert gato_hash == nlp.vocab.strings["gato"], "¿Asignaste el hash correcto?" 3 | assert 'nlp.vocab.strings["gato"]' in __solution__ 4 | assert gato_string == "gato", "¿Obtuviste el string correcto?" 5 | assert ( 6 | "nlp.vocab.strings[gato_hash]" in __solution__ 7 | ), "¿Obtuviste el string usando el hash?" 8 | 9 | __msg__.good("¡Muy buen trabajo!") 10 | -------------------------------------------------------------------------------- /exercises/es/test_02_02_02.py: -------------------------------------------------------------------------------- 1 | def test(): 2 | assert person_hash == nlp.vocab.strings["PER"], "¿Asignaste el hash correcto?" 3 | assert 'nlp.vocab.strings["PER"]' in __solution__ 4 | assert person_string == "PER", "¿Obtuviste el string correcto?" 5 | assert ( 6 | "nlp.vocab.strings[person_hash]" in __solution__ 7 | ), "¿Obtuviste el string usando el hash?" 8 | 9 | __msg__.good("¡Buen trabajo!") 10 | -------------------------------------------------------------------------------- /exercises/es/test_02_05_01.py: -------------------------------------------------------------------------------- 1 | def test(): 2 | assert ( 3 | "from spacy.tokens import Doc" in __solution__ 4 | ), "¿Estás importando la clase Doc correctamente?" 5 | assert doc.text == "spaCy es divertido!", "¿Creaste el Doc correctamente?" 6 | assert "print(doc.text)" in __solution__, "¿Estás imprimiendo en pantalla el texto del Doc?" 7 | __msg__.good("¡Bien hecho!") 8 | -------------------------------------------------------------------------------- /exercises/es/test_02_10_01.py: -------------------------------------------------------------------------------- 1 | def test(): 2 | assert ( 3 | "doc1.similarity(doc2)" in __solution__ or "doc2.similarity(doc1)" in __solution__ 4 | ), "¿Estás comparando la similitud entre los dos docs?" 5 | assert ( 6 | 0 <= float(similarity) <= 1 7 | ), "El valor de la similitud debe ser un número entre 0 y 1. ¿Lo calculaste correctamente?"
8 | __msg__.good("¡Bien hecho!") 9 | -------------------------------------------------------------------------------- /exercises/es/test_02_10_02.py: -------------------------------------------------------------------------------- 1 | def test(): 2 | assert ( 3 | "token1.similarity(token2)" in __solution__ or "token2.similarity(token1)" in __solution__ 4 | ), "¿Estás comparando la similitud entre los dos tokens?" 5 | assert ( 6 | 0 <= float(similarity) <= 1 7 | ), "El valor de la similitud debe ser un número entre 0 y 1. ¿Lo calculaste correctamente?" 8 | __msg__.good("¡Muy bien hecho!") 9 | -------------------------------------------------------------------------------- /exercises/es/test_03_14_01.py: -------------------------------------------------------------------------------- 1 | def test(): 2 | assert ( 3 | "for doc in nlp.pipe(TEXTS)" in __solution__ 4 | ), "¿Estás iterando sobre los docs que fueron devueltos usando yield por nlp.pipe?" 5 | __msg__.good("¡Bien!") 6 | -------------------------------------------------------------------------------- /exercises/es/test_03_14_02.py: -------------------------------------------------------------------------------- 1 | def test(): 2 | assert ( 3 | "docs = list(nlp.pipe(TEXTS))" in __solution__ 4 | ), "¿Estás usando nlp.pipe envuelto en una lista?" 5 | __msg__.good("¡Muy buen trabajo!") 6 | -------------------------------------------------------------------------------- /exercises/es/test_03_14_03.py: -------------------------------------------------------------------------------- 1 | def test(): 2 | assert ( 3 | "patterns = list(nlp.pipe(people))" in __solution__ 4 | ), "¿Estás usando nlp.pipe envuelto en una lista?" 5 | 6 | __msg__.good( 7 | "¡Buen trabajo! Ahora continuemos con un ejemplo práctico que usa nlp.pipe " 8 | "para procesar documentos con metadatos adicionales." 9 | ) 10 | -------------------------------------------------------------------------------- /exercises/es/test_03_16_01.py: -------------------------------------------------------------------------------- 1 | def test(): 2 | assert ( 3 | "doc = nlp.make_doc(text)" in __solution__ 4 | or "doc = nlp.tokenizer(text)" in __solution__ 5 | ), "¿Solo estás convirtiendo el texto en tokens?" 6 | 7 | __msg__.good("¡Bien hecho!") 8 | -------------------------------------------------------------------------------- /exercises/es/test_03_16_02.py: -------------------------------------------------------------------------------- 1 | def test(): 2 | assert ( 3 | 'with nlp.select_pipes(disable=["parser"])' in __solution__ 4 | ), "¿Estás usando nlp.select_pipes con los componentes correctos?" 5 | 6 | __msg__.good( 7 | "¡Perfecto! Ahora que has practicado los consejos y trucos de rendimiento, " 8 | "puedes pasar al siguiente capítulo y entrenar modelos de redes neurales de spaCy."
9 | ) 10 | -------------------------------------------------------------------------------- /exercises/es/train_gadget.spacy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/explosion/spacy-course/77d8ee1630788b1fb0df1f80f999f7355941a37f/exercises/es/train_gadget.spacy -------------------------------------------------------------------------------- /exercises/fr/dev_gadget.spacy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/explosion/spacy-course/77d8ee1630788b1fb0df1f80f999f7355941a37f/exercises/fr/dev_gadget.spacy -------------------------------------------------------------------------------- /exercises/fr/exc_01_02_01.py: -------------------------------------------------------------------------------- 1 | # Importe spaCy 2 | import ____ 3 | 4 | # Crée l'objet nlp français 5 | nlp = ____ 6 | 7 | # Traite un texte 8 | doc = nlp("Ceci est une phrase.") 9 | 10 | # Affiche le texte du document 11 | print(____.text) 12 | -------------------------------------------------------------------------------- /exercises/fr/exc_01_02_02.py: -------------------------------------------------------------------------------- 1 | # Importe spaCy 2 | import ____ 3 | 4 | # Crée l'objet nlp anglais 5 | nlp = ____ 6 | 7 | # Traite un texte (il signifie "Ceci est une phrase" en anglais) 8 | doc = nlp("This is a sentence.") 9 | 10 | # Affiche le texte du document 11 | print(____.text) 12 | -------------------------------------------------------------------------------- /exercises/fr/exc_01_02_03.py: -------------------------------------------------------------------------------- 1 | # Importe spaCy 2 | import ____ 3 | 4 | # Crée l'objet nlp espagnol 5 | nlp = ____ 6 | 7 | # Traite un texte (il signifie "Comment vas-tu ?" en espagnol) 8 | doc = nlp("¿Cómo estás?") 9 | 10 | # Affiche le texte du document 11 | print(____.text) 12 | -------------------------------------------------------------------------------- /exercises/fr/exc_01_03_01.py: -------------------------------------------------------------------------------- 1 | # Importe spacy et crée l'objet nlp français 2 | import ____ 3 | 4 | nlp = ____ 5 | 6 | # Traite le texte 7 | doc = ____("La forêt est peuplée de loups gris et renards roux.") 8 | 9 | # Sélectionne le premier token 10 | first_token = doc[____] 11 | 12 | # Affiche le texte du premier token 13 | print(first_token.____) 14 | -------------------------------------------------------------------------------- /exercises/fr/exc_01_07.py: -------------------------------------------------------------------------------- 1 | import spacy 2 | 3 | # Charge le pipeline "fr_core_news_sm" 4 | nlp = ____ 5 | 6 | text = "Apple a été créée en 1976 par Steve Wozniak, Steve Jobs et Ron Wayne." 7 | 8 | # Traite le texte 9 | doc = ____ 10 | 11 | # Affiche le texte du document 12 | print(____.____) 13 | -------------------------------------------------------------------------------- /exercises/fr/exc_01_08_02.py: -------------------------------------------------------------------------------- 1 | import spacy 2 | 3 | nlp = spacy.load("fr_core_news_sm") 4 | 5 | text = "Apple a été créée en 1976 par Steve Wozniak, Steve Jobs et Ron Wayne." 
6 | 7 | # Traite le texte 8 | doc = ____ 9 | 10 | # Itère sur les entités prédites 11 | for ent in ____.____: 12 | # Affiche le texte de l'entité et son label 13 | print(ent.____, ____.____) 14 | -------------------------------------------------------------------------------- /exercises/fr/exc_02_02_01.py: -------------------------------------------------------------------------------- 1 | import spacy 2 | 3 | nlp = spacy.blank("fr") 4 | doc = nlp("J'ai un chat") 5 | 6 | # Recherche le hash pour le mot "chat" 7 | cat_hash = ____.____.____[____] 8 | print(cat_hash) 9 | 10 | # Recherche cat_hash pour obtenir la chaine 11 | cat_string = ____.____.____[____] 12 | print(cat_string) 13 | -------------------------------------------------------------------------------- /exercises/fr/exc_02_02_02.py: -------------------------------------------------------------------------------- 1 | import spacy 2 | 3 | nlp = spacy.blank("fr") 4 | doc = nlp("David Bowie a le label PER") 5 | 6 | # Cherche le hash pour le label de chaine "PER" 7 | person_hash = ____.____.____[____] 8 | print(person_hash) 9 | 10 | # Cherche person_hash pour obtenir la chaine 11 | person_string = ____.____.____[____] 12 | print(person_string) 13 | -------------------------------------------------------------------------------- /exercises/fr/exc_02_05_01.py: -------------------------------------------------------------------------------- 1 | import spacy 2 | 3 | nlp = spacy.blank("fr") 4 | 5 | # Importe la classe Doc 6 | from ____ import ____ 7 | 8 | # Texte désiré : "spaCy est cool." 9 | words = ["spaCy", "est", "cool", "."] 10 | spaces = [True, True, False, False] 11 | 12 | # Crée un Doc à partir des mots et des espaces 13 | doc = ____(____, words=words, spaces=spaces) 14 | print(doc.text) 15 | -------------------------------------------------------------------------------- /exercises/fr/exc_02_05_02.py: -------------------------------------------------------------------------------- 1 | import spacy 2 | 3 | nlp = spacy.blank("fr") 4 | 5 | # Importe la classe Doc 6 | from ____ import ____ 7 | 8 | # Texte désiré : "Allez, on commence !" 9 | words = ["Allez", ",", "on", "commence", "!"] 10 | spaces = [____, ____, ____, ____, ____] 11 | 12 | # Crée un Doc à partir des mots et des espaces 13 | doc = ____(____, ____=____, ____=____) 14 | print(doc.text) 15 | -------------------------------------------------------------------------------- /exercises/fr/exc_02_05_03.py: -------------------------------------------------------------------------------- 1 | import spacy 2 | 3 | nlp = spacy.blank("fr") 4 | 5 | # Importe la classe Doc 6 | from ____ import ____ 7 | 8 | # Texte désiré : "Oh, vraiment ?!" 
9 | words = [____, ____, ____, ____, ____] 10 | spaces = [____, ____, ____, ____, ____] 11 | 12 | # Crée un Doc à partir des mots et des espaces 13 | doc = ____(____, ____=____, ____=____) 14 | print(doc.text) 15 | -------------------------------------------------------------------------------- /exercises/fr/exc_02_09.py: -------------------------------------------------------------------------------- 1 | import spacy 2 | 3 | # Charge le pipeline fr_core_news_md 4 | nlp = ____ 5 | 6 | # Traite le texte 7 | doc = nlp("Deux bananes en pyjamas") 8 | 9 | # Obtiens le vecteur pour le token "bananes" 10 | bananas_vector = ____.____ 11 | print(bananas_vector) 12 | -------------------------------------------------------------------------------- /exercises/fr/exc_02_10_01.py: -------------------------------------------------------------------------------- 1 | import spacy 2 | 3 | nlp = spacy.load("fr_core_news_md") 4 | 5 | doc1 = nlp("Le temps est au beau fixe") 6 | doc2 = nlp("Le ciel est clair") 7 | 8 | # Obtiens la similarité entre doc1 et doc2 9 | similarity = ____.____(____) 10 | print(similarity) 11 | -------------------------------------------------------------------------------- /exercises/fr/exc_02_10_02.py: -------------------------------------------------------------------------------- 1 | import spacy 2 | 3 | nlp = spacy.load("fr_core_news_md") 4 | 5 | doc = nlp("télé et livres") 6 | token1, token2 = doc[0], doc[2] 7 | 8 | # Obtiens la similarité entre les tokens "télé" et "livres" 9 | similarity = ____.____(____) 10 | print(similarity) 11 | -------------------------------------------------------------------------------- /exercises/fr/exc_02_10_03.py: -------------------------------------------------------------------------------- 1 | import spacy 2 | 3 | nlp = spacy.load("fr_core_news_md") 4 | 5 | doc = nlp("C'était un super restaurant. 
Ensuite nous sommes allés dans un bar vraiment sympa.") 6 | 7 | # Crée des spans pour "super restaurant" et "bar vraiment sympa" 8 | span1 = ____ 9 | span2 = ____ 10 | 11 | # Obtiens la similarité entre les spans 12 | similarity = ____.____(____) 13 | print(similarity) 14 | -------------------------------------------------------------------------------- /exercises/fr/exc_03_03.py: -------------------------------------------------------------------------------- 1 | import spacy 2 | 3 | # Charge le pipeline fr_core_news_sm 4 | nlp = ____ 5 | 6 | # Affiche les noms des composants du pipeline 7 | print(____.____) 8 | 9 | # Affiche tous les tuples de (name, component) du pipeline 10 | print(____.____) 11 | -------------------------------------------------------------------------------- /exercises/fr/exc_03_14_01.py: -------------------------------------------------------------------------------- 1 | import json 2 | import spacy 3 | 4 | nlp = spacy.load("fr_core_news_sm") 5 | 6 | with open("exercises/fr/tweets.json", encoding="utf8") as f: 7 | TEXTS = json.loads(f.read()) 8 | 9 | # Traite les textes et affiche les adjectifs 10 | for text in TEXTS: 11 | doc = nlp(text) 12 | print([token.text for token in doc if token.pos_ == "ADJ"]) 13 | -------------------------------------------------------------------------------- /exercises/fr/exc_03_14_02.py: -------------------------------------------------------------------------------- 1 | import json 2 | import spacy 3 | 4 | nlp = spacy.load("fr_core_news_sm") 5 | 6 | with open("exercises/fr/tweets.json", encoding="utf8") as f: 7 | TEXTS = json.loads(f.read()) 8 | 9 | # Traite les textes et affiche les entités 10 | docs = [nlp(text) for text in TEXTS] 11 | entities = [doc.ents for doc in docs] 12 | print(*entities) 13 | -------------------------------------------------------------------------------- /exercises/fr/exc_03_14_03.py: -------------------------------------------------------------------------------- 1 | import spacy 2 | 3 | nlp = spacy.blank("fr") 4 | 5 | people = ["David Bowie", "Angela Merkel", "Lady Gaga"] 6 | 7 | # Crée une liste de motifs pour le PhraseMatcher 8 | patterns = [nlp(person) for person in people] 9 | -------------------------------------------------------------------------------- /exercises/fr/exc_03_16_01.py: -------------------------------------------------------------------------------- 1 | import spacy 2 | 3 | nlp = spacy.load("fr_core_news_sm") 4 | text = ( 5 | "Le groupe aéronautique Airbus construit des avions et des " 6 | "hélicoptères vendus dans le monde entier. Le siège opérationnel du " 7 | "groupe est situé en France à Toulouse dans la région Occitanie." 
8 | ) 9 | 10 | # Tokenise seulement le texte 11 | doc = nlp(text) 12 | print([token.text for token in doc]) 13 | -------------------------------------------------------------------------------- /exercises/fr/exc_04_07_01.sh: -------------------------------------------------------------------------------- 1 | python -m spacy ____ ____ ____ --___ ____ --____ ____ 2 | -------------------------------------------------------------------------------- /exercises/fr/exc_04_07_02.sh: -------------------------------------------------------------------------------- 1 | cat ./config.cfg 2 | -------------------------------------------------------------------------------- /exercises/fr/exc_04_08.sh: -------------------------------------------------------------------------------- 1 | python -m spacy ____ ____ --output ____ --paths.train ____ --paths.dev ____ 2 | -------------------------------------------------------------------------------- /exercises/fr/iphone.json: -------------------------------------------------------------------------------- 1 | [ 2 | "Comment précommander l'iPhone X", 3 | "l'iPhone X arrive", 4 | "Dois-je dépenser 1.000 € pour l'iPhone X ?", 5 | "Les tests de l'iPhone 8 sont là", 6 | "iPhone 11 contre iPhone 8 : quelles sont les différences ?", 7 | "Il me faut un nouveau téléphone ! Des suggestions à me faire ?" 8 | ] 9 | -------------------------------------------------------------------------------- /exercises/fr/solution_01_02_01.py: -------------------------------------------------------------------------------- 1 | # Importe spaCy 2 | import spacy 3 | 4 | # Crée l'objet nlp français 5 | nlp = spacy.blank("fr") 6 | 7 | # Traite un texte 8 | doc = nlp("Ceci est une phrase.") 9 | 10 | # Affiche le texte du document 11 | print(doc.text) 12 | -------------------------------------------------------------------------------- /exercises/fr/solution_01_02_02.py: -------------------------------------------------------------------------------- 1 | # Importe spaCy 2 | import spacy 3 | 4 | # Crée l'objet nlp anglais 5 | nlp = spacy.blank("en") 6 | 7 | # Traite un texte (il signifie "Ceci est une phrase" en anglais) 8 | doc = nlp("This is a sentence.") 9 | 10 | # Affiche le texte du document 11 | print(doc.text) 12 | -------------------------------------------------------------------------------- /exercises/fr/solution_01_02_03.py: -------------------------------------------------------------------------------- 1 | # Importe spaCy 2 | import spacy 3 | 4 | # Crée l'objet nlp espagnol 5 | nlp = spacy.blank("es") 6 | 7 | # Traite un texte (il signifie "Comment vas-tu ?" 
en espagnol) 8 | doc = nlp("¿Cómo estás?") 9 | 10 | # Affiche le texte du document 11 | print(doc.text) 12 | -------------------------------------------------------------------------------- /exercises/fr/solution_01_03_01.py: -------------------------------------------------------------------------------- 1 | # Importe spacy et crée l'objet nlp français 2 | import spacy 3 | 4 | nlp = spacy.blank("fr") 5 | 6 | # Traite le texte 7 | doc = nlp("La forêt est peuplée de loups gris et renards roux.") 8 | 9 | # Sélectionne le premier token 10 | first_token = doc[0] 11 | 12 | # Affiche le texte du premier token 13 | print(first_token.text) 14 | -------------------------------------------------------------------------------- /exercises/fr/solution_01_07.py: -------------------------------------------------------------------------------- 1 | import spacy 2 | 3 | # Charge le pipeline "fr_core_news_sm" 4 | nlp = spacy.load("fr_core_news_sm") 5 | 6 | text = "Apple a été créée en 1976 par Steve Wozniak, Steve Jobs et Ron Wayne." 7 | 8 | # Traite le texte 9 | doc = nlp(text) 10 | 11 | # Affiche le texte du document 12 | print(doc.text) 13 | -------------------------------------------------------------------------------- /exercises/fr/solution_01_08_02.py: -------------------------------------------------------------------------------- 1 | import spacy 2 | 3 | nlp = spacy.load("fr_core_news_sm") 4 | 5 | text = "Apple a été créée en 1976 par Steve Wozniak, Steve Jobs et Ron Wayne." 6 | 7 | # Traite le texte 8 | doc = nlp(text) 9 | 10 | # Itère sur les entités prédites 11 | for ent in doc.ents: 12 | # Affiche le texte de l'entité et son label 13 | print(ent.text, ent.label_) 14 | -------------------------------------------------------------------------------- /exercises/fr/solution_02_02_01.py: -------------------------------------------------------------------------------- 1 | import spacy 2 | 3 | nlp = spacy.blank("fr") 4 | doc = nlp("J'ai un chat") 5 | 6 | # Recherche le hash pour le mot "chat" 7 | cat_hash = nlp.vocab.strings["chat"] 8 | print(cat_hash) 9 | 10 | # Recherche cat_hash pour obtenir la chaine 11 | cat_string = nlp.vocab.strings[cat_hash] 12 | print(cat_string) 13 | -------------------------------------------------------------------------------- /exercises/fr/solution_02_02_02.py: -------------------------------------------------------------------------------- 1 | import spacy 2 | 3 | nlp = spacy.blank("fr") 4 | doc = nlp("David Bowie a le label PER") 5 | 6 | # Cherche le hash pour le label de chaine "PER" 7 | person_hash = nlp.vocab.strings["PER"] 8 | print(person_hash) 9 | 10 | # Cherche person_hash pour obtenir la chaine 11 | person_string = nlp.vocab.strings[person_hash] 12 | print(person_string) 13 | -------------------------------------------------------------------------------- /exercises/fr/solution_02_05_01.py: -------------------------------------------------------------------------------- 1 | import spacy 2 | 3 | nlp = spacy.blank("fr") 4 | 5 | # Importe la classe Doc 6 | from spacy.tokens import Doc 7 | 8 | # Texte désiré : "spaCy est cool."
9 | words = ["spaCy", "est", "cool", "."] 10 | spaces = [True, True, False, False] 11 | 12 | # Crée un Doc à partir des mots et des espaces 13 | doc = Doc(nlp.vocab, words=words, spaces=spaces) 14 | print(doc.text) 15 | -------------------------------------------------------------------------------- /exercises/fr/solution_02_05_02.py: -------------------------------------------------------------------------------- 1 | import spacy 2 | 3 | nlp = spacy.blank("fr") 4 | 5 | # Importe la classe Doc 6 | from spacy.tokens import Doc 7 | 8 | # Texte désiré : "Allez, on commence !" 9 | words = ["Allez", ",", "on", "commence", "!"] 10 | spaces = [False, True, True, True, False] 11 | 12 | # Crée un Doc à partir des mots et des espaces 13 | doc = Doc(nlp.vocab, words=words, spaces=spaces) 14 | print(doc.text) 15 | -------------------------------------------------------------------------------- /exercises/fr/solution_02_05_03.py: -------------------------------------------------------------------------------- 1 | import spacy 2 | 3 | nlp = spacy.blank("fr") 4 | 5 | # Importe la classe Doc 6 | from spacy.tokens import Doc 7 | 8 | # Texte désiré : "Oh, vraiment ?!" 9 | words = ["Oh", ",", "vraiment", "?", "!"] 10 | spaces = [False, True, True, False, False] 11 | 12 | # Crée un Doc à partir des mots et des espaces 13 | doc = Doc(nlp.vocab, words=words, spaces=spaces) 14 | print(doc.text) 15 | -------------------------------------------------------------------------------- /exercises/fr/solution_02_09.py: -------------------------------------------------------------------------------- 1 | import spacy 2 | 3 | # Charge le pipeline fr_core_news_md 4 | nlp = spacy.load("fr_core_news_md") 5 | 6 | # Traite le texte 7 | doc = nlp("Deux bananes en pyjamas") 8 | 9 | # Obtiens le vecteur pour le token "bananes" 10 | bananas_vector = doc[1].vector 11 | print(bananas_vector) 12 | -------------------------------------------------------------------------------- /exercises/fr/solution_02_10_01.py: -------------------------------------------------------------------------------- 1 | import spacy 2 | 3 | nlp = spacy.load("fr_core_news_md") 4 | 5 | doc1 = nlp("Le temps est au beau fixe") 6 | doc2 = nlp("Le ciel est clair") 7 | 8 | # Obtiens la similarité entre doc1 et doc2 9 | similarity = doc1.similarity(doc2) 10 | print(similarity) 11 | -------------------------------------------------------------------------------- /exercises/fr/solution_02_10_02.py: -------------------------------------------------------------------------------- 1 | import spacy 2 | 3 | nlp = spacy.load("fr_core_news_md") 4 | 5 | doc = nlp("télé et livres") 6 | token1, token2 = doc[0], doc[2] 7 | 8 | # Obtiens la similarité entre les tokens "télé" et "livres" 9 | similarity = token1.similarity(token2) 10 | print(similarity) 11 | -------------------------------------------------------------------------------- /exercises/fr/solution_02_10_03.py: -------------------------------------------------------------------------------- 1 | import spacy 2 | 3 | nlp = spacy.load("fr_core_news_md") 4 | 5 | doc = nlp("C'était un super restaurant. 
Ensuite nous sommes allés dans un bar vraiment sympa.") 6 | 7 | # Crée des spans pour "super restaurant" et "bar vraiment sympa" 8 | span1 = doc[3:5] 9 | span2 = doc[12:15] 10 | 11 | # Obtiens la similarité entre les spans 12 | similarity = span1.similarity(span2) 13 | print(similarity) 14 | -------------------------------------------------------------------------------- /exercises/fr/solution_03_03.py: -------------------------------------------------------------------------------- 1 | import spacy 2 | 3 | # Charge le pipeline fr_core_news_sm 4 | nlp = spacy.load("fr_core_news_sm") 5 | 6 | # Affiche les noms des composants du pipeline 7 | print(nlp.pipe_names) 8 | 9 | # Affiche tous les tuples de (name, component) du pipeline 10 | print(nlp.pipeline) 11 | -------------------------------------------------------------------------------- /exercises/fr/solution_03_14_01.py: -------------------------------------------------------------------------------- 1 | import json 2 | import spacy 3 | 4 | nlp = spacy.load("fr_core_news_sm") 5 | 6 | with open("exercises/fr/tweets.json", encoding="utf8") as f: 7 | TEXTS = json.loads(f.read()) 8 | 9 | # Traite les textes et affiche les adjectifs 10 | for doc in nlp.pipe(TEXTS): 11 | print([token.text for token in doc if token.pos_ == "ADJ"]) 12 | -------------------------------------------------------------------------------- /exercises/fr/solution_03_14_02.py: -------------------------------------------------------------------------------- 1 | import json 2 | import spacy 3 | 4 | nlp = spacy.load("fr_core_news_sm") 5 | 6 | with open("exercises/fr/tweets.json", encoding="utf8") as f: 7 | TEXTS = json.loads(f.read()) 8 | 9 | # Traite les textes et affiche les entités 10 | docs = list(nlp.pipe(TEXTS)) 11 | entities = [doc.ents for doc in docs] 12 | print(*entities) 13 | -------------------------------------------------------------------------------- /exercises/fr/solution_03_14_03.py: -------------------------------------------------------------------------------- 1 | import spacy 2 | 3 | nlp = spacy.blank("fr") 4 | 5 | people = ["David Bowie", "Angela Merkel", "Lady Gaga"] 6 | 7 | # Crée une liste de motifs pour le PhraseMatcher 8 | patterns = list(nlp.pipe(people)) 9 | -------------------------------------------------------------------------------- /exercises/fr/solution_03_16_01.py: -------------------------------------------------------------------------------- 1 | import spacy 2 | 3 | nlp = spacy.load("fr_core_news_sm") 4 | text = ( 5 | "Le groupe aéronautique Airbus construit des avions et des " 6 | "hélicoptères vendus dans le monde entier. Le siège opérationnel du " 7 | "groupe est situé en France à Toulouse dans la région Occitanie." 
8 | ) 9 | 10 | # Tokenise seulement le texte 11 | doc = nlp.make_doc(text) 12 | print([token.text for token in doc]) 13 | -------------------------------------------------------------------------------- /exercises/fr/solution_04_07_01.sh: -------------------------------------------------------------------------------- 1 | python -m spacy init config ./config.cfg --lang fr --pipeline ner 2 | -------------------------------------------------------------------------------- /exercises/fr/solution_04_07_02.sh: -------------------------------------------------------------------------------- 1 | cat ./config.cfg 2 | -------------------------------------------------------------------------------- /exercises/fr/solution_04_08.sh: -------------------------------------------------------------------------------- 1 | python -m spacy train ./exercises/fr/config_gadget.cfg --output ./output --paths.train ./exercises/fr/train_gadget.spacy --paths.dev ./exercises/fr/dev_gadget.spacy 2 | -------------------------------------------------------------------------------- /exercises/fr/test_02_05_01.py: -------------------------------------------------------------------------------- 1 | def test(): 2 | assert ( 3 | "from spacy.tokens import Doc" in __solution__ 4 | ), "Importes-tu correctement la classe Doc ?" 5 | assert doc.text == "spaCy est cool.", "Es-tu certain d'avoir créé correctement le Doc ?" 6 | assert "print(doc.text)" in __solution__, "Affiches-tu le texte du Doc ?" 7 | __msg__.good("Bien joué !") 8 | -------------------------------------------------------------------------------- /exercises/fr/test_02_10_01.py: -------------------------------------------------------------------------------- 1 | def test(): 2 | assert ( 3 | "doc1.similarity(doc2)" in __solution__ or "doc2.similarity(doc1)" in __solution__ 4 | ), "Compares-tu la similarité entre les deux docs ?" 5 | assert ( 6 | 0 <= float(similarity) <= 1 7 | ), "La valeur de similarité doit être un nombre flottant. L'as-tu calculée correctement ?" 8 | __msg__.good("Bien joué !") 9 | -------------------------------------------------------------------------------- /exercises/fr/test_02_10_02.py: -------------------------------------------------------------------------------- 1 | def test(): 2 | assert ( 3 | "token1.similarity(token2)" in __solution__ or "token2.similarity(token1)" in __solution__ 4 | ), "Compares-tu la similarité entre les deux tokens ?" 5 | assert ( 6 | 0 <= float(similarity) <= 1 7 | ), "La valeur de similarité doit être un nombre flottant. L'as-tu calculée correctement ?" 8 | __msg__.good("Bien joué !") 9 | -------------------------------------------------------------------------------- /exercises/fr/test_03_14_01.py: -------------------------------------------------------------------------------- 1 | def test(): 2 | assert ( 3 | "for doc in nlp.pipe(TEXTS)" in __solution__ 4 | ), "Itères-tu sur les docs générés par nlp.pipe ?" 5 | __msg__.good("Joli !") 6 | -------------------------------------------------------------------------------- /exercises/fr/test_03_14_02.py: -------------------------------------------------------------------------------- 1 | def test(): 2 | assert ( 3 | "docs = list(nlp.pipe(TEXTS))" in __solution__ 4 | ), "Utilises-tu nlp.pipe enveloppé dans une liste ?"
5 | __msg__.good("Super boulot !") 6 | -------------------------------------------------------------------------------- /exercises/fr/test_03_14_03.py: -------------------------------------------------------------------------------- 1 | def test(): 2 | assert ( 3 | "patterns = list(nlp.pipe(people))" in __solution__ 4 | ), "Utilises-tu nlp.pipe enveloppé dans une liste ?" 5 | 6 | __msg__.good( 7 | "Bon boulot ! Passons à un exemple pratique qui utilise nlp.pipe " 8 | "pour traiter des documents avec des métadonnées supplémentaires." 9 | ) 10 | -------------------------------------------------------------------------------- /exercises/fr/test_03_16_01.py: -------------------------------------------------------------------------------- 1 | def test(): 2 | assert ( 3 | "doc = nlp.make_doc(text)" in __solution__ 4 | or "doc = nlp.tokenizer(text)" in __solution__ 5 | ), "Est-ce que tu tokenises seulement le texte ?" 6 | 7 | __msg__.good("Bien joué !") 8 | -------------------------------------------------------------------------------- /exercises/fr/train_gadget.spacy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/explosion/spacy-course/77d8ee1630788b1fb0df1f80f999f7355941a37f/exercises/fr/train_gadget.spacy -------------------------------------------------------------------------------- /exercises/ja/dev_gadget.spacy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/explosion/spacy-course/77d8ee1630788b1fb0df1f80f999f7355941a37f/exercises/ja/dev_gadget.spacy -------------------------------------------------------------------------------- /exercises/ja/exc_01_02_01.py: -------------------------------------------------------------------------------- 1 | # spaCyをインポート 2 | import ____ 3 | 4 | # 英語のnlpオブジェクトを作成 5 | nlp = ____ 6 | 7 | # テキストを処理 8 | doc = nlp("This is a sentence.") 9 | 10 | # docのテキストをプリント 11 | print(____.text) 12 | -------------------------------------------------------------------------------- /exercises/ja/exc_01_02_02.py: -------------------------------------------------------------------------------- 1 | # spaCyをインポート 2 | import ____ 3 | 4 | # ドイツ語のnlpオブジェクトを作成 5 | nlp = ____ 6 | 7 | # テキストを処理(ドイツ語で「よろしく!」の意味) 8 | doc = nlp("Liebe Grüße!") 9 | 10 | # docのテキストをプリント 11 | print(____.text) 12 | -------------------------------------------------------------------------------- /exercises/ja/exc_01_02_03.py: -------------------------------------------------------------------------------- 1 | # spacyをインポート 2 | import ____ 3 | 4 | # スペイン語のnlpオブジェクトを作成 5 | nlp = ____ 6 | 7 | # テキストを処理(スペイン語で「おげんきですか?」の意味) 8 | doc = nlp("¿Cómo estás?") 9 | 10 | # docのテキストをプリント 11 | print(____.text) 12 | -------------------------------------------------------------------------------- /exercises/ja/exc_01_02_04.py: -------------------------------------------------------------------------------- 1 | # spaCyをインポート 2 | import ____ 3 | 4 | # 日本語のnlpオブジェクトを作成 5 | nlp = ____ 6 | 7 | # テキストを処理 8 | doc = nlp("有難うございます。") 9 | 10 | # docのテキストをプリント 11 | print(____.text) 12 | -------------------------------------------------------------------------------- /exercises/ja/exc_01_03_01.py: -------------------------------------------------------------------------------- 1 | # spaCyをインポートして、日本語のnlpオブジェクトを作成 2 | import ____ 3 | 4 | nlp = ____ 5 | 6 | # テキストを処理 7 | doc = ____("私はツリーカンガルーとイッカクが好きです。") 8 | 9 | # 最初のトークンを選択 10 | first_token = doc[____] 11 | 12 | # 最初のトークンのテキストをプリント 13 | 
print(first_token.____) 14 | -------------------------------------------------------------------------------- /exercises/ja/exc_01_03_02.py: -------------------------------------------------------------------------------- 1 | # spacyをインポートして、日本語のnlpオブジェクトを作成 2 | import spacy 3 | 4 | nlp = ____ 5 | 6 | # テキストを処理 7 | doc = ____("私はツリーカンガルーとイッカクが好きです。") 8 | 9 | # 「ツリーカンガルー」のスライスを選択 10 | tree_kangaroos = ____ 11 | print(tree_kangaroos.text) 12 | 13 | # 「ツリーカンガルーとイッカク」のスライスを選択 14 | tree_kangaroos_and_narwhals = ____ 15 | print(tree_kangaroos_and_narwhals.text) 16 | -------------------------------------------------------------------------------- /exercises/ja/exc_01_07.py: -------------------------------------------------------------------------------- 1 | import spacy 2 | 3 | # 「ja_core_news_sm」パイプラインをロード 4 | nlp = ____ 5 | 6 | text = "公式発表:Appleが米国の上場企業として初めて時価評価額1兆ドルに到達しました。" 7 | 8 | # テキストを処理 9 | doc = ____ 10 | 11 | # docのテキストをプリント 12 | print(____.____) 13 | -------------------------------------------------------------------------------- /exercises/ja/exc_01_08_01.py: -------------------------------------------------------------------------------- 1 | import spacy 2 | 3 | nlp = spacy.load("ja_core_news_sm") 4 | 5 | text = "公式発表:Appleが米国の上場企業として初めて時価評価額1兆ドルに到達しました。" 6 | 7 | # テキストを処理 8 | doc = ____ 9 | 10 | for token in doc: 11 | # トークンの文字列、品詞タグ、依存関係ラベルを取得 12 | token_text = ____.____ 13 | token_pos = ____.____ 14 | token_dep = ____.____ 15 | # フォーマットしてプリント 16 | print(f"{token_text:<12}{token_pos:<10}{token_dep:<10}") 17 | -------------------------------------------------------------------------------- /exercises/ja/exc_01_08_02.py: -------------------------------------------------------------------------------- 1 | import spacy 2 | 3 | nlp = spacy.load("ja_core_news_sm") 4 | 5 | text = "公式発表:Appleが米国の上場企業として初めて時価評価額1兆ドルに到達しました。" 6 | 7 | # テキストを処理 8 | doc = ____ 9 | 10 | # 予測された固有表現をイテレート 11 | for ent in ____.____: 12 | # 固有表現の文字列とラベルをプリント 13 | print(ent.____, ____.____) 14 | -------------------------------------------------------------------------------- /exercises/ja/exc_01_09.py: -------------------------------------------------------------------------------- 1 | import spacy 2 | 3 | nlp = spacy.load("ja_core_news_sm") 4 | 5 | text = "静岡県にある三保の松原は世界遺産の一部です。" 6 | 7 | # テキストを処理 8 | doc = ____ 9 | 10 | # 固有表現をイテレート 11 | for ____ in ____.____: 12 | # 固有表現の文字列とラベルをプリント 13 | print(____.____, ____.____) 14 | 15 | # 三保の松原のスパンを取得 16 | mihonomatsubara = ____ 17 | 18 | # スパンの文字列をプリント 19 | print("Missing entity:", mihonomatsubara.text) 20 | -------------------------------------------------------------------------------- /exercises/ja/exc_02_02_01.py: -------------------------------------------------------------------------------- 1 | import spacy 2 | 3 | nlp = spacy.blank("ja") 4 | doc = nlp("私はネコを飼っています") 5 | 6 | # 単語「ネコ」のハッシュを引く 7 | cat_hash = ____.____.____[____] 8 | print(cat_hash) 9 | 10 | # cat_hashを使って文字列を引く 11 | cat_string = ____.____.____[____] 12 | print(cat_string) 13 | -------------------------------------------------------------------------------- /exercises/ja/exc_02_02_02.py: -------------------------------------------------------------------------------- 1 | import spacy 2 | 3 | nlp = spacy.blank("ja") 4 | doc = nlp("デヴィッド・ボウイはPERSONです") 5 | 6 | # ラベル「PERSON」のハッシュを引く 7 | person_hash = ____.____.____[____] 8 | print(person_hash) 9 | 10 | # person_hashを引いて文字列を取得 11 | person_string = ____.____.____[____] 12 | print(person_string) 13 |
-------------------------------------------------------------------------------- /exercises/ja/exc_02_05_01.py: -------------------------------------------------------------------------------- 1 | import spacy 2 | 3 | nlp = spacy.blank("ja") 4 | 5 | # Docクラスをインポート 6 | from ____ import ____ 7 | 8 | # 作りたいテキスト:「spaCyは素晴らしい!」 9 | words = ["spaCy", "は", "素晴らしい", "!"] 10 | spaces = [False, False, False, False] 11 | 12 | # wordsとspacesからDocを作成 13 | doc = ____(____, words=words, spaces=spaces) 14 | print(doc.text) 15 | -------------------------------------------------------------------------------- /exercises/ja/exc_02_05_02.py: -------------------------------------------------------------------------------- 1 | import spacy 2 | 3 | nlp = spacy.blank("ja") 4 | 5 | # Docクラスをインポート 6 | from ____ import ____ 7 | 8 | # 作りたいテキスト:「さあ、始めよう!」 9 | words = ["さあ", "、", "始めよう", "!"] 10 | spaces = [____, ____, ____, ____] 11 | 12 | # wordsとspacesからDocを作成 13 | doc = ____(____, ____=____, ____=____) 14 | print(doc.text) 15 | -------------------------------------------------------------------------------- /exercises/ja/exc_02_05_03.py: -------------------------------------------------------------------------------- 1 | import spacy 2 | 3 | nlp = spacy.blank("ja") 4 | 5 | # Docクラスをインポート 6 | from ____ import ____ 7 | 8 | # 作成したいテキスト:「本当ですか?!」 9 | words = [____, ____, ____, ____, ____] 10 | spaces = [____, ____, ____, ____, ____] 11 | 12 | # Docをwordsとspacesから作成 13 | doc = ____(____, ____=____, ____=____) 14 | print(doc.text) 15 | -------------------------------------------------------------------------------- /exercises/ja/exc_02_09.py: -------------------------------------------------------------------------------- 1 | import spacy 2 | 3 | # ja_core_news_mdモデルをロード 4 | nlp = ____ 5 | 6 | # テキストを処理 7 | doc = nlp("パジャマを着た2つのバナナ") 8 | 9 | # 「バナナ」のベクトルを取得 10 | bananas_vector = ____.____ 11 | print(bananas_vector) 12 | -------------------------------------------------------------------------------- /exercises/ja/exc_02_10_01.py: -------------------------------------------------------------------------------- 1 | import spacy 2 | 3 | nlp = spacy.load("ja_core_news_md") 4 | 5 | doc1 = nlp("暖かい夏の日です") 6 | doc2 = nlp("外は晴れています") 7 | 8 | # doc1とdoc2の類似度を取得 9 | similarity = ____.____(____) 10 | print(similarity) 11 | -------------------------------------------------------------------------------- /exercises/ja/exc_02_10_02.py: -------------------------------------------------------------------------------- 1 | import spacy 2 | 3 | nlp = spacy.load("ja_core_news_md") 4 | 5 | doc = nlp("テレビと本") 6 | token1, token2 = doc[0], doc[2] 7 | 8 | # 「テレビ」と「本」の類似度を取得 9 | similarity = ____.____(____) 10 | print(similarity) 11 | -------------------------------------------------------------------------------- /exercises/ja/exc_02_10_03.py: -------------------------------------------------------------------------------- 1 | import spacy 2 | 3 | nlp = spacy.load("ja_core_news_md") 4 | 5 | doc = nlp("素晴らしいレストランでした。その後、私達はとても素敵なバーに行きました。") 6 | 7 | # 「素晴らしいレストラン」と「とても素敵なバー」のスパンを作る 8 | span1 = ____ 9 | span2 = ____ 10 | 11 | # スパンの類似度をはかる 12 | similarity = ____.____(____) 13 | print(similarity) 14 | -------------------------------------------------------------------------------- /exercises/ja/exc_03_03.py: -------------------------------------------------------------------------------- 1 | import spacy 2 | 3 | # ja_core_news_sm モデルを読み込む 4 | nlp = ____ 5 | 6 | # パイプラインの名前を表示 7 | print(____.____) 8 | 9 | # (name, component) のタプルからなるパイプライン情報を表示 10 |
print(____.____) 11 | -------------------------------------------------------------------------------- /exercises/ja/exc_03_09_01.py: -------------------------------------------------------------------------------- 1 | import spacy 2 | from spacy.tokens import Token 3 | 4 | nlp = spacy.blank("ja") 5 | 6 | # デフォルト値がFalseである拡張属性「is_country」をトークンに追加 7 | ____.____(____, ____=____) 8 | 9 | # テキストを処理し、「スペイン」のトークンについてis_country属性をTrueにする 10 | doc = nlp("私はスペインに住んでいます。") 11 | ____ = True 12 | 13 | # すべてのトークンについて、文字列とis_country属性を表示 14 | print([(____, ____) for token in doc]) 15 | -------------------------------------------------------------------------------- /exercises/ja/exc_03_09_02.py: -------------------------------------------------------------------------------- 1 | import spacy 2 | from spacy.tokens import Token 3 | 4 | nlp = spacy.blank("ja") 5 | 6 | # トークンを受け取り、文字列を反転させたものを返すゲッターを定義 7 | def get_reversed(token): 8 | return token.text[::-1] 9 | 10 | 11 | # トークンの「reversed」プロパティ属性にget_reversedをゲッターとして登録 12 | ____.____(____, ____=____) 13 | 14 | # テキストを処理し、それぞれのトークンについてreversed属性を表示 15 | doc = nlp("あらゆる一般化は間違っている。これも含めて。") 16 | for ____ in ____: 17 | print("反転:", ____) 18 | -------------------------------------------------------------------------------- /exercises/ja/exc_03_10_02.py: -------------------------------------------------------------------------------- 1 | import spacy 2 | from spacy.tokens import Span 3 | 4 | nlp = spacy.blank("ja") 5 | 6 | # メソッドを定義 7 | def to_html(span, tag): 8 | # スパンのテキストをHTMLタグに入れて返す 9 | return f"<{tag}>{span.text}</{tag}>" 10 | 11 | 12 | # to_htmlをスパンの「to_html」拡張属性に登録 13 | ____.____(____, ____=____) 14 | 15 | # テキストを処理し、「strong」タグを用いてスパンのto_htmlメソッドを呼びだす 16 | doc = nlp("おはようございます、 これは文章です。") 17 | span = doc[0:3] 18 | print(____) 19 | -------------------------------------------------------------------------------- /exercises/ja/exc_03_14_01.py: -------------------------------------------------------------------------------- 1 | import json 2 | import spacy 3 | 4 | nlp = spacy.load("ja_core_news_sm") 5 | 6 | with open("exercises/ja/tweets.json", encoding="utf8") as f: 7 | TEXTS = json.loads(f.read()) 8 | 9 | # テキストを処理し、形容詞を表示 10 | for text in TEXTS: 11 | doc = nlp(text) 12 | print([token.text for token in doc if token.pos_ == "ADJ"]) 13 | -------------------------------------------------------------------------------- /exercises/ja/exc_03_14_02.py: -------------------------------------------------------------------------------- 1 | import json 2 | import spacy 3 | 4 | nlp = spacy.load("ja_core_news_sm") 5 | 6 | with open("exercises/ja/tweets.json", encoding="utf8") as f: 7 | TEXTS = json.loads(f.read()) 8 | 9 | # テキストを処理し、固有表現を表示 10 | docs = [nlp(text) for text in TEXTS] 11 | entities = [doc.ents for doc in docs] 12 | print(*entities) 13 | -------------------------------------------------------------------------------- /exercises/ja/exc_03_14_03.py: -------------------------------------------------------------------------------- 1 | import spacy 2 | 3 | nlp = spacy.blank("ja") 4 | 5 | people = ["デヴィッド・ボウイ", "アンゲラ・メルケル", "レディー・ガガ"] 6 | 7 | # PhraseMatcherのパターンのリストを作成 8 | patterns = [nlp(person) for person in people] 9 | -------------------------------------------------------------------------------- /exercises/ja/exc_03_16_01.py: -------------------------------------------------------------------------------- 1 | import spacy 2 | 3 | nlp = spacy.load("ja_core_news_sm") 4 | text = ( 5 | "チックフィレイはジョージア州カレッジパークに本社を置く、" 6 | "チキンサンドを専門とするアメリカのファストフードレストランチェーンです。" 7 | ) 8 |
9 | # トークナイズのみ行う 10 | doc = nlp(text) 11 | print([token.text for token in doc]) 12 | -------------------------------------------------------------------------------- /exercises/ja/exc_03_16_02.py: -------------------------------------------------------------------------------- 1 | import spacy 2 | 3 | nlp = spacy.load("ja_core_news_sm") 4 | text = ( 5 | "チックフィレイはジョージア州カレッジパークに本社を置く、" 6 | "チキンサンドを専門とするアメリカのファストフードレストランチェーンです。" 7 | ) 8 | 9 | # parserを無効化 10 | with ____.____(____): 11 | # テキストを処理する 12 | doc = ____ 13 | # docの固有表現を表示 14 | print(____) 15 | -------------------------------------------------------------------------------- /exercises/ja/exc_04_06.py: -------------------------------------------------------------------------------- 1 | import spacy 2 | 3 | # 空の「ja」パイプラインを作成 4 | nlp = ____ 5 | 6 | # 新しい固有表現抽出器を作成し、パイプラインに追加 7 | ner = ____ 8 | 9 | # 「GADGET」ラベルを固有表現抽出器に追加 10 | ____.____ 11 | -------------------------------------------------------------------------------- /exercises/ja/exc_04_07_01.sh: -------------------------------------------------------------------------------- 1 | python -m spacy ____ ____ ____ --___ ____ --____ ____ 2 | -------------------------------------------------------------------------------- /exercises/ja/exc_04_07_02.sh: -------------------------------------------------------------------------------- 1 | cat ./config.cfg 2 | -------------------------------------------------------------------------------- /exercises/ja/exc_04_08.sh: -------------------------------------------------------------------------------- 1 | python -m spacy ____ ____ --output ____ --paths.train ____ --paths.dev ____ 2 | -------------------------------------------------------------------------------- /exercises/ja/gadgets.json: -------------------------------------------------------------------------------- 1 | [ 2 | ["iPhone Xの注文方法", { "entities": [[0, 8, "GADGET"]] }], 3 | ["iPhone Xが発売される", { "entities": [[0, 8, "GADGET"]] }], 4 | ["iPhone Xに10万円の価値ある?", { "entities": [[0, 8, "GADGET"]] }], 5 | ["iPhone 8のレビューはこれ!", { "entities": [[0, 8, "GADGET"]] }], 6 | ["iPhoneのiOS11へのアップデートが今日ある", { "entities": [[0, 6, "GADGET"]] }], 7 | ["新しいスマホが欲しい!どうしたらいい?", { "entities": [] }] 8 | ] 9 | -------------------------------------------------------------------------------- /exercises/ja/iphone.json: -------------------------------------------------------------------------------- 1 | [ 2 | "iPhone Xの注文方法", 3 | "iPhone Xが発売される", 4 | "iPhone Xに10万円の価値ある?", 5 | "iPhone 8のレビューはここにある", 6 | "iPhone 11とiPhone 8の違いは?", 7 | "新しいスマホが欲しい!どうしたらいい?" 
8 | ] 9 | -------------------------------------------------------------------------------- /exercises/ja/solution_01_02_01.py: -------------------------------------------------------------------------------- 1 | # spaCyをインポート 2 | import spacy 3 | 4 | # nlpオブジェクトを作成 5 | nlp = spacy.blank("en") 6 | 7 | # テキストを処理 8 | doc = nlp("This is a sentence.") 9 | 10 | # docのテキストをプリント 11 | print(doc.text) 12 | -------------------------------------------------------------------------------- /exercises/ja/solution_01_02_02.py: -------------------------------------------------------------------------------- 1 | # spaCyをインポート 2 | import spacy 3 | 4 | # ドイツ語のnlpオブジェクトを作成 5 | nlp = spacy.blank("de") 6 | 7 | # テキストを処理(ドイツ語で「よろしく!」の意味) 8 | doc = nlp("Liebe Grüße!") 9 | 10 | # docのテキストをプリント 11 | print(doc.text) 12 | -------------------------------------------------------------------------------- /exercises/ja/solution_01_02_03.py: -------------------------------------------------------------------------------- 1 | # spacyをインポート 2 | import spacy 3 | 4 | # スペイン語のnlpオブジェクトを作成 5 | nlp = spacy.blank("es") 6 | 7 | # テキストを処理(スペイン語で「おげんきですか?」の意味) 8 | doc = nlp("¿Cómo estás?") 9 | 10 | # docのテキストをプリント 11 | print(doc.text) 12 | -------------------------------------------------------------------------------- /exercises/ja/solution_01_02_04.py: -------------------------------------------------------------------------------- 1 | # spaCyをインポート 2 | import spacy 3 | 4 | # nlpオブジェクトを作成 5 | nlp = spacy.blank("ja") 6 | 7 | # テキストを処理 8 | doc = nlp("有難うございます。") 9 | 10 | # docのテキストをプリント 11 | print(doc.text) 12 | -------------------------------------------------------------------------------- /exercises/ja/solution_01_03_01.py: -------------------------------------------------------------------------------- 1 | # spaCyをインポートし、日本語のnlpオブジェクトを作成 2 | import spacy 3 | 4 | nlp = spacy.blank("ja") 5 | 6 | # テキストを処理 7 | doc = nlp("私はツリーカンガルーとイッカクが好きです。") 8 | 9 | # 最初のトークンを選択 10 | first_token = doc[0] 11 | 12 | # 最初のトークンのテキストをプリント 13 | print(first_token.text) 14 | -------------------------------------------------------------------------------- /exercises/ja/solution_01_03_02.py: -------------------------------------------------------------------------------- 1 | # spaCyをインポートし、日本語のnlpオブジェクトを作成 2 | import spacy 3 | 4 | nlp = spacy.blank("ja") 5 | 6 | # テキストを処理 7 | doc = nlp("私はツリーカンガルーとイッカクが好きです。") 8 | 9 | # 「ツリーカンガルー」のスライスを選択 10 | tree_kangaroos = doc[2:4] 11 | print(tree_kangaroos.text) 12 | 13 | # 「ツリーカンガルーとイッカク」のスライスを選択 14 | tree_kangaroos_and_narwhals = doc[2:6] 15 | print(tree_kangaroos_and_narwhals.text) 16 | -------------------------------------------------------------------------------- /exercises/ja/solution_01_07.py: -------------------------------------------------------------------------------- 1 | import spacy 2 | 3 | # 「ja_core_news_sm」パイプラインをロード 4 | nlp = spacy.load("ja_core_news_sm") 5 | 6 | text = "公式発表:Appleが米国の上場企業として初めて時価評価額1兆ドルに到達しました。" 7 | 8 | # テキストを処理 9 | doc = nlp(text) 10 | 11 | # docのテキストをプリント 12 | print(doc.text) 13 | -------------------------------------------------------------------------------- /exercises/ja/solution_01_08_01.py: -------------------------------------------------------------------------------- 1 | import spacy 2 | 3 | nlp = spacy.load("ja_core_news_sm") 4 | 5 | text = "公式発表:Appleが米国の上場企業として初めて時価評価額1兆ドルに到達しました。" 6 | 7 | # テキストを処理 8 | doc = nlp(text) 9 | 10 | for token in doc: 11 | # トークンの文字列、品詞タグ、依存関係ラベルを取得 12 | token_text = token.text 13 | token_pos = token.pos_ 14 | token_dep = token.dep_ 15 | # 
フォーマットしてプリント 16 | print(f"{token_text:<12}{token_pos:<10}{token_dep:<10}") 17 | -------------------------------------------------------------------------------- /exercises/ja/solution_01_08_02.py: -------------------------------------------------------------------------------- 1 | import spacy 2 | 3 | nlp = spacy.load("ja_core_news_sm") 4 | 5 | text = "公式発表:Appleが米国の上場企業として初めて時価評価額1兆ドルに到達しました。" 6 | 7 | # テキストを処理 8 | doc = nlp(text) 9 | 10 | # 予測された固有表現をイテレート 11 | for ent in doc.ents: 12 | # 固有表現の文字列とラベルをプリント 13 | print(ent.text, ent.label_) 14 | -------------------------------------------------------------------------------- /exercises/ja/solution_01_09.py: -------------------------------------------------------------------------------- 1 | import spacy 2 | 3 | nlp = spacy.load("ja_core_news_sm") 4 | 5 | text = "静岡県にある三保の松原は世界遺産の一部です。" 6 | 7 | # テキストを処理 8 | doc = nlp(text) 9 | 10 | # 固有表現をイテレート 11 | for ent in doc.ents: 12 | # 固有表現の文字列とラベルをプリント 13 | print(ent.text, ent.label_) 14 | 15 | # 三保の松原のスパンを取得 16 | mihonomatsubara = doc[4:7] 17 | 18 | # スパンの文字列をプリント 19 | print("Missing entity:", mihonomatsubara.text) -------------------------------------------------------------------------------- /exercises/ja/solution_02_02_01.py: -------------------------------------------------------------------------------- 1 | import spacy 2 | 3 | nlp = spacy.blank("ja") 4 | doc = nlp("私はネコを飼っています") 5 | 6 | # 単語「ネコ」のハッシュを引く 7 | cat_hash = nlp.vocab.strings["ネコ"] 8 | print(cat_hash) 9 | 10 | # cat_hashを使って文字列を引く 11 | cat_string = nlp.vocab.strings[cat_hash] 12 | print(cat_string) 13 | -------------------------------------------------------------------------------- /exercises/ja/solution_02_02_02.py: -------------------------------------------------------------------------------- 1 | import spacy 2 | 3 | nlp = spacy.blank("ja") 4 | doc = nlp("デヴィッド・ボウイはPERSONです") 5 | 6 | # ラベル「PERSON」のハッシュを引く 7 | person_hash = nlp.vocab.strings["PERSON"] 8 | print(person_hash) 9 | 10 | # person_hashを引いて文字列を取得 11 | person_string = nlp.vocab.strings[person_hash] 12 | print(person_string) 13 | -------------------------------------------------------------------------------- /exercises/ja/solution_02_05_01.py: -------------------------------------------------------------------------------- 1 | import spacy 2 | 3 | nlp = spacy.blank("ja") 4 | 5 | # Docクラスをインポート 6 | from spacy.tokens import Doc 7 | 8 | # 作りたいテキスト:「spaCyは素晴らしい!」 9 | words = ["spaCy", "は", "素晴らしい", "!"] 10 | spaces = [False, False, False, False] 11 | 12 | # wordsとspacesからDocを作成 13 | doc = Doc(nlp.vocab, words=words, spaces=spaces) 14 | print(doc.text) 15 | -------------------------------------------------------------------------------- /exercises/ja/solution_02_05_02.py: -------------------------------------------------------------------------------- 1 | import spacy 2 | 3 | nlp = spacy.blank("ja") 4 | 5 | # Docクラスをインポート 6 | from spacy.tokens import Doc 7 | 8 | # 作りたいテキスト:「さあ、始めよう!」 9 | words = ["さあ", "、", "始めよう", "!"] 10 | spaces = [False, False, False, False] 11 | 12 | # wordsとspacesからDocを作成 13 | doc = Doc(nlp.vocab, words=words, spaces=spaces) 14 | print(doc.text) 15 | -------------------------------------------------------------------------------- /exercises/ja/solution_02_05_03.py: -------------------------------------------------------------------------------- 1 | import spacy 2 | 3 | nlp = spacy.blank("ja") 4 | 5 | # Docクラスをインポート 6 | from spacy.tokens import Doc 7 | 8 | # 作成したいテキスト:「本当ですか?!」 9 | words = ["本当", "です", "か", "?", "!"] 10 | spaces = [False, False, 
False, False, False] 11 | 12 | # Docをwordsとspacesから作成 13 | doc = Doc(nlp.vocab, words=words, spaces=spaces) 14 | print(doc.text) 15 | -------------------------------------------------------------------------------- /exercises/ja/solution_02_07.py: -------------------------------------------------------------------------------- 1 | import spacy 2 | 3 | nlp = spacy.load("ja_core_news_sm") 4 | doc = nlp("ベルリンはいい街だと思う") 5 | 6 | # すべてのトークンの文字列と品詞タグを取得 7 | for token in doc: 8 | # 現在のトークンが固有名詞かどうかをチェック 9 | if token.pos_ == "PROPN": 10 | # 次のトークンが接置詞かどうかをチェック 11 | if doc[token.i + 1].pos_ == "ADP": 12 | print("接置詞の前の固有名詞が見つかりました:", token.text) 13 | -------------------------------------------------------------------------------- /exercises/ja/solution_02_09.py: -------------------------------------------------------------------------------- 1 | import spacy 2 | 3 | # ja_core_news_mdモデルをロード 4 | nlp = spacy.load("ja_core_news_md") 5 | 6 | # テキストを処理 7 | doc = nlp("パジャマを着た2つのバナナ") 8 | 9 | # 「バナナ」のベクトルを取得 10 | bananas_vector = doc[7].vector 11 | print(bananas_vector) 12 | -------------------------------------------------------------------------------- /exercises/ja/solution_02_10_01.py: -------------------------------------------------------------------------------- 1 | import spacy 2 | 3 | nlp = spacy.load("ja_core_news_md") 4 | 5 | doc1 = nlp("暖かい夏の日です") 6 | doc2 = nlp("外は晴れています") 7 | 8 | # doc1とdoc2の類似度を取得 9 | similarity = doc1.similarity(doc2) 10 | print(similarity) 11 | -------------------------------------------------------------------------------- /exercises/ja/solution_02_10_02.py: -------------------------------------------------------------------------------- 1 | import spacy 2 | 3 | nlp = spacy.load("ja_core_news_md") 4 | 5 | doc = nlp("テレビと本") 6 | token1, token2 = doc[0], doc[2] 7 | 8 | # 「テレビ」と「本」の類似度を取得 9 | similarity = token1.similarity(token2) 10 | print(similarity) 11 | -------------------------------------------------------------------------------- /exercises/ja/solution_02_10_03.py: -------------------------------------------------------------------------------- 1 | import spacy 2 | 3 | nlp = spacy.load("ja_core_news_md") 4 | 5 | doc = nlp("素晴らしいレストランでした。その後、私達はとても素敵なバーに行きました。") 6 | 7 | # 「素晴らしいレストラン」と「とても素敵なバー」のスパンを作る 8 | span1 = doc[0:2] 9 | span2 = doc[11:15] 10 | 11 | # スパンの類似度をはかる 12 | similarity = span1.similarity(span2) 13 | print(similarity) 14 | -------------------------------------------------------------------------------- /exercises/ja/solution_03_03.py: -------------------------------------------------------------------------------- 1 | import spacy 2 | 3 | # ja_core_news_sm モデルを読み込む 4 | nlp = spacy.load("ja_core_news_sm") 5 | 6 | # パイプラインの名前を出力 7 | print(nlp.pipe_names) 8 | 9 | # (name, component) のタプルからなるパイプライン情報を表示 10 | print(nlp.pipeline) 11 | -------------------------------------------------------------------------------- /exercises/ja/solution_03_09_01.py: -------------------------------------------------------------------------------- 1 | import spacy 2 | from spacy.tokens import Token 3 | 4 | nlp = spacy.blank("ja") 5 | 6 | # デフォルト値がFalseである拡張属性「is_country」をトークンに追加 7 | Token.set_extension("is_country", default=False) 8 | 9 | # テキストを処理し、「スペイン」のトークンについてis_country属性をTrueにする 10 | doc = nlp("私はスペインに住んでいます。") 11 | doc[2]._.is_country = True 12 | 13 | # すべてのトークンについて、文字列とis_country属性を表示 14 | print([(token.text, token._.is_country) for token in doc]) 15 | -------------------------------------------------------------------------------- /exercises/ja/solution_03_14_01.py:
-------------------------------------------------------------------------------- 1 | import json 2 | import spacy 3 | 4 | nlp = spacy.load("ja_core_news_sm") 5 | 6 | with open("exercises/ja/tweets.json", encoding="utf8") as f: 7 | TEXTS = json.loads(f.read()) 8 | 9 | # テキストを処理し、形容詞を表示 10 | for doc in nlp.pipe(TEXTS): 11 | 12 | print([token.text for token in doc if token.pos_ == "ADJ"]) 13 | -------------------------------------------------------------------------------- /exercises/ja/solution_03_14_02.py: -------------------------------------------------------------------------------- 1 | import json 2 | import spacy 3 | 4 | nlp = spacy.load("ja_core_news_sm") 5 | 6 | with open("exercises/ja/tweets.json", encoding="utf8") as f: 7 | TEXTS = json.loads(f.read()) 8 | 9 | # テキストを処理し、固有表現を表示 10 | docs = list(nlp.pipe(TEXTS)) 11 | entities = [doc.ents for doc in docs] 12 | print(*entities) 13 | -------------------------------------------------------------------------------- /exercises/ja/solution_03_14_03.py: -------------------------------------------------------------------------------- 1 | import spacy 2 | 3 | nlp = spacy.blank("ja") 4 | 5 | people = ["デヴィッド・ボウイ", "アンゲラ・メルケル", "レディー・ガガ"] 6 | 7 | # PhraseMatcherのパターンのリストを作成 8 | patterns = list(nlp.pipe(people)) 9 | -------------------------------------------------------------------------------- /exercises/ja/solution_03_16_01.py: -------------------------------------------------------------------------------- 1 | import spacy 2 | 3 | nlp = spacy.load("ja_core_news_sm") 4 | text = ( 5 | "チックフィレイはジョージア州カレッジパークに本社を置く、" 6 | "チキンサンドを専門とするアメリカのファストフードレストランチェーンです。" 7 | ) 8 | 9 | # トークナイズのみ行う 10 | doc = nlp.make_doc(text) 11 | print([token.text for token in doc]) 12 | -------------------------------------------------------------------------------- /exercises/ja/solution_03_16_02.py: -------------------------------------------------------------------------------- 1 | import spacy 2 | 3 | nlp = spacy.load("ja_core_news_sm") 4 | text = ( 5 | "チックフィレイはジョージア州カレッジパークに本社を置く、" 6 | "チキンサンドを専門とするアメリカのファストフードレストランチェーンです。" 7 | ) 8 | 9 | # parserを無効化 10 | with nlp.select_pipes(disable=["parser"]): 11 | # テキストを処理する 12 | doc = nlp(text) 13 | # docの固有表現を表示 14 | print(doc.ents) 15 | -------------------------------------------------------------------------------- /exercises/ja/solution_04_06.py: -------------------------------------------------------------------------------- 1 | import spacy 2 | 3 | # 空の「ja」モデルを作成 4 | nlp = spacy.blank("ja") 5 | 6 | # 新しい固有表現抽出器を作成し、パイプラインに追加 7 | ner = nlp.add_pipe("ner") 8 | 9 | # 「GADGET」ラベルを固有表現抽出器に追加 10 | ner.add_label("GADGET") 11 | -------------------------------------------------------------------------------- /exercises/ja/solution_04_07_01.sh: -------------------------------------------------------------------------------- 1 | python -m spacy init config ./config.cfg --lang ja --pipeline ner 2 | -------------------------------------------------------------------------------- /exercises/ja/solution_04_07_02.sh: -------------------------------------------------------------------------------- 1 | cat ./config.cfg 2 | -------------------------------------------------------------------------------- /exercises/ja/solution_04_08.sh: -------------------------------------------------------------------------------- 1 | python -m spacy train ./exercises/ja/config_gadget.cfg --output ./output --paths.train ./exercises/ja/train_gadget.spacy --paths.dev ./exercises/ja/dev_gadget.spacy 2 | 
-------------------------------------------------------------------------------- /exercises/ja/test_01_02_01.py: -------------------------------------------------------------------------------- 1 | def test(): 2 | import spacy.tokens 3 | import spacy.lang.en 4 | 5 | assert isinstance( 6 | nlp, spacy.lang.en.English 7 | ), "nlpオブジェクトはEnglishクラスのインスタンスでなければなりません" 8 | assert isinstance(doc, spacy.tokens.Doc), "テキストをnlpオブジェクトで処理してdocを作成しましたか?" 9 | assert "print(doc.text)" in __solution__, "doc.textをプリントしましたか?" 10 | 11 | __msg__.good("正解です!") 12 | -------------------------------------------------------------------------------- /exercises/ja/test_01_02_02.py: -------------------------------------------------------------------------------- 1 | def test(): 2 | import spacy.tokens 3 | import spacy.lang.de 4 | 5 | assert isinstance(nlp, spacy.lang.de.German), "nlpオブジェクトはGermanクラスのインスタンスでなければなりません" 6 | assert isinstance(doc, spacy.tokens.Doc), "テキストをnlpオブジェクトで処理してdocを作成しましたか?" 7 | assert "print(doc.text)" in __solution__, "doc.textをプリントしましたか?" 8 | 9 | __msg__.good("正解です!") 10 | -------------------------------------------------------------------------------- /exercises/ja/test_01_02_03.py: -------------------------------------------------------------------------------- 1 | def test(): 2 | import spacy.tokens 3 | import spacy.lang.es 4 | 5 | assert isinstance( 6 | nlp, spacy.lang.es.Spanish 7 | ), "nlpオブジェクトはSpanishクラスのインスタンスでなければなりません" 8 | assert isinstance(doc, spacy.tokens.Doc), "テキストをnlpオブジェクトで処理してdocを作成しましたか?" 9 | assert "print(doc.text)" in __solution__, "doc.textをプリントしましたか?" 10 | 11 | __msg__.good("Perfecto! doc、トークン、スパンに行きましょう。") 12 | -------------------------------------------------------------------------------- /exercises/ja/test_01_02_04.py: -------------------------------------------------------------------------------- 1 | def test(): 2 | import spacy.tokens 3 | import spacy.lang.ja 4 | 5 | assert isinstance( 6 | nlp, spacy.lang.ja.Japanese 7 | ), "nlpオブジェクトはJapaneseクラスのインスタンスでなければなりません" 8 | assert isinstance(doc, spacy.tokens.Doc), "テキストをnlpオブジェクトで処理してdocを作成しましたか?" 9 | assert "print(doc.text)" in __solution__, "doc.textをプリントしましたか?" 10 | 11 | __msg__.good("完璧です! doc、トークン、スパンに行きましょう。") 12 | -------------------------------------------------------------------------------- /exercises/ja/test_01_03_01.py: -------------------------------------------------------------------------------- 1 | def test(): 2 | assert doc.text == "私はツリーカンガルーとイッカクが好きです。", "テキストをちゃんと処理しましたか?" 3 | assert first_token == doc[0], "最初のトークンを選択しましたか?" 4 | assert "print(first_token.text)" in __solution__, "トークンのテキストをプリントしましたか?" 5 | assert 'spacy.blank("ja")' in __solution__, 'spacy.blankに指定する言語は合っていますか?' 6 | __msg__.good("よくできました!") 7 | -------------------------------------------------------------------------------- /exercises/ja/test_01_03_02.py: -------------------------------------------------------------------------------- 1 | def test(): 2 | assert doc.text == "私はツリーカンガルーとイッカクが好きです。", "テキストをちゃんと処理しましたか?" 3 | assert tree_kangaroos == doc[2:4], "ツリーカンガルーのスパンを選択しましたか?" 4 | assert ( 5 | tree_kangaroos_and_narwhals == doc[2:6] 6 | ), "ツリーカンガルーとイッカクのスパンを選択しましたか?" 7 | __msg__.good("よくできました!") 8 | -------------------------------------------------------------------------------- /exercises/ja/test_01_07.py: -------------------------------------------------------------------------------- 1 | def test(): 2 | assert "spacy.load" in __solution__, "spacy.loadを呼び出しましたか?" 3 | assert nlp.meta["lang"] == "ja", "正しいモデルを呼び出しましたか?" 
4 | assert nlp.meta["name"] == "core_news_sm", "正しいモデルを呼び出しましたか?" 5 | assert "nlp(text)" in __solution__, "テキストをちゃんと処理しましたか?" 6 | assert "print(doc.text)" in __solution__, "docのテキストをプリントしましたか?" 7 | 8 | __msg__.good("よくできました!パイプラインのロードのやりかたを学んだので、モデルを用いた解析の方法を見ていきましょう。") 9 | -------------------------------------------------------------------------------- /exercises/ja/test_01_08_01.py: -------------------------------------------------------------------------------- 1 | def test(): 2 | assert "token_text = token.text" in __solution__, "トークンの文字列をちゃんと取得していますか?" 3 | assert ( 4 | "token_pos = token.pos_" in __solution__ 5 | ), "トークンの品詞タグをちゃんと取得していますか?文字列属性を取得するには、アンダースコアを用いることを忘れないでください。" 6 | assert ( 7 | "token_dep = token.dep_" in __solution__ 8 | ), "トークンの依存関係ラベルをちゃんと取得していますか?文字列属性を取得するには、アンダースコアを用いることを忘れないでください。" 9 | __msg__.good("Perfect!") 10 | -------------------------------------------------------------------------------- /exercises/ja/test_01_08_02.py: -------------------------------------------------------------------------------- 1 | def test(): 2 | assert "for ent in doc.ents" in __solution__, "固有表現をイテレートしていますか?" 3 | assert "print(ent.text, ent.label_)" in __solution__, "文字列とラベルをプリントしましたか?" 4 | 5 | __msg__.good( 6 | "素晴らしい!ここでは、モデルはすべての例で正しい予測を行いました。" "次の演習では、モデルが予測を誤る例を見ていき、モデルを修正する方法を学びます。" 7 | ) 8 | -------------------------------------------------------------------------------- /exercises/ja/test_01_09.py: -------------------------------------------------------------------------------- 1 | def test(): 2 | assert "in doc.ents" in __solution__, "固有表現をイテレートしましたか?" 3 | assert mihonomatsubara.text == "三保の松原", "mihonomatsubara変数は正しいスライスですか?" 4 | 5 | __msg__.good( 6 | "完璧です!もちろん、いつもこのように手動でやる必要はありません。" 7 | "次の演習では、単語やフレーズを探すためのルールベースのmatcherについて学んでいきます。" 8 | ) 9 | -------------------------------------------------------------------------------- /exercises/ja/test_02_02_01.py: -------------------------------------------------------------------------------- 1 | def test(): 2 | assert cat_hash == nlp.vocab.strings["ネコ"], "正しいhashを代入しましたか?" 3 | assert 'nlp.vocab.strings["ネコ"]' in __solution__, "正しい文字列を取得しましたか?" 4 | assert cat_string == "ネコ", "正しい文字列を取得しましたか?" 5 | assert "nlp.vocab.strings[cat_hash]" in __solution__, "hashから文字列を取得しましたか?" 6 | 7 | __msg__.good("素晴らしい!") 8 | -------------------------------------------------------------------------------- /exercises/ja/test_02_02_02.py: -------------------------------------------------------------------------------- 1 | def test(): 2 | assert person_hash == nlp.vocab.strings["PERSON"], "正しいhashを代入しましたか?" 3 | assert 'nlp.vocab.strings["PERSON"]' in __solution__, "正しいhashを代入しましたか?" 4 | assert person_string == "PERSON", "正しい文字列を取得しましたか?" 5 | assert "nlp.vocab.strings[person_hash]" in __solution__, "hashから文字列を取得しましたか?" 6 | 7 | __msg__.good("Good job!") 8 | -------------------------------------------------------------------------------- /exercises/ja/test_02_05_01.py: -------------------------------------------------------------------------------- 1 | def test(): 2 | assert "from spacy.tokens import Doc" in __solution__, "Docクラスをちゃんとインポートしましたか?" 3 | assert doc.text == "spaCyは素晴らしい!", "Docをちゃんと作成しましたか?" 4 | assert "print(doc.text)" in __solution__, "Docの文字列をプリントしましたか?" 
5 | __msg__.good("よくできました!") 6 | -------------------------------------------------------------------------------- /exercises/ja/test_02_05_02.py: -------------------------------------------------------------------------------- 1 | def test(): 2 | assert "from spacy.tokens import Doc" in __solution__, "Docクラスをちゃんとインポートしましたか?" 3 | assert len(spaces) == 4, "Docをちゃんと作成しましたか?" 4 | assert all(isinstance(s, bool) for s in spaces), "spacesはブール値である必要があります。" 5 | assert [int(s) for s in spaces] == [0, 0, 0, 0], "スペースは正しいですか?" 6 | assert doc.text == "さあ、始めよう!", "Docを正しく作成していますか?" 7 | __msg__.good("Nice!") 8 | -------------------------------------------------------------------------------- /exercises/ja/test_02_09.py: -------------------------------------------------------------------------------- 1 | def test(): 2 | assert 'spacy.load("ja_core_news_md")' in __solution__, "中サイズのパイプラインをロードしましたか?" 3 | assert "doc[7].vector" in __solution__, "正しいベクトルを取得しましたか?" 4 | __msg__.good("Well done!次の演習では、単語ベクトルを用いたdoc、スパン、トークン間の類似度の予測を行います。") 5 | -------------------------------------------------------------------------------- /exercises/ja/test_02_10_01.py: -------------------------------------------------------------------------------- 1 | def test(): 2 | assert ( 3 | "doc1.similarity(doc2)" in __solution__ or "doc2.similarity(doc1)" in __solution__ 4 | ), "2つのdocの類似度を比較しましたか?" 5 | assert 0 <= float(similarity) <= 1, "similarityは浮動小数点数である必要があります。きちんと計算しましたか?" 6 | __msg__.good("Well done!") 7 | -------------------------------------------------------------------------------- /exercises/ja/test_02_10_02.py: -------------------------------------------------------------------------------- 1 | def test(): 2 | assert ( 3 | "token1.similarity(token2)" in __solution__ or "token2.similarity(token1)" in __solution__ 4 | ), "2つのトークンの類似度を比較しましたか?" 5 | assert 0 <= float(similarity) <= 1, "similarityは浮動小数点数である必要があります。きちんと計算しましたか?" 6 | __msg__.good("Nicely done!") 7 | -------------------------------------------------------------------------------- /exercises/ja/test_03_03.py: -------------------------------------------------------------------------------- 1 | def test(): 2 | assert nlp.meta["name"] == "core_news_sm", "正しいパイプラインをロードしましたか?" 3 | assert nlp.meta["lang"] == "ja", "正しいパイプラインをロードしましたか?" 4 | assert "print(nlp.pipe_names)" in __solution__, "パイプラインの名前をプリントしましたか?" 5 | assert "print(nlp.pipeline)" in __solution__, "パイプラインをプリントしましたか?" 6 | 7 | __msg__.good( 8 | "Well done!今あるパイプラインについて調べたくなったときは、nlp.pipe_namesやnlp.pipelineを使ってプリントしましょう。" 9 | ) 10 | -------------------------------------------------------------------------------- /exercises/ja/test_03_06.py: -------------------------------------------------------------------------------- 1 | def test(): 2 | assert "len(doc)" in __solution__, "docの長さを取得しましたか?" 3 | assert "return doc" in __solution__, "docを返しましたか?" 4 | assert "nlp.add_pipe" in __solution__, "コンポーネントを追加しましたか?" 5 | assert "first=True" in __solution__, "パイプラインの最初にコンポーネントを追加しましたか?" 6 | assert nlp.pipe_names[0] == "length_component", "パイプラインの名前が正しくないようです!" 7 | 8 | __msg__.good("Perfect!もう少し複雑なコンポーネントを見ていきましょう!") 9 | -------------------------------------------------------------------------------- /exercises/ja/test_03_07.py: -------------------------------------------------------------------------------- 1 | def test(): 2 | assert 'after="ner"' in __solution__, "明示的に固有表現抽出器のあとにコンポーネントを追加しましたか?" 3 | assert nlp.pipe_names[-1] == "animal_component", "固有表現抽出器のあとにコンポーネントを追加しましたか?" 4 | assert len(doc.ents) == 2, "きちんと固有表現を追加しましたか?"
5 | assert all(ent.label_ == "ANIMAL" for ent in doc.ents), "ANIMALのラベルを追加しましたか?" 6 | 7 | __msg__.good("Good job!はじめてのカスタムパイプラインコンポーネントとしてルールベースの固有表現抽出器を作ることができましたね。") 8 | -------------------------------------------------------------------------------- /exercises/ja/test_03_09_02.py: -------------------------------------------------------------------------------- 1 | def test(): 2 | assert Token.has_extension("reversed"), "拡張属性をトークンに追加しましたか?" 3 | ext = Token.get_extension("reversed") 4 | assert ext[2] is not None, "ゲッターをきちんと設定しましたか?" 5 | assert "getter=get_reversed" in __solution__, "get_reversedをゲッターとして登録しましたか?" 6 | assert "token._.reversed" in __solution__, "カスタム属性を取得しましたか?" 7 | 8 | __msg__.good("Good job!もっと複雑な属性を設定していきましょう。") 9 | -------------------------------------------------------------------------------- /exercises/ja/test_03_14_01.py: -------------------------------------------------------------------------------- 1 | def test(): 2 | assert ( 3 | "for doc in nlp.pipe(TEXTS)" in __solution__ 4 | ), "nlp.pipeによって生成されたdocをイテレートしましたか?" 5 | __msg__.good("Nice!") 6 | -------------------------------------------------------------------------------- /exercises/ja/test_03_14_02.py: -------------------------------------------------------------------------------- 1 | def test(): 2 | assert ( 3 | "docs = list(nlp.pipe(TEXTS))" in __solution__ 4 | ), "nlp.pipeの結果に対してlistを呼び出しましたか?" 5 | __msg__.good("Great work!") 6 | -------------------------------------------------------------------------------- /exercises/ja/test_03_14_03.py: -------------------------------------------------------------------------------- 1 | def test(): 2 | assert ( 3 | "patterns = list(nlp.pipe(people))" in __solution__ 4 | ), "nlp.pipeの結果に対してリストを呼び出しましたか?" 5 | 6 | __msg__.good("Good job!追加のメタデータとともにnlp.pipeを呼びだす実践的な例を見ていきましょう。") 7 | -------------------------------------------------------------------------------- /exercises/ja/test_03_16_01.py: -------------------------------------------------------------------------------- 1 | def test(): 2 | assert ( 3 | "doc = nlp.make_doc(text)" in __solution__ 4 | or "doc = nlp.tokenizer(text)" in __solution__ 5 | ), "トークナイズだけしましたか?" 6 | 7 | __msg__.good("Nicely done!") 8 | -------------------------------------------------------------------------------- /exercises/ja/test_03_16_02.py: -------------------------------------------------------------------------------- 1 | def test(): 2 | assert ( 3 | 'with nlp.select_pipes(disable=["parser"])' in __solution__ 4 | ), "正しいコンポーネントに対して、nlp.select_pipesを呼び出しましたか?" 5 | 6 | __msg__.good( 7 | "Perfect!最適化のためのヒントや工夫について練習しました。" 8 | "次章では、spaCyのニューラルネットワークモデルのトレーニングを行います。" 9 | ) 10 | -------------------------------------------------------------------------------- /exercises/ja/test_04_06.py: -------------------------------------------------------------------------------- 1 | def test(): 2 | assert 'spacy.blank("ja")' in __solution__, "空の日本語パイプラインを作成しましたか?" 3 | assert ( 4 | len(nlp.pipe_names) == 1 and nlp.pipe_names[0] == "ner" 5 | ), "固有表現抽出器をパイプラインに追加しましたか?" 6 | assert len(ner.labels) == 1 and ner.labels[0] == "GADGET", "固有表現抽出器にラベルを追加しましたか?" 
7 | 8 | __msg__.good("Well done!パイプラインの準備が完了したので、学習ループを書いていきましょう。") 9 | -------------------------------------------------------------------------------- /exercises/ja/train_gadget.spacy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/explosion/spacy-course/77d8ee1630788b1fb0df1f80f999f7355941a37f/exercises/ja/train_gadget.spacy -------------------------------------------------------------------------------- /exercises/ja/tweets.json: -------------------------------------------------------------------------------- 1 | [ 2 | "マクドナルドは私の大好きなお店です。", 3 | "ここでは@McDonaldsは調理済みのハンバーガしかないと思っていたのだが、どうやら未調理のものしかないのか?病気になっている暇はない...", 4 | "人々はいまだにマクドナルドを食べているのか:(", 5 | "スペインのマクドナルドには手羽先がある。私の心はとても幸せ。", 6 | "@MacDonalds どうか今までで一番美味しいハンバーガーのアーチデラックスを復活させてください!! :P", 7 | "早く開店して!私は#McRibのハンバーガが食べたい。", 8 | "今朝はマクドナルドで食べて、今お腹が痛くなっている。" 9 | ] -------------------------------------------------------------------------------- /exercises/pt/dev_gadget.spacy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/explosion/spacy-course/77d8ee1630788b1fb0df1f80f999f7355941a37f/exercises/pt/dev_gadget.spacy -------------------------------------------------------------------------------- /exercises/pt/exc_01_02_01.py: -------------------------------------------------------------------------------- 1 | # Importe a biblioteca spaCy 2 | import ____ 3 | 4 | # Crie um objeto nlp do Inglês 5 | nlp = ____ 6 | 7 | # Processe o texto 8 | doc = nlp("This is a sentence.") 9 | 10 | # Imprima o texto do documento 11 | print(____.text) 12 | -------------------------------------------------------------------------------- /exercises/pt/exc_01_02_02.py: -------------------------------------------------------------------------------- 1 | # Importe a biblioteca spaCy 2 | import ____ 3 | 4 | # Crie um objeto nlp do Alemão 5 | nlp = ____ 6 | 7 | # Processe o texto (equivalente ao português: "Atenciosamente") 8 | doc = nlp("Liebe Grüße!") 9 | 10 | # Imprima o texto do documento 11 | print(____.text) 12 | -------------------------------------------------------------------------------- /exercises/pt/exc_01_02_03.py: -------------------------------------------------------------------------------- 1 | # Importe a biblioteca spaCy 2 | import ____ 3 | 4 | # Crie um objeto nlp do Espanhol 5 | nlp = ____ 6 | 7 | # Processar o texto em espanhol (equivalente ao português: "Como vai?") 8 | doc = nlp("¿Cómo estás?") 9 | 10 | # Imprimir o texto do documento 11 | print(____.text) 12 | -------------------------------------------------------------------------------- /exercises/pt/exc_01_03_01.py: -------------------------------------------------------------------------------- 1 | # Importar spacy e criar o objeto nlp do Português 2 | import ____ 3 | nlp = ____ 4 | 5 | # Processar o texto 6 | doc = ____("Eu gosto de gatos e cachorros.") 7 | 8 | # Selecionar o primeiro token 9 | first_token = doc[____] 10 | 11 | # Imprimir o texto do primeiro token 12 | print(first_token.____) 13 | -------------------------------------------------------------------------------- /exercises/pt/exc_02_02_01.py: -------------------------------------------------------------------------------- 1 | import spacy 2 | 3 | nlp = spacy.blank("pt") 4 | doc = nlp("Eu tenho um gato amarelo.") 5 | 6 | # Consulte o código hash da palavra "gato" 7 | gato_hash = ____.____.____[____] 8 | print(gato_hash) 9 | 10 | # Agora consulte o gato_hash para obter a palavra
novamente 11 | gato_string = ____.____.____[____] 12 | print(gato_string) 13 | -------------------------------------------------------------------------------- /exercises/pt/exc_02_02_02.py: -------------------------------------------------------------------------------- 1 | import spacy 2 | 3 | nlp = spacy.blank("pt") 4 | doc = nlp("David Bowie é uma PESSOA") 5 | 6 | # Consulte o código hash para a string "PESSOA" 7 | person_hash = ____.____.____[____] 8 | print(person_hash) 9 | 10 | # Consulte o person_hash para obter o texto novamente 11 | person_string = ____.____.____[____] 12 | print(person_string) 13 | -------------------------------------------------------------------------------- /exercises/pt/exc_02_05_01.py: -------------------------------------------------------------------------------- 1 | import spacy 2 | 3 | nlp = spacy.blank("pt") 4 | 5 | # Importe a classe Doc 6 | from ____ import ____ 7 | 8 | # Texto desejado: "spaCy é bem legal!" 9 | words = ["spaCy", "é", "bem","legal","!"] 10 | spaces = [True, True, True, False, False] 11 | 12 | # Crie um Doc a partir das palavras words e o espaçamento spaces 13 | doc = ____(____, words=words, spaces=spaces) 14 | print(doc.text) 15 | -------------------------------------------------------------------------------- /exercises/pt/exc_02_05_02.py: -------------------------------------------------------------------------------- 1 | import spacy 2 | 3 | nlp = spacy.blank("pt") 4 | 5 | # Importe a classe Doc 6 | from ____ import ____ 7 | 8 | # Texto desejado: "Vamos lá, vamos começar!" 9 | words = ["Vamos","lá", ",", "vamos", "começar", "!"] 10 | spaces = [____, ____, ____, ____, ____, ____] 11 | 12 | # Crie um Doc a partir das palavras words e espaçamento spaces 13 | doc = ____(____, ____=____, ____=____) 14 | print(doc.text) 15 | -------------------------------------------------------------------------------- /exercises/pt/exc_02_05_03.py: -------------------------------------------------------------------------------- 1 | import spacy 2 | 3 | nlp = spacy.blank("pt") 4 | 5 | # Importe a classe Doc 6 | from ____ import ____ 7 | 8 | # Texto desejado: "Oh, realmente?!" 
9 | words = [____, ____, ____, ____, ____] 10 | spaces = [____, ____, ____, ____, ____] 11 | 12 | # Crie um Doc a partir das palavras words e espaçamento spaces 13 | doc = ____(____, ____=____, ____=____) 14 | print(doc.text) 15 | -------------------------------------------------------------------------------- /exercises/pt/exc_02_09.py: -------------------------------------------------------------------------------- 1 | import spacy 2 | 3 | # Carregue o fluxo de processamento pt_core_news_md 4 | # para fazer o download do fluxo: python -m spacy download pt_core_news_md 5 | nlp = ____ 6 | 7 | # Processe um texto 8 | doc = nlp("Duas bananas de pijamas") 9 | 10 | # Imprima o vetor para "bananas" 11 | bananas_vector = ____.____ 12 | print(bananas_vector) 13 | -------------------------------------------------------------------------------- /exercises/pt/exc_02_10_01.py: -------------------------------------------------------------------------------- 1 | import spacy 2 | 3 | nlp = spacy.load("pt_core_news_md") 4 | 5 | doc1 = nlp("Eu quero comprar um livro novo") 6 | doc2 = nlp("Preciso ler um livro") 7 | 8 | # Obtenha a similaridade entre doc1 e doc2 9 | similarity = ____.____(____) 10 | print(similarity) 11 | -------------------------------------------------------------------------------- /exercises/pt/exc_02_10_02.py: -------------------------------------------------------------------------------- 1 | import spacy 2 | 3 | nlp = spacy.load("pt_core_news_md") 4 | 5 | doc = nlp("Televisão e livro") 6 | token1, token2 = doc[0], doc[2] 7 | 8 | # Obtenha a similaridade dos tokens "Televisão" e "livro" 9 | similarity = ____.____(____) 10 | print(similarity) 11 | -------------------------------------------------------------------------------- /exercises/pt/exc_02_10_03.py: -------------------------------------------------------------------------------- 1 | import spacy 2 | 3 | nlp = spacy.load("pt_core_news_md") 4 | 5 | doc = nlp("Visitamos um excelente restaurante.
Em seguida fomos a um ótimo bar.") 6 | 7 | # Crie partições para "excelente restaurante" e "ótimo bar" 8 | span1 = ____ 9 | span2 = ____ 10 | print(span1) 11 | print(span2) 12 | 13 | # Obtenha a similaridade das partições 14 | similarity = ____.____(____) 15 | print(similarity) 16 | -------------------------------------------------------------------------------- /exercises/pt/exc_03_03.py: -------------------------------------------------------------------------------- 1 | import spacy 2 | 3 | # Carregue o fluxo de processamento pt_core_news_sm 4 | nlp = ____ 5 | 6 | # Imprima o nome dos componentes do fluxo 7 | print(____.____) 8 | 9 | # Imprima as informações das tuplas (name, component) 10 | print(____.____) 11 | -------------------------------------------------------------------------------- /exercises/pt/exc_03_14_01.py: -------------------------------------------------------------------------------- 1 | import json 2 | import spacy 3 | 4 | nlp = spacy.load("pt_core_news_sm") 5 | 6 | with open("exercises/pt/tweets.json", encoding="utf8") as f: 7 | TEXTS = json.loads(f.read()) 8 | 9 | # Processar os textos e imprimir os adjetivos 10 | for text in TEXTS: 11 | doc = nlp(text) 12 | print([token.text for token in doc if token.pos_ == "ADJ"]) 13 | -------------------------------------------------------------------------------- /exercises/pt/exc_03_14_02.py: -------------------------------------------------------------------------------- 1 | import json 2 | import spacy 3 | 4 | nlp = spacy.load("pt_core_news_sm") 5 | 6 | with open("exercises/pt/tweets.json", encoding="utf8") as f: 7 | TEXTS = json.loads(f.read()) 8 | 9 | # Processar os textos e imprimir as entidades 10 | docs = [nlp(text) for text in TEXTS] 11 | entities = [doc.ents for doc in docs] 12 | print(*entities) 13 | -------------------------------------------------------------------------------- /exercises/pt/exc_03_14_03.py: -------------------------------------------------------------------------------- 1 | import spacy 2 | 3 | nlp = spacy.blank("en") 4 | 5 | people = ["David Bowie", "Angela Merkel", "Lady Gaga"] 6 | 7 | # Criar uma lista de padrões de correspondência para o PhraseMatcher 8 | patterns = [nlp(person) for person in people] 9 | -------------------------------------------------------------------------------- /exercises/pt/exc_03_16_01.py: -------------------------------------------------------------------------------- 1 | import spacy 2 | 3 | nlp = spacy.load("pt_core_news_sm") 4 | text = ( 5 | "Chick-fil-A é um restaurante fast-food com sede na cidade de College Park, " 6 | "estado da Georgia, especializado em sanduíches com carne de frango. " 7 | ) 8 | 9 | # Apenas tokenizar o texto 10 | doc = nlp(text) 11 | print([token.text for token in doc]) 12 | -------------------------------------------------------------------------------- /exercises/pt/exc_03_16_02.py: -------------------------------------------------------------------------------- 1 | import spacy 2 | 3 | nlp = spacy.load("pt_core_news_sm") 4 | text = ( 5 | "Chick-fil-A é um restaurante fast-food com sede na cidade de College Park, " 6 | "estado da Georgia, especializado em sanduíches com carne de frango.
" 7 | ) 8 | 9 | # Desabilitar o lematizador 10 | with ____.____(____): 11 | # Processar o texto 12 | doc = ____ 13 | # Imprimir as entidades do doc 14 | print(____) 15 | -------------------------------------------------------------------------------- /exercises/pt/exc_04_07_01.sh: -------------------------------------------------------------------------------- 1 | python -m spacy ____ ____ ____ --___ ____ --____ ____ -------------------------------------------------------------------------------- /exercises/pt/exc_04_07_02.sh: -------------------------------------------------------------------------------- 1 | cat ./config.cfg -------------------------------------------------------------------------------- /exercises/pt/exc_04_08.sh: -------------------------------------------------------------------------------- 1 | python -m spacy ____ ____ --output ____ --paths.train ____ --paths.dev ____ -------------------------------------------------------------------------------- /exercises/pt/iphone.json: -------------------------------------------------------------------------------- 1 | [ 2 | "How to preorder the iPhone X", 3 | "iPhone X is coming", 4 | "Should I pay $1,000 for the iPhone X?", 5 | "The iPhone 8 reviews are here", 6 | "iPhone 11 vs iPhone 8: What's the difference?", 7 | "I need a new phone! Any tips?" 8 | ] 9 | -------------------------------------------------------------------------------- /exercises/pt/solution_01_02_01.py: -------------------------------------------------------------------------------- 1 | # Importe a biblioteca spaCy 2 | import spacy 3 | 4 | # Crie um objeto nlp do Inglês 5 | nlp = spacy.blank("en") 6 | 7 | # Processe o texto 8 | doc = nlp("This is a sentence.") 9 | 10 | # Imprima o texto do documento 11 | print(doc.text) 12 | -------------------------------------------------------------------------------- /exercises/pt/solution_01_02_02.py: -------------------------------------------------------------------------------- 1 | # Importe a biblioteca spaCy 2 | import spacy 3 | 4 | # Crie um objeto nlp do Alemão 5 | nlp = spacy.blank("de") 6 | 7 | # Processe o texto (equivalente ao português: "Atenciosamente") 8 | doc = nlp("Liebe Grüße!") 9 | 10 | # Imprima o texto do documento 11 | print(doc.text) 12 | -------------------------------------------------------------------------------- /exercises/pt/solution_01_02_03.py: -------------------------------------------------------------------------------- 1 | # Importe a biblioteca spaCy 2 | import spacy 3 | 4 | # Crie um objeto nlp do Espanhol 5 | nlp = spacy.blank("es") 6 | 7 | # Processar o texto em espanhol (equivalente ao português: "Como vai?") 8 | doc = nlp("¿Cómo estás?") 9 | 10 | # Imprimir o texto do documento 11 | print(doc.text) 12 | -------------------------------------------------------------------------------- /exercises/pt/solution_01_03_01.py: -------------------------------------------------------------------------------- 1 | # Importar spacy e criar o objeto nlp do Português 2 | import spacy 3 | nlp = spacy.blank("pt") 4 | 5 | # Processar o texto 6 | doc = nlp("Eu gosto de gatos e cachorros.") 7 | 8 | # Selecionar o primeiro token 9 | first_token = doc[0] 10 | 11 | # Imprimir o texto do primeiro token 12 | print(first_token.text) 13 | -------------------------------------------------------------------------------- /exercises/pt/solution_02_02_01.py: -------------------------------------------------------------------------------- 1 | import spacy 2 | 3 | nlp = spacy.blank("pt") 4 | doc = nlp("Eu tenho
um gato amarelo.") 5 | 6 | # Consulte o código hash da palavra "gato" 7 | gato_hash = nlp.vocab.strings["gato"] 8 | print(gato_hash) 9 | 10 | # Agora consulte o gato_hash para obter a palavra novamente 11 | gato_string = nlp.vocab.strings[gato_hash] 12 | print(gato_string) 13 | -------------------------------------------------------------------------------- /exercises/pt/solution_02_02_02.py: -------------------------------------------------------------------------------- 1 | import spacy 2 | 3 | nlp = spacy.blank("pt") 4 | doc = nlp("David Bowie é uma PESSOA") 5 | 6 | # Consulte o código hash para a string "PESSOA" 7 | person_hash = nlp.vocab.strings["PESSOA"] 8 | print(person_hash) 9 | 10 | # Consulte o person_hash para obter o texto novamente 11 | person_string = nlp.vocab.strings[person_hash] 12 | print(person_string) 13 | -------------------------------------------------------------------------------- /exercises/pt/solution_02_05_01.py: -------------------------------------------------------------------------------- 1 | import spacy 2 | 3 | nlp = spacy.blank("pt") 4 | 5 | # Importe a classe Doc 6 | from spacy.tokens import Doc 7 | 8 | # Texto desejado: "spaCy é bem legal!" 9 | words = ["spaCy", "é", "bem","legal","!"] 10 | spaces = [True, True, True, False, False] 11 | 12 | # Crie um Doc a partir das palavras words e o espaçamento spaces 13 | doc = Doc(nlp.vocab, words=words, spaces=spaces) 14 | print(doc.text) 15 | -------------------------------------------------------------------------------- /exercises/pt/solution_02_05_03.py: -------------------------------------------------------------------------------- 1 | import spacy 2 | 3 | nlp = spacy.blank("pt") 4 | 5 | # Importe a classe Doc 6 | from spacy.tokens import Doc 7 | 8 | # Texto desejado: "Oh, realmente?!" 
9 | words = ["Oh", ",", "realmente", "?", "!"] 10 | spaces = [False, True, False, False, False] 11 | 12 | # Crie um Doc a partir das palavras words e espaçamento spaces 13 | doc = Doc(nlp.vocab, words=words, spaces=spaces) 14 | print(doc.text) 15 | -------------------------------------------------------------------------------- /exercises/pt/solution_02_09.py: -------------------------------------------------------------------------------- 1 | import spacy 2 | 3 | # Carregue o fluxo de processamento en_core_web_md 4 | # para fazer o download do fluxo: python -m spacy download pt_core_news_md 5 | nlp = spacy.load("pt_core_news_md") 6 | 7 | # Processe um texto 8 | doc = nlp("Duas bananas de pijamas") 9 | 10 | # Imprima o vetor para "bananas" 11 | bananas_vector = doc[1].vector 12 | print(bananas_vector) 13 | -------------------------------------------------------------------------------- /exercises/pt/solution_02_10_01.py: -------------------------------------------------------------------------------- 1 | import spacy 2 | 3 | nlp = spacy.load("pt_core_news_md") 4 | 5 | doc1 = nlp("Eu quero comprar um livro novo") 6 | doc2 = nlp("Preciso ler um livro") 7 | 8 | # Obtenha a similiridade entre doc1 e doc2 9 | similarity = doc1.similarity(doc2) 10 | print(similarity) 11 | -------------------------------------------------------------------------------- /exercises/pt/solution_02_10_02.py: -------------------------------------------------------------------------------- 1 | import spacy 2 | 3 | nlp = spacy.load("pt_core_news_md") 4 | 5 | doc = nlp("Televisão e livro") 6 | token1, token2 = doc[0], doc[2] 7 | 8 | # Obtenha a similaridade dos tokens "Televisão" e "livro" 9 | similarity = token1.similarity(token2) 10 | print(similarity) 11 | -------------------------------------------------------------------------------- /exercises/pt/solution_02_10_03.py: -------------------------------------------------------------------------------- 1 | import spacy 2 | 3 | nlp = spacy.load("pt_core_news_md") 4 | 5 | doc = nlp("Visitamos um excelente restaurante. 
Em seguida fomos a um ótimo bar.") 6 | 7 | # Crie partições para "excelente restaurante" e "ótimo bar" 8 | span1 = doc[2:4] 9 | span2 = doc[10:12] 10 | print(span1) 11 | print(span2) 12 | 13 | # Obtenha a similaridade das partições 14 | similarity = span1.similarity(span2) 15 | print(similarity) 16 | -------------------------------------------------------------------------------- /exercises/pt/solution_03_03.py: -------------------------------------------------------------------------------- 1 | import spacy 2 | 3 | # Carregue o fluxo de procesamento en_core_web_sm 4 | nlp = spacy.load("pt_core_news_sm") 5 | 6 | # Imprima o nome dos componentes do fluxo 7 | print(nlp.pipe_names) 8 | 9 | # Imprima as informações das tuplas (name, component) 10 | print(nlp.pipeline) 11 | -------------------------------------------------------------------------------- /exercises/pt/solution_03_14_01.py: -------------------------------------------------------------------------------- 1 | import json 2 | import spacy 3 | 4 | nlp = spacy.load("pt_core_news_sm") 5 | 6 | with open("exercises/pt/tweets.json", encoding="utf8") as f: 7 | TEXTS = json.loads(f.read()) 8 | 9 | # Processar os textos e imprimir os adjetivos 10 | for doc in nlp.pipe(TEXTS): 11 | print([token.text for token in doc if token.pos_ == "ADJ"]) 12 | -------------------------------------------------------------------------------- /exercises/pt/solution_03_14_02.py: -------------------------------------------------------------------------------- 1 | import json 2 | import spacy 3 | 4 | nlp = spacy.load("pt_core_news_sm") 5 | 6 | with open("exercises/pt/tweets.json", encoding="utf8") as f: 7 | TEXTS = json.loads(f.read()) 8 | 9 | # Processar os textos e imprimir as entidades 10 | docs = list(nlp.pipe(TEXTS)) 11 | entities = [doc.ents for doc in docs] 12 | print(*entities) 13 | -------------------------------------------------------------------------------- /exercises/pt/solution_03_14_03.py: -------------------------------------------------------------------------------- 1 | import spacy 2 | 3 | nlp = spacy.blank("en") 4 | 5 | people = ["David Bowie", "Angela Merkel", "Lady Gaga"] 6 | 7 | # Criar uma lista de padrões de correspondência para o PhraseMatcher 8 | patterns = list(nlp.pipe(people)) 9 | -------------------------------------------------------------------------------- /exercises/pt/solution_03_16_01.py: -------------------------------------------------------------------------------- 1 | import spacy 2 | 3 | nlp = spacy.load("pt_core_news_sm") 4 | text = ( 5 | "Chick-fil-A é um restaurante fast-food com sede na cidade de College Park, " 6 | "estado da Georgia, especializado em sanduíches com carne de frango. 
" 7 | ) 8 | 9 | # Apenas toquenizar o texto 10 | doc = nlp.make_doc(text) 11 | print([token.text for token in doc]) 12 | -------------------------------------------------------------------------------- /exercises/pt/solution_04_07_01.sh: -------------------------------------------------------------------------------- 1 | python -m spacy init config ./config.cfg --lang en --pipeline ner -------------------------------------------------------------------------------- /exercises/pt/solution_04_07_02.sh: -------------------------------------------------------------------------------- 1 | cat ./config.cfg -------------------------------------------------------------------------------- /exercises/pt/solution_04_08.sh: -------------------------------------------------------------------------------- 1 | python -m spacy train ./exercises/en/config_gadget.cfg --output ./output --paths.train ./exercises/en/train_gadget.spacy --paths.dev ./exercises/en/dev_gadget.spacy -------------------------------------------------------------------------------- /exercises/pt/test_02_05_01.py: -------------------------------------------------------------------------------- 1 | def test(): 2 | assert ( 3 | "from spacy.tokens import Doc" in __solution__ 4 | ), "Você fez a importação da classe Doc corretamente?" 5 | assert doc.text == "spaCy é bem legal!", "Você tem certeza que criou o Doc corretamente?" 6 | assert "print(doc.text)" in __solution__, "Você está imprimindo o texto do Doc?" 7 | __msg__.good("Muito bom!") 8 | -------------------------------------------------------------------------------- /exercises/pt/test_02_10_01.py: -------------------------------------------------------------------------------- 1 | def test(): 2 | assert ( 3 | "doc1.similarity(doc2)" in __solution__ or "doc2.similarity(doc1)" in __solution__ 4 | ), "Você está comparando a similaridade entre os dois documentos?" 5 | assert ( 6 | 0 <= float(similarity) <= 1 7 | ), "O valor da similaridade deve ser um número de ponto flutuante. Você fez este cálculo corretamente?" 8 | __msg__.good("Muito bem!") 9 | -------------------------------------------------------------------------------- /exercises/pt/test_02_10_02.py: -------------------------------------------------------------------------------- 1 | def test(): 2 | assert ( 3 | "token1.similarity(token2)" in __solution__ or "token2.similarity(token1)" in __solution__ 4 | ), "Você está comparando a similaridade entre os dois tokens?" 5 | assert ( 6 | 0 <= float(similarity) <= 1 7 | ), "O valor da similaridade deve ser um número de ponto flutuante. Você fez este cálculo corretamente?" 8 | __msg__.good("Bom!") 9 | -------------------------------------------------------------------------------- /exercises/pt/test_03_14_01.py: -------------------------------------------------------------------------------- 1 | def test(): 2 | assert ( 3 | "for doc in nlp.pipe(TEXTS)" in __solution__ 4 | ), "Você está iterando nos docs retornados em nlp.pipe?" 5 | __msg__.good("Bom!") 6 | -------------------------------------------------------------------------------- /exercises/pt/test_03_14_02.py: -------------------------------------------------------------------------------- 1 | def test(): 2 | assert ( 3 | "docs = list(nlp.pipe(TEXTS))" in __solution__ 4 | ), "Você está usando nlp.pipe envolvido em uma lista (list)?" 
5 | __msg__.good("Bom trabalho!") 6 | -------------------------------------------------------------------------------- /exercises/pt/test_03_14_03.py: -------------------------------------------------------------------------------- 1 | def test(): 2 | assert ( 3 | "patterns = list(nlp.pipe(people))" in __solution__ 4 | ), "Você está usando nlp.pipe envolvido em uma lista (list)?" 5 | 6 | __msg__.good( 7 | "Bom trabalho! Vamos seguir agora com um exemplo prático que " 8 | "usa nlp.pipe para processar documentos com metadados adicionais." 9 | ) 10 | -------------------------------------------------------------------------------- /exercises/pt/test_03_16_01.py: -------------------------------------------------------------------------------- 1 | def test(): 2 | assert ( 3 | "doc = nlp.make_doc(text)" in __solution__ 4 | or "doc = nlp.tokenizer(text)" in __solution__ 5 | ), "Você está apenas toquenizando o texto?" 6 | 7 | __msg__.good("Muito bom!") 8 | -------------------------------------------------------------------------------- /exercises/pt/train_gadget.spacy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/explosion/spacy-course/77d8ee1630788b1fb0df1f80f999f7355941a37f/exercises/pt/train_gadget.spacy -------------------------------------------------------------------------------- /exercises/zh/dev_gadget.spacy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/explosion/spacy-course/77d8ee1630788b1fb0df1f80f999f7355941a37f/exercises/zh/dev_gadget.spacy -------------------------------------------------------------------------------- /exercises/zh/exc_01_02_01.py: -------------------------------------------------------------------------------- 1 | # 导入spaCy 2 | import ____ 3 | 4 | # 创建英文nlp对象 5 | nlp = ____ 6 | 7 | # 处理文本 8 | doc = nlp("This is a sentence.") 9 | 10 | # 打印文本 11 | print(____.text) 12 | -------------------------------------------------------------------------------- /exercises/zh/exc_01_02_02.py: -------------------------------------------------------------------------------- 1 | # 导入spaCy 2 | import ____ 3 | 4 | # 创建德语nlp对象 5 | nlp = ____ 6 | 7 | # 处理文本 (这是德语"Kind regards!"的意思) 8 | doc = nlp("Liebe Grüße!") 9 | 10 | # 打印文本 11 | print(____.text) 12 | -------------------------------------------------------------------------------- /exercises/zh/exc_01_02_03.py: -------------------------------------------------------------------------------- 1 | # 导入spaCy 2 | import ____ 3 | 4 | # 创建西班牙语nlp对象 5 | nlp = ____ 6 | 7 | # 处理文本 (这是西班牙语"How are you?"的意思) 8 | doc = nlp("¿Cómo estás?") 9 | 10 | # 打印文本 11 | print(____.text) 12 | -------------------------------------------------------------------------------- /exercises/zh/exc_01_02_04.py: -------------------------------------------------------------------------------- 1 | # 导入spaCy 2 | import ____ 3 | 4 | # 创建中文nlp对象 5 | nlp = ____ 6 | 7 | # 处理文本 8 | doc = nlp("这是一个句子。") 9 | 10 | # 打印文本 11 | print(____.text) 12 | -------------------------------------------------------------------------------- /exercises/zh/exc_01_03_01.py: -------------------------------------------------------------------------------- 1 | # 导入spaCy创建中文nlp对象 2 | import ____ 3 | 4 | nlp = ____ 5 | 6 | # 处理文本 7 | doc = ____("我喜欢老虎和狮子。") 8 | 9 | # 选择第一个词符 10 | first_token = doc[____] 11 | 12 | # 打印第一个词符的文本 13 | print(first_token.____) 14 | -------------------------------------------------------------------------------- 
/exercises/zh/exc_01_03_02.py: -------------------------------------------------------------------------------- 1 | # 导入spaCy并创建中文nlp对象 2 | import ____ 3 | 4 | nlp = ____ 5 | 6 | # 处理文本 7 | doc = ____("我喜欢老虎和狮子。") 8 | 9 | # 遍历打印doc中的内容 10 | for i, token in enumerate(doc): 11 | print(i, token.text) 12 | 13 | # 截取Doc中"老虎"的部分 14 | laohu = ____ 15 | print(laohu.text) 16 | 17 | # 截取Doc中"老虎和狮子"的部分(不包括"。") 18 | laohu_he_shizi = ____ 19 | print(laohu_he_shizi.text) 20 | -------------------------------------------------------------------------------- /exercises/zh/exc_01_04.py: -------------------------------------------------------------------------------- 1 | import spacy 2 | 3 | nlp = spacy.blank("zh") 4 | 5 | # 处理文本 6 | doc = nlp( 7 | "在1990年,一份豆腐脑可能只要¥0.5。" 8 | "现在一份豆腐脑可能要¥5左右了。" 9 | ) 10 | 11 | # 遍历doc中的词符 12 | for token in doc: 13 | # 检测词符的文本是否是"¥" 14 | if token.____ == "¥": 15 | # 获取文档中的下一个词符 16 | next_token = ____[____] 17 | # 检测下一个词符是否组成一个数字 18 | if ____.____: 19 | print("Price found:", next_token.text) 20 | -------------------------------------------------------------------------------- /exercises/zh/exc_01_07.py: -------------------------------------------------------------------------------- 1 | import spacy 2 | 3 | # 读取"zh_core_web_sm"流程 4 | nlp = ____ 5 | 6 | text = "写入历史了:苹果是美国第一家市值超过一万亿美元的上市公司。" 7 | 8 | # 处理文本 9 | doc = ____ 10 | 11 | # 打印doc中的文本 12 | print(____.____) 13 | -------------------------------------------------------------------------------- /exercises/zh/exc_01_08_01.py: -------------------------------------------------------------------------------- 1 | import spacy 2 | 3 | nlp = spacy.load("zh_core_web_sm") 4 | 5 | text = "写入历史了:苹果是美国第一家市值超过一万亿美元的上市公司。" 6 | 7 | # 处理文本 8 | doc = ____ 9 | 10 | for token in doc: 11 | # 获取词符文本、词性标注及依存关系标签 12 | token_text = ____.____ 13 | token_pos = ____.____ 14 | token_dep = ____.____ 15 | # 规范化打印的格式 16 | print(f"{token_text:<12}{token_pos:<10}{token_dep:<10}") 17 | -------------------------------------------------------------------------------- /exercises/zh/exc_01_08_02.py: -------------------------------------------------------------------------------- 1 | import spacy 2 | 3 | nlp = spacy.load("zh_core_web_sm") 4 | 5 | text = "写入历史了:苹果是美国第一家市值超过一万亿美元的上市公司。" 6 | 7 | # 处理文本 8 | doc = ____ 9 | 10 | # 对识别出的实体进行遍历 11 | for ent in ____.____: 12 | # 打印实体文本及标注 13 | print(ent.____, ____.____) 14 | -------------------------------------------------------------------------------- /exercises/zh/exc_02_02_01.py: -------------------------------------------------------------------------------- 1 | import spacy 2 | 3 | nlp = spacy.load("zh_core_web_sm") 4 | doc = nlp("我养了一只猫。") 5 | 6 | # 查找词汇"猫"的哈希值 7 | cat_hash = ____.____.____[____] 8 | print(cat_hash) 9 | 10 | # 查找cat_hash来得到字符串 11 | cat_string = ____.____.____[____] 12 | print(cat_string) 13 | -------------------------------------------------------------------------------- /exercises/zh/exc_02_02_02.py: -------------------------------------------------------------------------------- 1 | import spacy 2 | 3 | nlp = spacy.load("zh_core_web_sm") 4 | doc = nlp("周杰伦是一个人物。") 5 | 6 | # 查找标签是"人物"的字符串的哈希值 7 | person_hash = ____.____.____[____] 8 | print(person_hash) 9 | 10 | # 查找person_hash来拿到字符串 11 | person_string = ____.____.____[____] 12 | print(person_string) 13 | -------------------------------------------------------------------------------- /exercises/zh/exc_02_05_01.py: -------------------------------------------------------------------------------- 1 | import spacy 2 | 3 | nlp = spacy.blank("en") 4 
| 5 | # 导入Doc类 6 | from ____ import ____ 7 | 8 | # 目标文本:"spaCy is cool!" 9 | words = ["spaCy", "is", "cool", "!"] 10 | spaces = [True, True, False, False] 11 | 12 | # 用words和spaces创建一个Doc 13 | doc = ____(____, words=words, spaces=spaces) 14 | print(doc.text) 15 | -------------------------------------------------------------------------------- /exercises/zh/exc_02_05_02.py: -------------------------------------------------------------------------------- 1 | import spacy 2 | 3 | nlp = spacy.blank("en") 4 | 5 | # 导入Doc类 6 | from ____ import ____ 7 | 8 | # 目标文本:"Go, get started!" 9 | words = ["Go", ",", "get", "started", "!"] 10 | spaces = [____, ____, ____, ____, ____] 11 | 12 | # 使用words和spaces创建一个Doc 13 | doc = ____(____, ____=____, ____=____) 14 | print(doc.text) 15 | -------------------------------------------------------------------------------- /exercises/zh/exc_02_05_03.py: -------------------------------------------------------------------------------- 1 | import spacy 2 | 3 | nlp = spacy.blank("en") 4 | 5 | # 导入Doc类 6 | from ____ import ____ 7 | 8 | # 目标文本:"Oh, really?!" 9 | words = [____, ____, ____, ____, ____] 10 | spaces = [____, ____, ____, ____, ____] 11 | 12 | # 用words和spaces创建一个Doc 13 | doc = ____(____, ____=____, ____=____) 14 | print(doc.text) 15 | -------------------------------------------------------------------------------- /exercises/zh/exc_02_09.py: -------------------------------------------------------------------------------- 1 | import spacy 2 | 3 | # 读取zh_core_web_md流程 4 | nlp = ____ 5 | 6 | # 处理文本 7 | doc = nlp("两只老虎跑得快") 8 | 9 | for token in doc: 10 | print(token.text) 11 | 12 | # 获取词符"老虎"的向量 13 | laohu_vector = ____.____ 14 | print(laohu_vector) 15 | -------------------------------------------------------------------------------- /exercises/zh/exc_02_10_01.py: -------------------------------------------------------------------------------- 1 | import spacy 2 | 3 | nlp = spacy.load("zh_core_web_md") 4 | 5 | doc1 = nlp("这是一个温暖的夏日") 6 | doc2 = nlp("外面阳光明媚") 7 | 8 | # 获取doc1和doc2的相似度 9 | similarity = ____.____(____) 10 | print(similarity) 11 | -------------------------------------------------------------------------------- /exercises/zh/exc_02_10_02.py: -------------------------------------------------------------------------------- 1 | import spacy 2 | 3 | nlp = spacy.load("zh_core_web_md") 4 | 5 | doc = nlp("电影和音乐") 6 | 7 | for i, token in enumerate(doc): 8 | print(i, token.text) 9 | 10 | token1, token2 = doc[0], doc[2] 11 | 12 | # 获取词符"电影"和"音乐"的相似度 13 | similarity = ____.____(____) 14 | print(similarity) 15 | -------------------------------------------------------------------------------- /exercises/zh/exc_02_10_03.py: -------------------------------------------------------------------------------- 1 | import spacy 2 | 3 | nlp = spacy.load("zh_core_web_md") 4 | 5 | doc = nlp("这是一家不错的餐厅。之后我们又去了一家很好的酒吧。") 6 | 7 | for i, token in enumerate(doc): 8 | print(i, token.text) 9 | 10 | # 给"不错的餐厅"和"很好的酒吧"分别创建span 11 | span1 = ____ 12 | span2 = ____ 13 | 14 | # 获取两个span的相似度 15 | similarity = ____.____(____) 16 | print(similarity) 17 | -------------------------------------------------------------------------------- /exercises/zh/exc_03_03.py: -------------------------------------------------------------------------------- 1 | import spacy 2 | 3 | # 读取zh_core_web_sm流程 4 | nlp = ____ 5 | 6 | # 打印流程组件的名字 7 | print(____.____) 8 | 9 | # 打印完整流程的(name, component)元组 10 | print(____.____) 11 |
-------------------------------------------------------------------------------- /exercises/zh/exc_03_09_01.py: -------------------------------------------------------------------------------- 1 | import spacy 2 | from spacy.tokens import Token 3 | 4 | nlp = spacy.load("zh_core_web_sm") 5 | 6 | # 注册词符的扩展属性"is_country",其默认值是False 7 | ____.____(____, ____=____) 8 | 9 | # 处理文本,将词符"新加坡"的is_country属性设置为True 10 | doc = nlp("我住在新加坡。") 11 | ____ = True 12 | 13 | # 对所有词符打印词符文本及is_country属性 14 | print([(____, ____) for token in doc]) 15 | -------------------------------------------------------------------------------- /exercises/zh/exc_03_09_02.py: -------------------------------------------------------------------------------- 1 | import spacy 2 | from spacy.tokens import Token 3 | 4 | nlp = spacy.blank("zh") 5 | 6 | # 定义取值器函数,读入一个词符并返回其逆序的文本 7 | def get_reversed(token): 8 | return token.text[::-1] 9 | 10 | 11 | # 注册词符的扩展属性"reversed"及其取值器get_reversed 12 | ____.____(____, ____=____) 13 | 14 | # 处理文本,打印每一个词符的逆序属性 15 | doc = nlp("我说的所有话都是假的,包括这一句。") 16 | for ____ in ____: 17 | print("reversed:", ____) 18 | -------------------------------------------------------------------------------- /exercises/zh/exc_03_10_01.py: -------------------------------------------------------------------------------- 1 | import spacy 2 | from spacy.tokens import Doc 3 | 4 | nlp = spacy.blank("zh") 5 | 6 | # 定义取值器函数 7 | def get_has_number(doc): 8 | # 如果doc中有任一词符的token.like_num返回True,则返回True 9 | return any(____ for token in doc) 10 | 11 | 12 | # 注册Doc的扩展属性"has_number"及其取值器get_has_number 13 | ____.____(____, ____=____) 14 | 15 | # 处理文本,检查定制化的has_number属性 16 | doc = nlp("这家博物馆在2012年关了五个月。") 17 | print("has_number:", ____) 18 | -------------------------------------------------------------------------------- /exercises/zh/exc_03_10_02.py: -------------------------------------------------------------------------------- 1 | import spacy 2 | from spacy.tokens import Span 3 | 4 | nlp = spacy.blank("zh") 5 | 6 | # 定义这个方法 7 | def to_html(span, tag): 8 | # 将span文本包在HTML标签中并返回 9 | return f"<{tag}>{span.text}</{tag}>" 10 | 11 | 12 | # 注册这个Span方法扩展名"to_html"及其方法to_html 13 | ____.____(____, ____=____) 14 | 15 | # 处理文本,在span上调用to_html方法并传入标签名"strong" 16 | doc = nlp("大家好,这是一个句子。") 17 | span = doc[0:3] 18 | print(____) 19 | -------------------------------------------------------------------------------- /exercises/zh/exc_03_14_01.py: -------------------------------------------------------------------------------- 1 | import json 2 | import spacy 3 | 4 | nlp = spacy.load("zh_core_web_sm") 5 | 6 | with open("exercises/zh/weibo.json", encoding="utf8") as f: 7 | TEXTS = json.loads(f.read()) 8 | 9 | # 处理文本,打印形容词 10 | for text in TEXTS: 11 | doc = nlp(text) 12 | print([token.text for token in doc if token.pos_ == "ADJ"]) 13 | -------------------------------------------------------------------------------- /exercises/zh/exc_03_14_02.py: -------------------------------------------------------------------------------- 1 | import json 2 | import spacy 3 | 4 | nlp = spacy.load("zh_core_web_sm") 5 | 6 | with open("exercises/zh/weibo.json", encoding="utf8") as f: 7 | TEXTS = json.loads(f.read()) 8 | 9 | # 处理文本,打印实体 10 | docs = [nlp(text) for text in TEXTS] 11 | entities = [doc.ents for doc in docs] 12 | print(*entities) 13 | -------------------------------------------------------------------------------- /exercises/zh/exc_03_14_03.py: -------------------------------------------------------------------------------- 1 | import spacy 2 | 3 | nlp = spacy.blank("zh") 4 |
5 | people = ["周杰伦", "庞麦郎", "诸葛亮"] 6 | 7 | # 为PhraseMatcher创建一个模板列表 8 | patterns = [nlp(person) for person in people] 9 | -------------------------------------------------------------------------------- /exercises/zh/exc_03_16_01.py: -------------------------------------------------------------------------------- 1 | import spacy 2 | 3 | nlp = spacy.load("zh_core_web_sm") 4 | text = ( 5 | "在300多年的风雨历程中,历代同仁堂人始终恪守“炮制虽繁必不敢省人工,品味虽贵必不敢减物力”的古训," 6 | "树立“修合无人见,存心有天知”的自律意识,造就了制药过程中兢兢小心、精益求精的严细精神。" 7 | ) 8 | 9 | # 仅对文本做分词 10 | doc = nlp(text) 11 | print([token.text for token in doc]) 12 | -------------------------------------------------------------------------------- /exercises/zh/exc_03_16_02.py: -------------------------------------------------------------------------------- 1 | import spacy 2 | 3 | nlp = spacy.load("zh_core_web_sm") 4 | text = ( 5 | "在300多年的风雨历程中,历代同仁堂人始终恪守“炮制虽繁必不敢省人工,品味虽贵必不敢减物力”的古训," 6 | "树立“修合无人见,存心有天知”的自律意识,造就了制药过程中兢兢小心、精益求精的严细精神。" 7 | ) 8 | 9 | # 关闭tagger和parser 10 | with ____.____(____): 11 | # 处理文本 12 | doc = ____ 13 | # 打印doc中的实体 14 | print(____) 15 | -------------------------------------------------------------------------------- /exercises/zh/exc_04_07_01.sh: -------------------------------------------------------------------------------- 1 | python -m spacy ____ ____ ____ --___ ____ --____ ____ 2 | -------------------------------------------------------------------------------- /exercises/zh/exc_04_07_02.sh: -------------------------------------------------------------------------------- 1 | cat ./config.cfg 2 | -------------------------------------------------------------------------------- /exercises/zh/exc_04_08.sh: -------------------------------------------------------------------------------- 1 | python -m spacy ____ ____ --output ____ --paths.train ____ --paths.dev ____ 2 | -------------------------------------------------------------------------------- /exercises/zh/gadgets.json: -------------------------------------------------------------------------------- 1 | [ 2 | ["如何预定iPhone X", {"entities": [[4, 12, "GADGET"]] }], 3 | ["iPhone X就要来了", {"entities": [[0, 8, "GADGET"]] }], 4 | ["为买一个iPhone X花上万块钱值得吗?", {"entities": [[4, 12, "GADGET"]] }], 5 | ["iPhone 8的评测出来了", {"entities": [[0, 8, "GADGET"]] }], 6 | ["最新的iPhone已经到第11代了", {"entities": [[3, 9, "GADGET"]] }], 7 | ["我急需一部新手机,给点建议吧!", {"entities": [] }] 8 | ] 9 | 10 | -------------------------------------------------------------------------------- /exercises/zh/iphone.json: -------------------------------------------------------------------------------- 1 | [ 2 | "如何预定iPhone X", 3 | "iPhone X就要来了", 4 | "为买一个iPhone X花上万块钱值得吗?", 5 | "iPhone 8的评测出来了", 6 | "iPhone 11 vs iPhone 8:有哪些升级?", 7 | "我急需一部新手机,给点建议吧!" 
8 | ] 9 | -------------------------------------------------------------------------------- /exercises/zh/solution_01_02_01.py: -------------------------------------------------------------------------------- 1 | # 导入spaCy 2 | import spacy 3 | 4 | # 创建英文nlp对象 5 | nlp = spacy.blank("en") 6 | 7 | # 处理文本 8 | doc = nlp("This is a sentence.") 9 | 10 | # 打印文本 11 | print(doc.text) 12 | -------------------------------------------------------------------------------- /exercises/zh/solution_01_02_02.py: -------------------------------------------------------------------------------- 1 | # 导入spaCy 2 | import spacy 3 | 4 | # 创建德语nlp对象 5 | nlp = spacy.blank("de") 6 | 7 | # 处理文本 (这是德语"Kind regards!"的意思) 8 | doc = nlp("Liebe Grüße!") 9 | 10 | # 打印文本 11 | print(doc.text) 12 | -------------------------------------------------------------------------------- /exercises/zh/solution_01_02_03.py: -------------------------------------------------------------------------------- 1 | # 导入spaCy 2 | import spacy 3 | 4 | # 创建西班牙语nlp对象 5 | nlp = spacy.blank("es") 6 | 7 | # 处理文本 (这是西班牙语"How are you?"的意思) 8 | doc = nlp("¿Cómo estás?") 9 | 10 | # 打印文本 11 | print(doc.text) 12 | -------------------------------------------------------------------------------- /exercises/zh/solution_01_02_04.py: -------------------------------------------------------------------------------- 1 | # 导入spaCy 2 | import spacy 3 | 4 | # 创建中文nlp对象 5 | nlp = spacy.blank("zh") 6 | 7 | # 处理文本 8 | doc = nlp("这是一个句子。") 9 | 10 | # 打印文本 11 | print(doc.text) 12 | -------------------------------------------------------------------------------- /exercises/zh/solution_01_03_01.py: -------------------------------------------------------------------------------- 1 | # 导入spacy并创建中文nlp对象 2 | import spacy 3 | 4 | nlp = spacy.blank("zh") 5 | 6 | # 处理文本 7 | doc = nlp("我喜欢老虎和狮子。") 8 | 9 | # 选择第一个词符 10 | first_token = doc[0] 11 | 12 | # 打印第一个词符的文本 13 | print(first_token.text) 14 | -------------------------------------------------------------------------------- /exercises/zh/solution_01_03_02.py: -------------------------------------------------------------------------------- 1 | # 导入spacy并创建中文nlp对象 2 | import spacy 3 | 4 | nlp = spacy.blank("zh") 5 | 6 | # 处理文本 7 | doc = nlp("我喜欢老虎和狮子。") 8 | 9 | # 遍历打印doc中的内容 10 | for i, token in enumerate(doc): 11 | print(i, token.text) 12 | 13 | # 截取Doc中"老虎"的部分 14 | laohu = doc[2:3] 15 | print(laohu.text) 16 | 17 | # 截取Doc中"老虎和狮子"的部分(不包括"。") 18 | laohu_he_shizi = doc[2:5] 19 | print(laohu_he_shizi.text) 20 | -------------------------------------------------------------------------------- /exercises/zh/solution_01_07.py: -------------------------------------------------------------------------------- 1 | import spacy 2 | 3 | # 读取"zh_core_web_sm"流程 4 | nlp = spacy.load("zh_core_web_sm") 5 | 6 | text = "写入历史了:苹果是美国第一家市值超过一万亿美元的上市公司。" 7 | 8 | # 处理文本 9 | doc = nlp(text) 10 | 11 | # 打印doc中的文本 12 | print(doc.text) 13 | -------------------------------------------------------------------------------- /exercises/zh/solution_01_08_01.py: -------------------------------------------------------------------------------- 1 | import spacy 2 | 3 | nlp = spacy.load("zh_core_web_sm") 4 | 5 | text = "写入历史了:苹果是美国第一家市值超过一万亿美元的上市公司。" 6 | 7 | # 处理文本 8 | doc = nlp(text) 9 | 10 | for token in doc: 11 | # 获取词符文本、词性标注及依存关系标签 12 | token_text = token.text 13 | token_pos = token.pos_ 14 | token_dep = token.dep_ 15 | # 规范化打印的格式 16 | print(f"{token_text:<12}{token_pos:<10}{token_dep:<10}") 17 |
-------------------------------------------------------------------------------- /exercises/zh/solution_01_08_02.py: -------------------------------------------------------------------------------- 1 | import spacy 2 | 3 | nlp = spacy.load("zh_core_web_sm") 4 | 5 | text = "写入历史了:苹果是美国第一家市值超过一万亿美元的上市公司。" 6 | 7 | # 处理文本 8 | doc = nlp(text) 9 | 10 | # 对识别出的实体进行遍历 11 | for ent in doc.ents: 12 | # 打印实体文本及标注 13 | print(ent.text, ent.label_) 14 | -------------------------------------------------------------------------------- /exercises/zh/solution_02_02_01.py: -------------------------------------------------------------------------------- 1 | import spacy 2 | 3 | nlp = spacy.load("zh_core_web_sm") 4 | doc = nlp("我养了一只猫。") 5 | 6 | # 查找词汇"猫"的哈希值 7 | cat_hash = nlp.vocab.strings["猫"] 8 | print(cat_hash) 9 | 10 | # 查找cat_hash来得到字符串 11 | cat_string = nlp.vocab.strings[cat_hash] 12 | print(cat_string) 13 | -------------------------------------------------------------------------------- /exercises/zh/solution_02_02_02.py: -------------------------------------------------------------------------------- 1 | import spacy 2 | 3 | nlp = spacy.load("zh_core_web_sm") 4 | doc = nlp("周杰伦是一个人物。") 5 | 6 | # 查找标签是"人物"的字符串的哈希值 7 | person_hash = nlp.vocab.strings["人物"] 8 | print(person_hash) 9 | 10 | # 查找person_hash来拿到字符串 11 | person_string = nlp.vocab.strings[person_hash] 12 | print(person_string) 13 | -------------------------------------------------------------------------------- /exercises/zh/solution_02_05_01.py: -------------------------------------------------------------------------------- 1 | import spacy 2 | 3 | nlp = spacy.blank("en") 4 | 5 | # 导入Doc类 6 | from spacy.tokens import Doc 7 | 8 | # 目标文本:"spaCy is cool!" 9 | words = ["spaCy", "is", "cool", "!"] 10 | spaces = [True, True, False, False] 11 | 12 | # 用words和spaces创建一个Doc 13 | doc = Doc(nlp.vocab, words=words, spaces=spaces) 14 | print(doc.text) 15 | -------------------------------------------------------------------------------- /exercises/zh/solution_02_05_02.py: -------------------------------------------------------------------------------- 1 | import spacy 2 | 3 | nlp = spacy.blank("en") 4 | 5 | # 导入Doc类 6 | from spacy.tokens import Doc 7 | 8 | # 目标文本:"Go, get started!" 9 | words = ["Go", ",", "get", "started", "!"] 10 | spaces = [False, True, True, False, False] 11 | 12 | # 使用words和spaces创建一个Doc 13 | doc = Doc(nlp.vocab, words=words, spaces=spaces) 14 | print(doc.text) 15 | -------------------------------------------------------------------------------- /exercises/zh/solution_02_05_03.py: -------------------------------------------------------------------------------- 1 | import spacy 2 | 3 | nlp = spacy.blank("en") 4 | 5 | # 导入Doc类 6 | from spacy.tokens import Doc 7 | 8 | # 目标文本:"Oh, really?!" 
9 | words = ["Oh", ",", "really", "?", "!"] 10 | spaces = [False, True, False, False, False] 11 | 12 | # 用words和spaces创建一个Doc 13 | doc = Doc(nlp.vocab, words=words, spaces=spaces) 14 | print(doc.text) 15 | -------------------------------------------------------------------------------- /exercises/zh/solution_02_07.py: -------------------------------------------------------------------------------- 1 | import spacy 2 | 3 | nlp = spacy.load("zh_core_web_sm") 4 | doc = nlp("北京是一座美丽的城市。") 5 | 6 | # 遍历所有的词符 7 | for token in doc: 8 | # 检查当前词符是否是一个专有名词 9 | if token.pos_ == "PROPN": 10 | # 检查下一个词符是否是一个动词 11 | if doc[token.i + 1].pos_ == "VERB": 12 | print("找到了动词前面的一个专有名词:", token.text) 13 | -------------------------------------------------------------------------------- /exercises/zh/solution_02_09.py: -------------------------------------------------------------------------------- 1 | import spacy 2 | 3 | # 读取zh_core_web_md流程 4 | nlp = spacy.load("zh_core_web_md") 5 | 6 | # 处理文本 7 | doc = nlp("两只老虎跑得快") 8 | 9 | for token in doc: 10 | print(token.text) 11 | 12 | # 获取词符"老虎"的向量 13 | laohu_vector = doc[2].vector 14 | print(laohu_vector) 15 | -------------------------------------------------------------------------------- /exercises/zh/solution_02_10_01.py: -------------------------------------------------------------------------------- 1 | import spacy 2 | 3 | nlp = spacy.load("zh_core_web_md") 4 | 5 | doc1 = nlp("这是一个温暖的夏日") 6 | doc2 = nlp("外面阳光明媚") 7 | 8 | # 获取doc1和doc2的相似度 9 | similarity = doc1.similarity(doc2) 10 | print(similarity) 11 | -------------------------------------------------------------------------------- /exercises/zh/solution_02_10_02.py: -------------------------------------------------------------------------------- 1 | import spacy 2 | 3 | nlp = spacy.load("zh_core_web_md") 4 | 5 | doc = nlp("电影和音乐") 6 | 7 | for i, token in enumerate(doc): 8 | print(i, token.text) 9 | 10 | token1, token2 = doc[0], doc[2] 11 | 12 | # 获取词符"电影"和"音乐"的相似度 13 | similarity = token1.similarity(token2) 14 | print(similarity) 15 | -------------------------------------------------------------------------------- /exercises/zh/solution_02_10_03.py: -------------------------------------------------------------------------------- 1 | import spacy 2 | 3 | nlp = spacy.load("zh_core_web_md") 4 | 5 | doc = nlp("这是一家不错的餐厅。之后我们又去了一家很好的酒吧。") 6 | 7 | for i, token in enumerate(doc): 8 | print(i, token.text) 9 | 10 | # 给"不错的餐厅"和"很好的酒吧"分别创建span 11 | span1 = doc[2:5] 12 | span2 = doc[12:15] 13 | 14 | # 获取两个span的相似度 15 | similarity = span1.similarity(span2) 16 | print(similarity) 17 | -------------------------------------------------------------------------------- /exercises/zh/solution_03_03.py: -------------------------------------------------------------------------------- 1 | import spacy 2 | 3 | # 读取zh_core_web_sm流程 4 | nlp = spacy.load("zh_core_web_sm") 5 | 6 | # 打印流程组件的名字 7 | print(nlp.pipe_names) 8 | 9 | # 打印完整流程的(name, component)元组 10 | print(nlp.pipeline) 11 | -------------------------------------------------------------------------------- /exercises/zh/solution_03_09_01.py: -------------------------------------------------------------------------------- 1 | import spacy 2 | from spacy.tokens import Token 3 | 4 | nlp = spacy.load("zh_core_web_sm") 5 | 6 | # 注册词符的扩展属性"is_country",其默认值是False 7 | Token.set_extension("is_country", default=False) 8 | 9 | # 处理文本,将词符"新加坡"的is_country属性设置为True 10 | doc = nlp("我住在新加坡。") 11 | doc[2]._.is_country = True 12 | 13 | # 对所有词符打印词符文本及is_country属性 14 | print([(token.text, 
token._.is_country) for token in doc]) 15 | -------------------------------------------------------------------------------- /exercises/zh/solution_03_14_01.py: -------------------------------------------------------------------------------- 1 | import json 2 | import spacy 3 | 4 | nlp = spacy.load("zh_core_web_sm") 5 | 6 | with open("exercises/zh/weibo.json", encoding="utf8") as f: 7 | TEXTS = json.loads(f.read()) 8 | 9 | # 处理文本,打印形容词 10 | for doc in nlp.pipe(TEXTS): 11 | print([token.text for token in doc if token.pos_ == "ADJ"]) 12 | -------------------------------------------------------------------------------- /exercises/zh/solution_03_14_02.py: -------------------------------------------------------------------------------- 1 | import json 2 | import spacy 3 | 4 | nlp = spacy.load("zh_core_web_sm") 5 | 6 | with open("exercises/zh/weibo.json", encoding="utf8") as f: 7 | TEXTS = json.loads(f.read()) 8 | 9 | # 处理文本,打印实体 10 | docs = list(nlp.pipe(TEXTS)) 11 | entities = [doc.ents for doc in docs] 12 | print(*entities) 13 | -------------------------------------------------------------------------------- /exercises/zh/solution_03_14_03.py: -------------------------------------------------------------------------------- 1 | import spacy 2 | 3 | nlp = spacy.blank("zh") 4 | 5 | people = ["周杰伦", "庞麦郎", "诸葛亮"] 6 | 7 | # 为PhraseMatcher创建一个模板列表 8 | patterns = list(nlp.pipe(people)) 9 | -------------------------------------------------------------------------------- /exercises/zh/solution_03_16_01.py: -------------------------------------------------------------------------------- 1 | import spacy 2 | 3 | nlp = spacy.load("zh_core_web_sm") 4 | text = ( 5 | "在300多年的风雨历程中,历代同仁堂人始终恪守“炮制虽繁必不敢省人工,品味虽贵必不敢减物力”的古训," 6 | "树立“修合无人见,存心有天知”的自律意识,造就了制药过程中兢兢小心、精益求精的严细精神。" 7 | ) 8 | 9 | # 仅对文本做分词 10 | doc = nlp.make_doc(text) 11 | print([token.text for token in doc]) 12 | -------------------------------------------------------------------------------- /exercises/zh/solution_03_16_02.py: -------------------------------------------------------------------------------- 1 | import spacy 2 | 3 | nlp = spacy.load("zh_core_web_sm") 4 | text = ( 5 | "在300多年的风雨历程中,历代同仁堂人始终恪守“炮制虽繁必不敢省人工,品味虽贵必不敢减物力”的古训," 6 | "树立“修合无人见,存心有天知”的自律意识,造就了制药过程中兢兢小心、精益求精的严细精神。" 7 | ) 8 | 9 | # 关闭tagger和parser 10 | with nlp.select_pipes(disable=["tagger", "parser"]): 11 | # 处理文本 12 | doc = nlp(text) 13 | # 打印doc中的实体 14 | print(doc.ents) 15 | -------------------------------------------------------------------------------- /exercises/zh/solution_04_07_01.sh: -------------------------------------------------------------------------------- 1 | python -m spacy init config ./config.cfg --lang zh --pipeline ner 2 | -------------------------------------------------------------------------------- /exercises/zh/solution_04_07_02.sh: -------------------------------------------------------------------------------- 1 | cat ./config.cfg 2 | -------------------------------------------------------------------------------- /exercises/zh/solution_04_08.sh: -------------------------------------------------------------------------------- 1 | python -m spacy train ./exercises/zh/config_gadget.cfg --output ./output --paths.train ./exercises/zh/train_gadget.spacy --paths.dev ./exercises/zh/dev_gadget.spacy 2 | -------------------------------------------------------------------------------- /exercises/zh/test_01_02_01.py: -------------------------------------------------------------------------------- 1 | def test(): 2 | import spacy.tokens 3 | import spacy.lang.en 4 | 
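    # 注:这些测试假设由课程的测试运行器执行:学员提交的代码会先运行,
    # 其中的nlp和doc等变量,连同__solution__(提交的源码字符串)和
    # __msg__(反馈工具),都会被注入到这里的命名空间中。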
5 | assert isinstance( 6 | nlp, spacy.lang.en.English 7 | ), "nlp应该是英文类的一个实例。" 8 | assert isinstance( 9 | doc, spacy.tokens.Doc 10 | ), "你用nlp实例处理过文本并且创建了一个doc吗?" 11 | assert "print(doc.text)" in __solution__, "你打印doc.text了吗?" 12 | 13 | __msg__.good("干得漂亮!") 14 | -------------------------------------------------------------------------------- /exercises/zh/test_01_02_02.py: -------------------------------------------------------------------------------- 1 | def test(): 2 | import spacy.tokens 3 | import spacy.lang.de 4 | 5 | assert isinstance( 6 | nlp, spacy.lang.de.German 7 | ), "nlp应该是德文类的一个实例。" 8 | assert isinstance( 9 | doc, spacy.tokens.Doc 10 | ), "你用nlp实例处理过文本并且创建了一个doc吗?" 11 | assert "print(doc.text)" in __solution__, "你打印doc.text了吗?" 12 | 13 | __msg__.good("Sehr gut! :)") 14 | -------------------------------------------------------------------------------- /exercises/zh/test_01_02_03.py: -------------------------------------------------------------------------------- 1 | def test(): 2 | import spacy.tokens 3 | import spacy.lang.es 4 | 5 | assert isinstance( 6 | nlp, spacy.lang.es.Spanish 7 | ), "nlp应该是西班牙语类的一个实例。" 8 | assert isinstance( 9 | doc, spacy.tokens.Doc 10 | ), "你用nlp实例处理过文本并且创建了一个doc吗?" 11 | assert "print(doc.text)" in __solution__, "你打印doc.text了吗?" 12 | 13 | __msg__.good("Perfecto! 我们现在继续试试documents,spans和tokens.") 14 | -------------------------------------------------------------------------------- /exercises/zh/test_01_02_04.py: -------------------------------------------------------------------------------- 1 | def test(): 2 | import spacy.tokens 3 | import spacy.lang.zh 4 | 5 | assert isinstance( 6 | nlp, spacy.lang.zh.Chinese 7 | ), "nlp应该是中文类的一个实例。" 8 | assert isinstance( 9 | doc, spacy.tokens.Doc 10 | ), "你用nlp实例处理过文本并且创建了一个doc吗?" 11 | assert "print(doc.text)" in __solution__, "你打印doc.text了吗?" 12 | 13 | __msg__.good("完美!我们现在继续试试documents,spans和tokens.") 14 | -------------------------------------------------------------------------------- /exercises/zh/test_01_03_01.py: -------------------------------------------------------------------------------- 1 | def test(): 2 | assert ( 3 | doc.text == "我喜欢老虎和狮子。" 4 | ), "你确定你正确处理了文本吗?" 5 | assert first_token == doc[0], "你确定你选择了第一个词符吗?" 6 | assert "print(first_token.text)" in __solution__, "你打印了词符的文本吗?" 7 | assert 'spacy.blank("zh")' in __solution__, '你将spacy.blank设置为正确的语言了吗?' 8 | __msg__.good("干得漂亮!") 9 | -------------------------------------------------------------------------------- /exercises/zh/test_01_03_02.py: -------------------------------------------------------------------------------- 1 | def test(): 2 | assert ( 3 | doc.text == "我喜欢老虎和狮子。" 4 | ), "你确定你正确处理文本了吗?" 5 | assert ( 6 | laohu == doc[2:3] 7 | ), "你确定你选择了老虎的正确跨度吗?" 8 | assert ( 9 | laohu_he_shizi == doc[2:5] 10 | ), "你确定你选择了老虎和狮子的正确跨度吗?" 11 | assert 'spacy.blank("zh")' in __solution__, '你将spacy.blank设置为正确的语言了吗?' 12 | __msg__.good("好样的!") 13 | -------------------------------------------------------------------------------- /exercises/zh/test_01_07.py: -------------------------------------------------------------------------------- 1 | def test(): 2 | assert "spacy.load" in __solution__, "你有调用spacy.load吗?" 3 | assert nlp.meta["lang"] == "zh", "你有调用正确的流程吗?" 4 | assert nlp.meta["name"] == "core_web_sm", "你有调用正确的流程吗?" 5 | assert "nlp(text)" in __solution__, "你有正确处理文本吗?" 6 | assert "print(doc.text)" in __solution__, "你有打印Doc的文本吗?" 
7 | 8 | __msg__.good( 9 | "好极了!现在你已经练习过读取模型,我们来看看模型的一些预测方法。" 10 | ) 11 | -------------------------------------------------------------------------------- /exercises/zh/test_01_08_01.py: -------------------------------------------------------------------------------- 1 | def test(): 2 | assert ( 3 | "token_text = token.text" in __solution__ 4 | ), "你有正确拿到词符的文本吗?" 5 | assert ( 6 | "token_pos = token.pos_" in __solution__ 7 | ), "你有正确拿到词符的词性标注了吗?记着要用带下划线的属性。" 8 | assert ( 9 | "token_dep = token.dep_" in __solution__ 10 | ), "你有正确拿到词符的依存关系标签了吗?记着要用带下划线的属性。" 11 | __msg__.good("完美!") 12 | -------------------------------------------------------------------------------- /exercises/zh/test_01_08_02.py: -------------------------------------------------------------------------------- 1 | def test(): 2 | assert "for ent in doc.ents" in __solution__, "你有遍历所有实体吗?" 3 | assert ( 4 | "print(ent.text, ent.label_)" in __solution__ 5 | ), "你有打印文本和标注吗?" 6 | 7 | __msg__.good( 8 | "太棒啦!到现在为止,每一次模型都是正确的。" 9 | "下一个练习我们看看模型错了会怎么样," 10 | "以及如何调整模型。" 11 | ) 12 | -------------------------------------------------------------------------------- /exercises/zh/test_01_09.py: -------------------------------------------------------------------------------- 1 | def test(): 2 | assert "for ent in doc.ents" in __solution__, "你有遍历实体吗?" 3 | assert iphone_x.text == "iPhone X", "你确定iphone_x包含了所有正确的词符吗?" 4 | 5 | __msg__.good( 6 | "完美!当然你也不用一定要这么手动来做。" 7 | "下一个练习我们来学习spaCy的基于规则的matcher," 8 | "使用它我们就可以在文本中寻找到特定的词语和短语了。" 9 | ) 10 | -------------------------------------------------------------------------------- /exercises/zh/test_02_02_01.py: -------------------------------------------------------------------------------- 1 | def test(): 2 | assert cat_hash == nlp.vocab.strings["猫"], "你有使用正确的哈希值吗?" 3 | assert 'nlp.vocab.strings["猫"]' in __solution__, "你有使用正确的字符串吗?" 4 | assert cat_string == "猫", "你有获得正确的字符串吗?" 5 | assert ( 6 | "nlp.vocab.strings[cat_hash]" in __solution__ 7 | ), "你有从哈希值中获得字符串吗?" 8 | 9 | __msg__.good("干得漂亮!") 10 | -------------------------------------------------------------------------------- /exercises/zh/test_02_02_02.py: -------------------------------------------------------------------------------- 1 | def test(): 2 | assert ( 3 | person_hash == nlp.vocab.strings["人物"] 4 | ), "你有使用正确的哈希值吗?" 5 | assert ( 6 | 'nlp.vocab.strings["人物"]' in __solution__ 7 | ), "你有使用正确的哈希值吗?" 8 | assert person_string == "人物", "你有获得正确的字符串吗?" 9 | assert ( 10 | "nlp.vocab.strings[person_hash]" in __solution__ 11 | ), "你有从哈希值中获得字符串吗?" 12 | 13 | __msg__.good("干得漂亮!") 14 | -------------------------------------------------------------------------------- /exercises/zh/test_02_05_01.py: -------------------------------------------------------------------------------- 1 | def test(): 2 | assert ( 3 | "from spacy.tokens import Doc" in __solution__ 4 | ), "你有正确导入Doc类吗?" 5 | assert doc.text == "spaCy is cool!", "你有正确创建Doc吗?" 6 | assert "print(doc.text)" in __solution__, "你有打印Doc的文字吗?" 7 | __msg__.good("好极了!") 8 | -------------------------------------------------------------------------------- /exercises/zh/test_02_09.py: -------------------------------------------------------------------------------- 1 | def test(): 2 | assert ( 3 | 'spacy.load("zh_core_web_md")' in __solution__ 4 | ), "你有正确读入中等规模的流程吗?" 5 | assert "doc[2].vector" in __solution__, "你有得到正确的向量吗?" 
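    # 注:只有自带词向量的流程(例如zh_core_web_md)才能给出有意义的.vector值,
    # 小型(sm)流程不包含词向量。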
6 | __msg__.good( 7 | "干得漂亮!下一个练习,我们会用spaCy来通过这些词向量计算document、span、和token" 8 | "之间的相似度。" 9 | ) 10 | -------------------------------------------------------------------------------- /exercises/zh/test_02_10_01.py: -------------------------------------------------------------------------------- 1 | def test(): 2 | assert ( 3 | "doc1.similarity(doc2)" in __solution__ or "doc2.similarity(doc1)" in __solution__ 4 | ), "你有计算两个doc之间的相似度吗?" 5 | assert ( 6 | 0 <= float(similarity) <= 1 7 | ), "相似度分数是一个浮点数。你确定你计算正确了吗?" 8 | __msg__.good("棒棒哒!") 9 | -------------------------------------------------------------------------------- /exercises/zh/test_02_10_02.py: -------------------------------------------------------------------------------- 1 | def test(): 2 | assert ( 3 | "token1.similarity(token2)" in __solution__ or "token2.similarity(token1)" in __solution__ 4 | ), "你有计算两个token之间的相似度吗?" 5 | assert ( 6 | 0 <= float(similarity) <= 1 7 | ), "相似度分数是一个浮点数。你确定你计算正确了吗?" 8 | __msg__.good("厉害!") 9 | -------------------------------------------------------------------------------- /exercises/zh/test_02_14.py: -------------------------------------------------------------------------------- 1 | def test(): 2 | assert ( 3 | "from spacy.matcher import PhraseMatcher" in __solution__ 4 | ), "你有正确导入PhraseMatcher吗?" 5 | assert ( 6 | "PhraseMatcher(nlp.vocab)" in __solution__ 7 | ), "你有正确初始化PhraseMatcher吗?" 8 | assert "matcher(doc)" in __solution__, "你有在doc上调用matcher吗?" 9 | assert len(matches) == 2, "匹配结果数目不对,应该是2个。" 10 | __msg__.good("棒极了!我们来用这个matcher添加一些定制化的实体。") 11 | -------------------------------------------------------------------------------- /exercises/zh/test_03_03.py: -------------------------------------------------------------------------------- 1 | def test(): 2 | assert nlp.meta["name"] == "core_web_sm", "你有读取正确的流程吗?" 3 | assert nlp.meta["lang"] == "zh", "你有读取正确的流程吗?" 4 | assert "print(nlp.pipe_names)" in __solution__, "你有打印组件名字了吗?" 5 | assert "print(nlp.pipeline)" in __solution__, "你有打印流程了吗?" 6 | 7 | __msg__.good( 8 | "干得漂亮!当你不确定当前流程的时候,你可以随时打印nlp.pipe_names或者" 9 | "nlp.pipeline来检查下。" 10 | ) 11 | -------------------------------------------------------------------------------- /exercises/zh/test_03_06.py: -------------------------------------------------------------------------------- 1 | def test(): 2 | assert "len(doc)" in __solution__, "你有拿到doc的长度了吗?" 3 | assert "return doc" in __solution__, "你有返回这个doc吗?" 4 | assert "nlp.add_pipe" in __solution__, "你有添加这个组件吗?" 5 | assert ( 6 | "first=True" in __solution__ 7 | ), "你有把组件加到流程的最前面吗?" 8 | assert nlp.pipe_names[0] == "length_component", "组件名字好像不太对?" 9 | __msg__.good("完美!现在我们来看看再复杂一点的组件!") 10 | -------------------------------------------------------------------------------- /exercises/zh/test_03_14_01.py: -------------------------------------------------------------------------------- 1 | def test(): 2 | assert ( 3 | "for doc in nlp.pipe(TEXTS)" in __solution__ 4 | ), "你有遍历nlp.pipe生成的那些doc吗?" 5 | __msg__.good("好样的!") 6 | -------------------------------------------------------------------------------- /exercises/zh/test_03_14_02.py: -------------------------------------------------------------------------------- 1 | def test(): 2 | assert ( 3 | "docs = list(nlp.pipe(TEXTS))" in __solution__ 4 | ), "你有用list将nlp.pipe的结果变为列表吗?" 
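    # 注:nlp.pipe返回的是一个按批次惰性处理文本的生成器,
    # 用list()把结果实例化之后,得到的doc对象才能被反复索引和遍历。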
5 | __msg__.good("美美哒!") 6 | -------------------------------------------------------------------------------- /exercises/zh/test_03_14_03.py: -------------------------------------------------------------------------------- 1 | def test(): 2 | assert ( 3 | "patterns = list(nlp.pipe(people))" in __solution__ 4 | ), "你有用list将nlp.pipe的结果变为列表吗?" 5 | 6 | __msg__.good( 7 | "干得漂亮!接下来我们看一个实际例子,用nlp.pipe来处理文档生成更多的元数据。" 8 | ) 9 | -------------------------------------------------------------------------------- /exercises/zh/test_03_16_01.py: -------------------------------------------------------------------------------- 1 | def test(): 2 | assert ( 3 | "doc = nlp.make_doc(text)" in __solution__ 4 | or "doc = nlp.tokenizer(text)" in __solution__ 5 | ), "你是否仅是对文本做了分词?" 6 | 7 | __msg__.good("棒棒哒!") 8 | -------------------------------------------------------------------------------- /exercises/zh/test_03_16_02.py: -------------------------------------------------------------------------------- 1 | def test(): 2 | assert ( 3 | 'with nlp.select_pipes(disable=["tagger", "parser"])' in __solution__ 4 | or 'with nlp.select_pipes(disable=["parser", "tagger"])' in __solution__ 5 | ), "你是否在nlp.select_pipes中调用了正确的组件?" 6 | 7 | __msg__.good( 8 | "完美!现在我们已经练习了一些技巧来提高性能,我们可以学习下一个章节," 9 | "训练一些spaCy的神经网络模型了。" 10 | ) 11 | -------------------------------------------------------------------------------- /exercises/zh/test_04_04.py: -------------------------------------------------------------------------------- 1 | def test(): 2 | assert ( 3 | 'spacy.blank("zh")' in __solution__ 4 | ), "你有创建空的中文模型了吗?" 5 | assert ( 6 | "DocBin(docs=docs)" in __solution__ 7 | ), "你有正确创建DocBin对象吗?" 8 | assert "doc_bin.to_disk(" in __solution__, "你有使用方法to_disk吗?" 9 | assert "train.spacy" in __solution__, "你确定文件名是正确的吗?" 10 | 11 | __msg__.good( 12 | "好极了!流程现在没问题了,我们要开始进行训练了。" 13 | ) 14 | -------------------------------------------------------------------------------- /exercises/zh/train_gadget.spacy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/explosion/spacy-course/77d8ee1630788b1fb0df1f80f999f7355941a37f/exercises/zh/train_gadget.spacy -------------------------------------------------------------------------------- /exercises/zh/weibo.json: -------------------------------------------------------------------------------- 1 | [ 2 | "我最喜欢吃性价比高的麦当劳了!", 3 | "我以为麦当劳只有预处理的汉堡,现在我才发现他们家还有生的汉堡??", 4 | "为什么各位还在吃麦当劳 :(", 5 | "中国的麦当劳有老北京鸡肉卷,这也太爽了!", 6 | "作为一个帅帅的男人,去麦当劳我只吃巨无霸:P", 7 | "今天早上决定去吃麦当劳套餐,现在胃里涨了一整天了。" 8 | ] 9 | -------------------------------------------------------------------------------- /gatsby-browser.js: -------------------------------------------------------------------------------- 1 | // This doesn't have to be here – but if we do import Juniper here, it's already 2 | // preloaded and cached when we dynamically import it in code.js. 
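// A sketch of the dynamic import this refers to (the exact call lives in
// code.js and may differ; the path below is illustrative): once Juniper has
// been loaded eagerly here, `import('./juniper')` resolves from webpack's
// module cache instead of triggering a second network request.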
3 | import Juniper from './src/components/juniper' // eslint-disable-line no-unused-vars 4 | -------------------------------------------------------------------------------- /src/context.js: -------------------------------------------------------------------------------- 1 | import React from 'react' 2 | 3 | export const ChapterContext = React.createContext() 4 | export const LocaleContext = React.createContext() 5 | -------------------------------------------------------------------------------- /src/pages/de.js: -------------------------------------------------------------------------------- 1 | import React from 'react' 2 | 3 | import Home from '../components/home' 4 | 5 | export default () => <Home lang="de" /> 6 | -------------------------------------------------------------------------------- /src/pages/en.js: -------------------------------------------------------------------------------- 1 | import React from 'react' 2 | 3 | import Home from '../components/home' 4 | 5 | export default () => ( 6 |     <Home lang="en" /> 7 | ) 8 | -------------------------------------------------------------------------------- /src/pages/es.js: -------------------------------------------------------------------------------- 1 | import React from 'react' 2 | 3 | import Home from '../components/home' 4 | 5 | export default () => ( 6 |     <Home lang="es" /> 7 | ) 8 | -------------------------------------------------------------------------------- /src/pages/fr.js: -------------------------------------------------------------------------------- 1 | import React from 'react' 2 | 3 | import Home from '../components/home' 4 | 5 | export default () => <Home lang="fr" /> 6 | -------------------------------------------------------------------------------- /src/pages/index.js: -------------------------------------------------------------------------------- 1 | import React from 'react' 2 | 3 | import Home from '../components/home' 4 | 5 | export default () => ( 6 |     <Home lang="en" /> 7 | ) 8 | -------------------------------------------------------------------------------- /src/pages/ja.js: -------------------------------------------------------------------------------- 1 | import React from 'react' 2 | 3 | import Home from '../components/home' 4 | 5 | export default () => ( 6 |     <Home lang="ja" /> 7 | ) 8 | -------------------------------------------------------------------------------- /src/pages/pt.js: -------------------------------------------------------------------------------- 1 | import React from 'react' 2 | 3 | import Home from '../components/home' 4 | 5 | export default () => <Home lang="pt" /> 6 | -------------------------------------------------------------------------------- /src/pages/zh.js: -------------------------------------------------------------------------------- 1 | import React from 'react' 2 | 3 | import Home from '../components/home' 4 | 5 | export default () => ( 6 |     <Home lang="zh" /> 7 | ) 8 | -------------------------------------------------------------------------------- /src/styles/chapter.module.sass: -------------------------------------------------------------------------------- 1 | .pagination 2 | max-width: 100% 3 | width: var(--width-container) 4 | margin: 4rem auto 0 5 | display: flex 6 | justify-content: space-between 7 | -------------------------------------------------------------------------------- /src/styles/link.module.sass: -------------------------------------------------------------------------------- 1 | .root 2 | cursor: pointer 3 | border-bottom: 1px solid var(--color-theme) 4 | 5 | &:hover 6 | border-bottom-color: var(--color-front) 7 | 8 | .secondary 9 | border-bottom-color: var(--color-subtle-dark) 10 | 11 | 
&:hover 12 | border-bottom-color: var(--color-front) 13 | 14 | .hidden 15 | border-bottom: 0 16 | -------------------------------------------------------------------------------- /static/dep_example.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/explosion/spacy-course/77d8ee1630788b1fb0df1f80f999f7355941a37f/static/dep_example.png -------------------------------------------------------------------------------- /static/dep_example_de.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/explosion/spacy-course/77d8ee1630788b1fb0df1f80f999f7355941a37f/static/dep_example_de.png -------------------------------------------------------------------------------- /static/dep_example_es.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/explosion/spacy-course/77d8ee1630788b1fb0df1f80f999f7355941a37f/static/dep_example_es.png -------------------------------------------------------------------------------- /static/dep_example_fr.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/explosion/spacy-course/77d8ee1630788b1fb0df1f80f999f7355941a37f/static/dep_example_fr.png -------------------------------------------------------------------------------- /static/dep_example_ja.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/explosion/spacy-course/77d8ee1630788b1fb0df1f80f999f7355941a37f/static/dep_example_ja.png -------------------------------------------------------------------------------- /static/dep_example_zh.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/explosion/spacy-course/77d8ee1630788b1fb0df1f80f999f7355941a37f/static/dep_example_zh.png -------------------------------------------------------------------------------- /static/doc.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/explosion/spacy-course/77d8ee1630788b1fb0df1f80f999f7355941a37f/static/doc.png -------------------------------------------------------------------------------- /static/doc_span.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/explosion/spacy-course/77d8ee1630788b1fb0df1f80f999f7355941a37f/static/doc_span.png -------------------------------------------------------------------------------- /static/icon.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/explosion/spacy-course/77d8ee1630788b1fb0df1f80f999f7355941a37f/static/icon.png -------------------------------------------------------------------------------- /static/icon_check.svg: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | -------------------------------------------------------------------------------- /static/ner_example.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/explosion/spacy-course/77d8ee1630788b1fb0df1f80f999f7355941a37f/static/ner_example.png -------------------------------------------------------------------------------- /static/ner_example_de.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/explosion/spacy-course/77d8ee1630788b1fb0df1f80f999f7355941a37f/static/ner_example_de.png -------------------------------------------------------------------------------- /static/ner_example_es.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/explosion/spacy-course/77d8ee1630788b1fb0df1f80f999f7355941a37f/static/ner_example_es.png -------------------------------------------------------------------------------- /static/ner_example_fr.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/explosion/spacy-course/77d8ee1630788b1fb0df1f80f999f7355941a37f/static/ner_example_fr.png -------------------------------------------------------------------------------- /static/ner_example_ja.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/explosion/spacy-course/77d8ee1630788b1fb0df1f80f999f7355941a37f/static/ner_example_ja.png -------------------------------------------------------------------------------- /static/ner_example_zh.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/explosion/spacy-course/77d8ee1630788b1fb0df1f80f999f7355941a37f/static/ner_example_zh.png -------------------------------------------------------------------------------- /static/package.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/explosion/spacy-course/77d8ee1630788b1fb0df1f80f999f7355941a37f/static/package.png -------------------------------------------------------------------------------- /static/package_de.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/explosion/spacy-course/77d8ee1630788b1fb0df1f80f999f7355941a37f/static/package_de.png -------------------------------------------------------------------------------- /static/package_es.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/explosion/spacy-course/77d8ee1630788b1fb0df1f80f999f7355941a37f/static/package_es.png -------------------------------------------------------------------------------- /static/package_fr.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/explosion/spacy-course/77d8ee1630788b1fb0df1f80f999f7355941a37f/static/package_fr.png -------------------------------------------------------------------------------- /static/package_ja.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/explosion/spacy-course/77d8ee1630788b1fb0df1f80f999f7355941a37f/static/package_ja.png -------------------------------------------------------------------------------- /static/package_meta.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/explosion/spacy-course/77d8ee1630788b1fb0df1f80f999f7355941a37f/static/package_meta.png -------------------------------------------------------------------------------- /static/package_meta_de.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/explosion/spacy-course/77d8ee1630788b1fb0df1f80f999f7355941a37f/static/package_meta_de.png -------------------------------------------------------------------------------- /static/package_meta_es.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/explosion/spacy-course/77d8ee1630788b1fb0df1f80f999f7355941a37f/static/package_meta_es.png -------------------------------------------------------------------------------- /static/package_meta_fr.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/explosion/spacy-course/77d8ee1630788b1fb0df1f80f999f7355941a37f/static/package_meta_fr.png -------------------------------------------------------------------------------- /static/package_meta_zh.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/explosion/spacy-course/77d8ee1630788b1fb0df1f80f999f7355941a37f/static/package_meta_zh.png -------------------------------------------------------------------------------- /static/package_zh.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/explosion/spacy-course/77d8ee1630788b1fb0df1f80f999f7355941a37f/static/package_zh.png -------------------------------------------------------------------------------- /static/pipeline.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/explosion/spacy-course/77d8ee1630788b1fb0df1f80f999f7355941a37f/static/pipeline.png -------------------------------------------------------------------------------- /static/profile.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/explosion/spacy-course/77d8ee1630788b1fb0df1f80f999f7355941a37f/static/profile.jpg -------------------------------------------------------------------------------- /static/social.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/explosion/spacy-course/77d8ee1630788b1fb0df1f80f999f7355941a37f/static/social.jpg -------------------------------------------------------------------------------- /static/social_de.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/explosion/spacy-course/77d8ee1630788b1fb0df1f80f999f7355941a37f/static/social_de.jpg -------------------------------------------------------------------------------- /static/social_es.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/explosion/spacy-course/77d8ee1630788b1fb0df1f80f999f7355941a37f/static/social_es.jpg -------------------------------------------------------------------------------- /static/social_fr.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/explosion/spacy-course/77d8ee1630788b1fb0df1f80f999f7355941a37f/static/social_fr.jpg -------------------------------------------------------------------------------- /static/social_ja.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/explosion/spacy-course/77d8ee1630788b1fb0df1f80f999f7355941a37f/static/social_ja.jpg 
-------------------------------------------------------------------------------- /static/social_pt.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/explosion/spacy-course/77d8ee1630788b1fb0df1f80f999f7355941a37f/static/social_pt.jpg -------------------------------------------------------------------------------- /static/social_zh.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/explosion/spacy-course/77d8ee1630788b1fb0df1f80f999f7355941a37f/static/social_zh.jpg -------------------------------------------------------------------------------- /static/span_indices.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/explosion/spacy-course/77d8ee1630788b1fb0df1f80f999f7355941a37f/static/span_indices.png -------------------------------------------------------------------------------- /static/training.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/explosion/spacy-course/77d8ee1630788b1fb0df1f80f999f7355941a37f/static/training.png -------------------------------------------------------------------------------- /static/training_de.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/explosion/spacy-course/77d8ee1630788b1fb0df1f80f999f7355941a37f/static/training_de.png -------------------------------------------------------------------------------- /static/training_es.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/explosion/spacy-course/77d8ee1630788b1fb0df1f80f999f7355941a37f/static/training_es.png -------------------------------------------------------------------------------- /static/training_fr.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/explosion/spacy-course/77d8ee1630788b1fb0df1f80f999f7355941a37f/static/training_fr.png -------------------------------------------------------------------------------- /static/training_zh.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/explosion/spacy-course/77d8ee1630788b1fb0df1f80f999f7355941a37f/static/training_zh.png -------------------------------------------------------------------------------- /static/vocab_stringstore.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/explosion/spacy-course/77d8ee1630788b1fb0df1f80f999f7355941a37f/static/vocab_stringstore.png -------------------------------------------------------------------------------- /static/vocab_stringstore_de.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/explosion/spacy-course/77d8ee1630788b1fb0df1f80f999f7355941a37f/static/vocab_stringstore_de.png -------------------------------------------------------------------------------- /static/vocab_stringstore_es.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/explosion/spacy-course/77d8ee1630788b1fb0df1f80f999f7355941a37f/static/vocab_stringstore_es.png -------------------------------------------------------------------------------- /static/vocab_stringstore_fr.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/explosion/spacy-course/77d8ee1630788b1fb0df1f80f999f7355941a37f/static/vocab_stringstore_fr.png -------------------------------------------------------------------------------- /static/vocab_stringstore_zh.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/explosion/spacy-course/77d8ee1630788b1fb0df1f80f999f7355941a37f/static/vocab_stringstore_zh.png -------------------------------------------------------------------------------- /static/website.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/explosion/spacy-course/77d8ee1630788b1fb0df1f80f999f7355941a37f/static/website.png --------------------------------------------------------------------------------