├── .gitattributes ├── .github └── workflows │ ├── build_and_publish.yml │ └── test.yml ├── .gitignore ├── CITATION.cff ├── CONTRIBUTING.md ├── ISSUE_TEMPLATE.md ├── LICENSE ├── MANIFEST.in ├── README.md ├── doc ├── Makefile ├── _static │ └── css │ │ └── wordcloud.css ├── _templates │ ├── class.rst │ ├── class_with_call.rst │ └── function.rst ├── build-website.sh ├── changelog.rst ├── cli.rst ├── conf.py ├── deploy-website.sh ├── images │ ├── a_new_hope.png │ └── no_image.png ├── index.rst ├── make_a_release.rst ├── references.rst └── requirements-doc.txt ├── examples ├── README.txt ├── a_new_hope.png ├── a_new_hope.py ├── a_new_hope.txt ├── a_new_hope_bigrams.png ├── alice.png ├── alice.txt ├── alice_color.png ├── alice_colored.png ├── alice_license.txt ├── alice_mask.png ├── arabic.py ├── arabic_example.png ├── arabicwords.txt ├── colored.py ├── colored_by_group.png ├── colored_by_group.py ├── constitution.png ├── constitution.txt ├── emoji.py ├── fonts │ ├── NotoNaskhArabic │ │ ├── LICENSE_OFL.txt │ │ ├── NotoNaskhArabic-Regular.ttf │ │ └── README.md │ ├── SourceHanSerif │ │ ├── README.md │ │ └── SourceHanSerifK-Light.otf │ └── Symbola │ │ ├── README.md │ │ ├── Symbola.pdf │ │ └── Symbola.ttf ├── frequency.py ├── happy-emoji.txt ├── masked.py ├── parrot-by-jose-mari-gimenez2.jpg ├── parrot.png ├── parrot.py ├── parrot_new.png ├── simple.py ├── single_word.py ├── stormtrooper_mask.png ├── wc_cn │ ├── CalltoArms.txt │ ├── LuXun.jpg │ ├── LuXun_black.jpg │ ├── LuXun_black_colored.jpg │ ├── LuXun_color.jpg │ ├── LuXun_colored.jpg │ ├── stopwords_cn_en.txt │ └── wc_cn_license.txt ├── wiki_rainbow.txt └── wordcloud_cn.py ├── pyproject.toml ├── requirements-dev.txt ├── requirements.txt ├── setup.cfg ├── setup.py ├── test ├── conftest.py ├── test_wordcloud.py ├── test_wordcloud_cli.py ├── unicode_stopwords.txt └── unicode_text.txt └── wordcloud ├── DroidSansMono.ttf ├── TODO ├── __init__.py ├── __main__.py ├── color_from_image.py ├── query_integral_image.pyx ├── 
stopwords ├── tokenization.py ├── wordcloud.py └── wordcloud_cli.py /.gitattributes: -------------------------------------------------------------------------------- 1 | wordcloud/_version.py export-subst 2 | -------------------------------------------------------------------------------- /.github/workflows/build_and_publish.yml: -------------------------------------------------------------------------------- 1 | name: Build Wheel and Publish for Releases 2 | on: 3 | workflow_dispatch: 4 | release: 5 | types: [published] 6 | 7 | jobs: 8 | build_wheels: 9 | name: Build wheels on ${{ matrix.os }} 10 | runs-on: ${{ matrix.os }} 11 | env: 12 | CIBW_ARCHS_MACOS: "x86_64 arm64" 13 | 14 | strategy: 15 | matrix: 16 | os: [ubuntu-20.04, windows-2019, macos-13] 17 | 18 | steps: 19 | - uses: actions/checkout@v4 20 | 21 | - run: | 22 | git fetch --prune --unshallow 23 | 24 | - name: Build wheels 25 | uses: pypa/cibuildwheel@v2.21.3 26 | 27 | - uses: actions/upload-artifact@v4 28 | with: 29 | name: artifact-${{ matrix.os }} 30 | path: ./wheelhouse/*.whl 31 | 32 | build_sdist: 33 | name: Build source distribution 34 | runs-on: ubuntu-latest 35 | steps: 36 | - uses: actions/checkout@v4 37 | 38 | - run: | 39 | git fetch --prune --unshallow 40 | 41 | - name: Build sdist 42 | run: pipx run build --sdist 43 | 44 | - uses: actions/upload-artifact@v4 45 | with: 46 | name: artifact-sdist 47 | path: dist/*.tar.gz 48 | 49 | upload_pypi: 50 | needs: [build_wheels, build_sdist] 51 | runs-on: ubuntu-latest 52 | # upload to PyPI on every tag starting with 'v' 53 | # if: github.event_name == 'push' && startsWith(github.ref, 'refs/tags/v') 54 | # alternatively, to publish when a GitHub Release is created, use the following rule: 55 | if: github.event_name == 'release' && github.event.action == 'published' 56 | steps: 57 | - uses: actions/download-artifact@v4 58 | with: 59 | pattern: artifact-* 60 | path: dist 61 | merge-multiple: true 62 | 63 | - name: Check dist 64 | run: ls dist 65 | 66 | - 
uses: pypa/gh-action-pypi-publish@v1.5.0 67 | with: 68 | user: __token__ 69 | password: ${{ secrets.PYPI_TOKEN }} 70 | # To test: repository_url: https://test.pypi.org/legacy/ 71 | -------------------------------------------------------------------------------- /.github/workflows/test.yml: -------------------------------------------------------------------------------- 1 | name: Unit Tests 2 | 3 | on: 4 | pull_request: 5 | types: [opened, reopened, synchronize] 6 | 7 | jobs: 8 | test_x86_64: 9 | name: "Build ${{ matrix.pyver }} on ${{ matrix.os }}" 10 | strategy: 11 | matrix: 12 | pyver: ["3.7", "3.9", "3.11", "3.13"] 13 | os: [ubuntu-22.04, windows-latest] 14 | runs-on: ${{ matrix.os }} 15 | steps: 16 | - uses: actions/checkout@v4 17 | - name: Setting up Python 18 | uses: actions/setup-python@v5 19 | with: 20 | python-version: ${{ matrix.pyver }} 21 | - name: Install dependencies 22 | run: | 23 | python -m pip install --disable-pip-version-check --upgrade pip 24 | pip install -U -r requirements.txt -r requirements-dev.txt 25 | #Install locally to support tests 26 | pip install -e . 
27 | - name: Test with pytest 28 | run: pytest 29 | 30 | test_aarch64: 31 | name: "Build aarch64 ${{ matrix.pyver }}" 32 | strategy: 33 | matrix: 34 | pyver: [cp38-cp38, cp310-cp310, cp312-cp312] 35 | fail-fast: false 36 | runs-on: ubuntu-latest 37 | env: 38 | py: /opt/python/${{ matrix.pyver }}/bin/python 39 | img: quay.io/pypa/manylinux2014_aarch64 40 | steps: 41 | - name: Checkout 42 | uses: actions/checkout@v4 43 | - run: | 44 | docker run --rm --privileged hypriot/qemu-register 45 | - uses: docker://quay.io/pypa/manylinux2014_aarch64 46 | with: 47 | args: | 48 | bash -c "${{ env.py }} -m pip install virtualenv && ${{ env.py }} -m venv .env && \ 49 | source .env/bin/activate && \ 50 | pip install --upgrade setuptools && \ 51 | python -m pip install --disable-pip-version-check --upgrade pip && \ 52 | pip install -U -r requirements.txt -r requirements-dev.txt && \ 53 | pip install -e . && \ 54 | pytest && \ 55 | deactivate" 56 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | doc/_build 2 | doc/auto_examples 3 | doc/gen_modules 4 | doc/generated 5 | 6 | # File created using '.gitignore Generator' for Visual Studio Code: https://bit.ly/vscode-gig 7 | 8 | # Created by https://www.gitignore.io/api/pycharm,python,visualstudiocode 9 | # Edit at https://www.gitignore.io/?templates=pycharm,python,visualstudiocode 10 | 11 | ### PyCharm ### 12 | # Covers JetBrains IDEs: IntelliJ, RubyMine, PhpStorm, AppCode, PyCharm, CLion, Android Studio and WebStorm 13 | # Reference: https://intellij-support.jetbrains.com/hc/en-us/articles/206544839 14 | 15 | # User-specific stuff 16 | .idea/**/workspace.xml 17 | .idea/**/tasks.xml 18 | .idea/**/usage.statistics.xml 19 | .idea/**/dictionaries 20 | .idea/**/shelf 21 | 22 | # Generated files 23 | .idea/**/contentModel.xml 24 | 25 | # Sensitive or high-churn files 26 | .idea/**/dataSources/ 27 | 
.idea/**/dataSources.ids 28 | .idea/**/dataSources.local.xml 29 | .idea/**/sqlDataSources.xml 30 | .idea/**/dynamic.xml 31 | .idea/**/uiDesigner.xml 32 | .idea/**/dbnavigator.xml 33 | 34 | # Gradle 35 | .idea/**/gradle.xml 36 | .idea/**/libraries 37 | 38 | # Gradle and Maven with auto-import 39 | # When using Gradle or Maven with auto-import, you should exclude module files, 40 | # since they will be recreated, and may cause churn. Uncomment if using 41 | # auto-import. 42 | # .idea/modules.xml 43 | # .idea/*.iml 44 | # .idea/modules 45 | 46 | # CMake 47 | cmake-build-*/ 48 | 49 | # Mongo Explorer plugin 50 | .idea/**/mongoSettings.xml 51 | 52 | # File-based project format 53 | *.iws 54 | 55 | # IntelliJ 56 | out/ 57 | 58 | # mpeltonen/sbt-idea plugin 59 | .idea_modules/ 60 | 61 | # VSCode 62 | .vscode 63 | 64 | # JIRA plugin 65 | atlassian-ide-plugin.xml 66 | 67 | # Cursive Clojure plugin 68 | .idea/replstate.xml 69 | 70 | # Crashlytics plugin (for Android Studio and IntelliJ) 71 | com_crashlytics_export_strings.xml 72 | crashlytics.properties 73 | crashlytics-build.properties 74 | fabric.properties 75 | 76 | # Editor-based Rest Client 77 | .idea/httpRequests 78 | 79 | # Android studio 3.1+ serialized cache file 80 | .idea/caches/build_file_checksums.ser 81 | 82 | ### PyCharm Patch ### 83 | # Comment Reason: https://github.com/joeblau/gitignore.io/issues/186#issuecomment-215987721 84 | 85 | # *.iml 86 | # modules.xml 87 | # .idea/misc.xml 88 | # *.ipr 89 | 90 | # Sonarlint plugin 91 | .idea/sonarlint 92 | 93 | ### Python ### 94 | # Byte-compiled / optimized / DLL files 95 | __pycache__/ 96 | *.py[cod] 97 | *$py.class 98 | 99 | # C extensions 100 | *.so 101 | 102 | # Distribution / packaging 103 | .Python 104 | build/ 105 | develop-eggs/ 106 | dist/ 107 | downloads/ 108 | eggs/ 109 | .eggs/ 110 | lib/ 111 | lib64/ 112 | parts/ 113 | sdist/ 114 | var/ 115 | wheels/ 116 | share/python-wheels/ 117 | *.egg-info/ 118 | .installed.cfg 119 | *.egg 120 | MANIFEST 121 | 122 
| # PyInstaller 123 | # Usually these files are written by a python script from a template 124 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 125 | *.manifest 126 | *.spec 127 | 128 | # Installer logs 129 | pip-log.txt 130 | pip-delete-this-directory.txt 131 | 132 | # Unit test / coverage reports 133 | htmlcov/ 134 | .tox/ 135 | .nox/ 136 | .coverage 137 | .coverage.* 138 | .cache 139 | nosetests.xml 140 | coverage.xml 141 | *.cover 142 | .hypothesis/ 143 | .pytest_cache/ 144 | 145 | # Translations 146 | *.mo 147 | *.pot 148 | 149 | # Django stuff: 150 | *.log 151 | local_settings.py 152 | db.sqlite3 153 | 154 | # Flask stuff: 155 | instance/ 156 | .webassets-cache 157 | 158 | # Scrapy stuff: 159 | .scrapy 160 | 161 | # Sphinx documentation 162 | docs/_build/ 163 | 164 | # PyBuilder 165 | target/ 166 | 167 | # Jupyter Notebook 168 | .ipynb_checkpoints 169 | 170 | # IPython 171 | profile_default/ 172 | ipython_config.py 173 | 174 | # pyenv 175 | .python-version 176 | 177 | # celery beat schedule file 178 | celerybeat-schedule 179 | 180 | # SageMath parsed files 181 | *.sage.py 182 | 183 | # Environments 184 | .env 185 | .venv 186 | env/ 187 | venv/ 188 | ENV/ 189 | env.bak/ 190 | venv.bak/ 191 | 192 | # Spyder project settings 193 | .spyderproject 194 | .spyproject 195 | 196 | # Rope project settings 197 | .ropeproject 198 | 199 | # mkdocs documentation 200 | /site 201 | 202 | # mypy 203 | .mypy_cache/ 204 | .dmypy.json 205 | dmypy.json 206 | 207 | # Pyre type checker 208 | .pyre/ 209 | 210 | ### Python Patch ### 211 | .venv/ 212 | 213 | ### VisualStudioCode ### 214 | .vscode/* 215 | !.vscode/settings.json 216 | !.vscode/tasks.json 217 | !.vscode/launch.json 218 | !.vscode/extensions.json 219 | 220 | ### VisualStudioCode Patch ### 221 | # Ignore all local history of files 222 | .history 223 | 224 | # End of https://www.gitignore.io/api/pycharm,python,visualstudiocode 225 | 226 | # Custom rules (everything added below won't be 
overriden by 'Generate .gitignore File' if you use 'Update' option) 227 | 228 | wordcloud/_version.py 229 | wordcloud/query_integral_image.c 230 | -------------------------------------------------------------------------------- /CITATION.cff: -------------------------------------------------------------------------------- 1 | cff-version: 1.2.0 2 | authors: 3 | - family-names: "Mueller" 4 | given-names: "Andreas C" 5 | orcid: "https://orcid.org/0000-0002-2349-9428" 6 | 7 | title: "Wordcloud" 8 | version: 1.9.1 9 | date-released: 2023-4-27 10 | url: "https://github.com/amueller/wordcloud" -------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | # Contributing 2 | 3 | To contribute to wordcloud, you'll need to follow the instructions in 4 | [Creating a pull request from a fork](https://help.github.com/en/github/collaborating-with-issues-and-pull-requests/creating-a-pull-request-from-a-fork). 5 | 6 | In addition to the general procedure for creating a pull request, please follow 7 | the following steps: 8 | 9 | ## Before starting development 10 | 11 | ### Use a correct version of Python 12 | 13 | Python 3.7.x should be fine for development. 14 | 15 | ``` 16 | python --version 17 | > Python 3.7.6 18 | ``` 19 | 20 | ### Install all dependencies 21 | 22 | ``` 23 | pip install -U -r requirements.txt -r requirements-dev.txt 24 | ``` 25 | 26 | ### Ensure that files are correctly formatted 27 | 28 | ``` 29 | flake8 30 | ``` 31 | 32 | ### Ensure that tests pass 33 | 34 | ``` 35 | pip install -e . 
36 | pytest 37 | ``` 38 | 39 | ## Before creating a pull request 40 | 41 | ### Confirm formatting and test passage 42 | 43 | ``` 44 | flake8 45 | pytest 46 | ``` 47 | -------------------------------------------------------------------------------- /ISSUE_TEMPLATE.md: -------------------------------------------------------------------------------- 1 | #### Description 2 | 3 | 4 | #### Steps/Code to Reproduce 5 | 25 | 26 | #### Expected Results 27 | 28 | 29 | #### Actual Results 30 | 31 | 32 | #### Versions 33 | 41 | 42 | 43 | 44 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Copyright (c) 2012 Andreas Christian Mueller 2 | 3 | Permission is hereby granted, free of charge, to any person obtaining a copy of 4 | this software and associated documentation files (the "Software"), to deal in 5 | the Software without restriction, including without limitation the rights to 6 | use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of 7 | the Software, and to permit persons to whom the Software is furnished to do so, 8 | subject to the following conditions: 9 | 10 | The above copyright notice and this permission notice shall be included in all 11 | copies or substantial portions of the Software. 12 | 13 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 14 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS 15 | FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR 16 | COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER 17 | IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 18 | CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
19 | -------------------------------------------------------------------------------- /MANIFEST.in: -------------------------------------------------------------------------------- 1 | include wordcloud/_version.py 2 | exclude wordcloud/*.c 3 | exclude wordcloud/TODO 4 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | [![licence](http://img.shields.io/badge/licence-MIT-blue.svg?style=flat)](https://github.com/amueller/word_cloud/blob/master/LICENSE) 2 | [![DOI](https://zenodo.org/badge/21369/amueller/word_cloud.svg)](https://zenodo.org/badge/latestdoi/21369/amueller/word_cloud) 3 | 4 | 5 | word_cloud 6 | ========== 7 | 8 | A little word cloud generator in Python. Read more about it on the [blog 9 | post][blog-post] or the [website][website]. 10 | 11 | The code is tested against Python 3.7, 3.8, 3.9, 3.10, 3.11, 3.12, 3.13. 12 | 13 | ## Installation 14 | 15 | If you are using pip: 16 | 17 | pip install wordcloud 18 | 19 | If you are using conda, you can install from the `conda-forge` channel: 20 | 21 | conda install -c conda-forge wordcloud 22 | 23 | 24 | #### Installation notes 25 | 26 | wordcloud depends on `numpy`, `pillow`, and `matplotlib`. 27 | 28 | If there are no wheels available for your version of python, installing the 29 | package requires having a C compiler set up. Before installing a compiler, report 30 | an issue describing the version of python and operating system being used. 31 | 32 | 33 | ## Examples 34 | 35 | Check out [examples/simple.py][simple] for a short intro. A sample output is: 36 | 37 | ![Constitution](examples/constitution.png) 38 | 39 | Or run [examples/masked.py][masked] to see more options. 
A sample output is: 40 | 41 | ![Alice in Wonderland](examples/alice.png) 42 | 43 | Getting fancy with some colors: 44 | ![Parrot with rainbow colors](examples/parrot_new.png) 45 | 46 | Generating wordclouds for Arabic: 47 | 48 | ![Arabic wordcloud](examples/arabic_example.png) 49 | 50 | 51 | ## Command-line usage 52 | 53 | The `wordcloud_cli` tool can be used to generate word clouds directly from the command-line: 54 | 55 | $ wordcloud_cli --text mytext.txt --imagefile wordcloud.png 56 | 57 | If you're dealing with PDF files, then `pdftotext`, included by default with many Linux distributions, comes in handy: 58 | 59 | $ pdftotext mydocument.pdf - | wordcloud_cli --imagefile wordcloud.png 60 | 61 | In the previous example, the `-` argument orders `pdftotext` to write the resulting text to stdout, which is then piped to the stdin of `wordcloud_cli`. 62 | 63 | Use `wordcloud_cli --help` to see all available options. 64 | 65 | [blog-post]: http://peekaboo-vision.blogspot.de/2012/11/a-wordcloud-in-python.html 66 | [website]: http://amueller.github.io/word_cloud/ 67 | [simple]: examples/simple.py 68 | [masked]: examples/masked.py 69 | [reddit-cloud]: https://github.com/amueller/reddit-cloud 70 | [wc2]: http://www.reddit.com/user/WordCloudBot2 71 | [wc2top]: http://www.reddit.com/user/WordCloudBot2/?sort=top 72 | [chat-stats]: https://github.com/popcorncolonel/Chat_stats 73 | [twitter-word-cloud-bot]: https://github.com/defacto133/twitter-wordcloud-bot 74 | [twitter-wordnuvola]: https://twitter.com/wordnuvola 75 | [imgur-wordnuvola]: http://defacto133.imgur.com/all/ 76 | [intprob]: http://peekaboo-vision.blogspot.de/2012/11/a-wordcloud-in-python.html#bc_0_28B 77 | 78 | 79 | ## Licensing 80 | The wordcloud library is MIT licenced, but contains DroidSansMono.ttf, a true type font by Google, that is Apache licensed. 81 | The font is by no means integral, and any other font can be used by setting the ``font_path`` variable when creating a ``WordCloud`` object. 
82 | -------------------------------------------------------------------------------- /doc/Makefile: -------------------------------------------------------------------------------- 1 | # Makefile for Sphinx documentation 2 | # 3 | 4 | # You can set these variables from the command line. 5 | SPHINXOPTS = 6 | SPHINXBUILD = sphinx-build 7 | PAPER = 8 | BUILDDIR = _build 9 | 10 | # Internal variables. 11 | PAPEROPT_a4 = -D latex_paper_size=a4 12 | PAPEROPT_letter = -D latex_paper_size=letter 13 | ALLSPHINXOPTS = -d $(BUILDDIR)/doctrees $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) . 14 | # the i18n builder cannot share the environment and doctrees with the others 15 | I18NSPHINXOPTS = $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) . 16 | 17 | .PHONY: help clean html dirhtml singlehtml pickle json htmlhelp qthelp devhelp epub latex latexpdf text man changes linkcheck doctest gettext 18 | 19 | all: html-noplot 20 | 21 | help: 22 | @echo "Please use \`make ' where is one of" 23 | @echo " html to make standalone HTML files" 24 | @echo " dirhtml to make HTML files named index.html in directories" 25 | @echo " singlehtml to make a single large HTML file" 26 | @echo " pickle to make pickle files" 27 | @echo " json to make JSON files" 28 | @echo " htmlhelp to make HTML files and a HTML help project" 29 | @echo " qthelp to make HTML files and a qthelp project" 30 | @echo " devhelp to make HTML files and a Devhelp project" 31 | @echo " epub to make an epub" 32 | @echo " latex to make LaTeX files, you can set PAPER=a4 or PAPER=letter" 33 | @echo " latexpdf to make LaTeX files and run them through pdflatex" 34 | @echo " text to make text files" 35 | @echo " man to make manual pages" 36 | @echo " texinfo to make Texinfo files" 37 | @echo " info to make Texinfo files and run them through makeinfo" 38 | @echo " gettext to make PO message catalogs" 39 | @echo " changes to make an overview of all changed/added/deprecated items" 40 | @echo " linkcheck to check all external links for integrity" 41 | @echo " 
doctest to run all doctests embedded in the documentation (if enabled)" 42 | 43 | clean: 44 | -rm -rf $(BUILDDIR)/* 45 | -rm -rf auto_examples 46 | -rm -rf gen_modules 47 | -rm -rf generated 48 | 49 | html: 50 | $(SPHINXBUILD) -b html $(ALLSPHINXOPTS) $(BUILDDIR)/html 51 | @echo 52 | @echo "Build finished. The HTML pages are in $(BUILDDIR)/html." 53 | 54 | html-noplot: 55 | $(SPHINXBUILD) -D plot_gallery=False -b html $(ALLSPHINXOPTS) $(BUILDDIR)/html 56 | @echo 57 | @echo "Build finished. The HTML pages are in $(BUILDDIR)/html." 58 | 59 | dirhtml: 60 | $(SPHINXBUILD) -b dirhtml $(ALLSPHINXOPTS) $(BUILDDIR)/dirhtml 61 | @echo 62 | @echo "Build finished. The HTML pages are in $(BUILDDIR)/dirhtml." 63 | 64 | singlehtml: 65 | $(SPHINXBUILD) -b singlehtml $(ALLSPHINXOPTS) $(BUILDDIR)/singlehtml 66 | @echo 67 | @echo "Build finished. The HTML page is in $(BUILDDIR)/singlehtml." 68 | 69 | pickle: 70 | $(SPHINXBUILD) -b pickle $(ALLSPHINXOPTS) $(BUILDDIR)/pickle 71 | @echo 72 | @echo "Build finished; now you can process the pickle files." 73 | 74 | json: 75 | $(SPHINXBUILD) -b json $(ALLSPHINXOPTS) $(BUILDDIR)/json 76 | @echo 77 | @echo "Build finished; now you can process the JSON files." 78 | 79 | htmlhelp: 80 | $(SPHINXBUILD) -b htmlhelp $(ALLSPHINXOPTS) $(BUILDDIR)/htmlhelp 81 | @echo 82 | @echo "Build finished; now you can run HTML Help Workshop with the" \ 83 | ".hhp project file in $(BUILDDIR)/htmlhelp." 84 | 85 | qthelp: 86 | $(SPHINXBUILD) -b qthelp $(ALLSPHINXOPTS) $(BUILDDIR)/qthelp 87 | @echo 88 | @echo "Build finished; now you can run "qcollectiongenerator" with the" \ 89 | ".qhcp project file in $(BUILDDIR)/qthelp, like this:" 90 | @echo "# qcollectiongenerator $(BUILDDIR)/qthelp/wordcloud.qhcp" 91 | @echo "To view the help file:" 92 | @echo "# assistant -collectionFile $(BUILDDIR)/qthelp/wordcloud.qhc" 93 | 94 | devhelp: 95 | $(SPHINXBUILD) -b devhelp $(ALLSPHINXOPTS) $(BUILDDIR)/devhelp 96 | @echo 97 | @echo "Build finished." 
98 | @echo "To view the help file:" 99 | @echo "# mkdir -p $$HOME/.local/share/devhelp/wordcloud" 100 | @echo "# ln -s $(BUILDDIR)/devhelp $$HOME/.local/share/devhelp/wordcloud" 101 | @echo "# devhelp" 102 | 103 | epub: 104 | $(SPHINXBUILD) -b epub $(ALLSPHINXOPTS) $(BUILDDIR)/epub 105 | @echo 106 | @echo "Build finished. The epub file is in $(BUILDDIR)/epub." 107 | 108 | latex: 109 | $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex 110 | @echo 111 | @echo "Build finished; the LaTeX files are in $(BUILDDIR)/latex." 112 | @echo "Run \`make' in that directory to run these through (pdf)latex" \ 113 | "(use \`make latexpdf' here to do that automatically)." 114 | 115 | latexpdf: 116 | $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex 117 | @echo "Running LaTeX files through pdflatex..." 118 | $(MAKE) -C $(BUILDDIR)/latex all-pdf 119 | @echo "pdflatex finished; the PDF files are in $(BUILDDIR)/latex." 120 | 121 | text: 122 | $(SPHINXBUILD) -b text $(ALLSPHINXOPTS) $(BUILDDIR)/text 123 | @echo 124 | @echo "Build finished. The text files are in $(BUILDDIR)/text." 125 | 126 | man: 127 | $(SPHINXBUILD) -b man $(ALLSPHINXOPTS) $(BUILDDIR)/man 128 | @echo 129 | @echo "Build finished. The manual pages are in $(BUILDDIR)/man." 130 | 131 | texinfo: 132 | $(SPHINXBUILD) -b texinfo $(ALLSPHINXOPTS) $(BUILDDIR)/texinfo 133 | @echo 134 | @echo "Build finished. The Texinfo files are in $(BUILDDIR)/texinfo." 135 | @echo "Run \`make' in that directory to run these through makeinfo" \ 136 | "(use \`make info' here to do that automatically)." 137 | 138 | info: 139 | $(SPHINXBUILD) -b texinfo $(ALLSPHINXOPTS) $(BUILDDIR)/texinfo 140 | @echo "Running Texinfo files through makeinfo..." 141 | make -C $(BUILDDIR)/texinfo info 142 | @echo "makeinfo finished; the Info files are in $(BUILDDIR)/texinfo." 143 | 144 | gettext: 145 | $(SPHINXBUILD) -b gettext $(I18NSPHINXOPTS) $(BUILDDIR)/locale 146 | @echo 147 | @echo "Build finished. 
The message catalogs are in $(BUILDDIR)/locale." 148 | 149 | changes: 150 | $(SPHINXBUILD) -b changes $(ALLSPHINXOPTS) $(BUILDDIR)/changes 151 | @echo 152 | @echo "The overview file is in $(BUILDDIR)/changes." 153 | 154 | linkcheck: 155 | $(SPHINXBUILD) -b linkcheck $(ALLSPHINXOPTS) $(BUILDDIR)/linkcheck 156 | @echo 157 | @echo "Link check complete; look for any errors in the above output " \ 158 | "or in $(BUILDDIR)/linkcheck/output.txt." 159 | 160 | doctest: 161 | $(SPHINXBUILD) -b doctest $(ALLSPHINXOPTS) $(BUILDDIR)/doctest 162 | @echo "Testing of doctests in the sources finished, look at the " \ 163 | "results in $(BUILDDIR)/doctest/output.txt." 164 | -------------------------------------------------------------------------------- /doc/_static/css/wordcloud.css: -------------------------------------------------------------------------------- 1 | @import url("theme.css"); 2 | 3 | .section .align-default { 4 | text-align: left; 5 | } 6 | -------------------------------------------------------------------------------- /doc/_templates/class.rst: -------------------------------------------------------------------------------- 1 | {{ fullname }} 2 | {{ underline }} 3 | 4 | .. currentmodule:: {{ module }} 5 | 6 | .. autoclass:: {{ objname }} 7 | 8 | {% block methods %} 9 | .. automethod:: __init__ 10 | {% endblock %} 11 | 12 | 13 | -------------------------------------------------------------------------------- /doc/_templates/class_with_call.rst: -------------------------------------------------------------------------------- 1 | {{ fullname }} 2 | {{ underline }} 3 | 4 | .. currentmodule:: {{ module }} 5 | 6 | .. autoclass:: {{ objname }} 7 | 8 | {% block methods %} 9 | .. automethod:: __init__ 10 | .. 
automethod:: __call__ 11 | {% endblock %} 12 | 13 | 14 | -------------------------------------------------------------------------------- /doc/_templates/function.rst: -------------------------------------------------------------------------------- 1 | {{ fullname }} 2 | {{ underline }} 3 | 4 | .. currentmodule:: {{ module }} 5 | 6 | .. autofunction:: {{ objname }} 7 | 8 | 9 | -------------------------------------------------------------------------------- /doc/build-website.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | set -e 4 | set -o pipefail 5 | 6 | err() { echo -e >&2 ERROR: $@\\n; } 7 | die() { err $@; exit 1; } 8 | 9 | SCRIPT_DIR=$(cd $(dirname $0) || exit 1; pwd) 10 | 11 | cd $SCRIPT_DIR/../ 12 | 13 | if [ ! -d .git ]; then 14 | die "Failed to locate the root of the current git-versioned project" 15 | fi 16 | 17 | SOURCE_SHA_REF=$(git rev-parse --short HEAD) 18 | 19 | pushd doc 20 | pip install -r requirements-doc.txt 21 | make clean 22 | make html 23 | echo "sha:${SOURCE_SHA_REF}" >> _build/html/.buildinfo 24 | popd 25 | 26 | -------------------------------------------------------------------------------- /doc/changelog.rst: -------------------------------------------------------------------------------- 1 | .. _changelog: 2 | 3 | ========= 4 | Changelog 5 | ========= 6 | 7 | This is the list of changes to wordcloud between each release. For full 8 | details, see the commit logs at https://github.com/amueller/word_cloud 9 | 10 | Next Release 11 | ============== 12 | 13 | WordCloud 1.9.1 14 | =============== 15 | Release Date 4/27/2023 16 | 17 | Wheels 18 | ------ 19 | * Added wheels for Python 3.10 and 3.11 20 | 21 | WordCloud 1.8.1 22 | =============== 23 | Release Date 11/11/2020 24 | 25 | Wheels 26 | ------ 27 | * Added wheels for Python 3.9. 
28 | 29 | 30 | WordCloud 1.8.0 31 | =============== 32 | 33 | Wheels 34 | ------ 35 | 36 | * Add support for building wheels for Python 3.8 for all platforms and 32-bit wheels for Windows **only**. 37 | See :issue:`547` and :issue:`549`. Contributed by :user:`amueller` and :user:`jcfr`. 38 | 39 | Test 40 | ---- 41 | 42 | * Update CircleCI configuration to use `dockcross/manylinux1-x64 `_ 43 | image instead of obsolete `dockcross/manylinux-x64` one. See :issue:`548`. Contributed by :user:`jcfr`. 44 | 45 | WordCloud 1.7.0 46 | =============== 47 | 48 | Features 49 | -------- 50 | * Add export of SVG files using :func:`WordCloud.to_svg` by :user:`jojolebarjos`. 51 | * Add missing options to the command line interface, `PR #527 <https://github.com/amueller/word_cloud/pull/527>`_ by :user:`dm-logv`. 52 | 53 | Bug fixes 54 | --------- 55 | * Make bigrams stopword aware, `PR #528 <https://github.com/amueller/word_cloud/pull/528>`_ by :user:`carlgieringer`. 56 | 57 | 58 | WordCloud 1.6.0 59 | =============== 60 | 61 | Features 62 | -------- 63 | 64 | * Add support to render numbers and single letters using the 65 | ``include_numbers`` and ``min_word_length`` arguments. 66 | 67 | Examples 68 | -------- 69 | * Add :ref:`sphx_glr_auto_examples_parrot.py` example showing another example of 70 | image-based coloring and masks. 71 | 72 | WordCloud 1.5.0 73 | =============== 74 | 75 | Examples 76 | -------- 77 | 78 | * Add :ref:`sphx_glr_auto_examples_frequency.py` example for understanding how 79 | to generate a wordcloud using a dictionary of word frequency. 80 | Contributed by :user:`yoonsubKim`. 81 | 82 | * Add :ref:`sphx_glr_auto_examples_wordcloud_cn.py` example. 83 | Contributed by :user:`FontTian` and improved by :user:`duohappy`. 84 | 85 | Features 86 | -------- 87 | 88 | * Add support for mask contour. Contributed by :user:`jsmedmar`. 89 | 90 | * Improve :ref:`wordcloud_cli` adding support for ``--contour_width`` 91 | and ``--contour_color`` named arguments. 
92 | 93 | * Improve :class:`wordcloud.WordCloud` API adding support for 94 | ``contour_width`` and ``contour_color`` keyword arguments. 95 | 96 | * Update :ref:`sphx_glr_auto_examples_masked.py` example. 97 | 98 | * Update :class:`wordcloud.WordCloud` to support ``repeat`` keyword argument. 99 | If set to True, words and phrases are repeated until ``max_words`` 100 | or ``min_font_size`` is reached. Contributed by :user:`amueller`. 101 | 102 | Wheels 103 | ------ 104 | 105 | * Support installation on Linux, macOS and Windows for Python 2.7, 3.4, 3.5, 3.6 and 3.7 by 106 | updating the Continuous Integration (CI) infrastructure and support the automatic creation 107 | and upload of wheels to `PyPI`_. Contributed by :user:`jcfr`. 108 | 109 | * Use `scikit-ci`_ to simplify and centralize the CI configuration. By having ``appveyor.yml``, 110 | ``.circleci/config.yml`` and ``.travis.yml`` calling the scikit-ci command-line executable, 111 | all the CI steps for all services are described in one `scikit-ci.yml`_ configuration file. 112 | 113 | * Use `scikit-ci-addons`_ to provide a set of scripts useful to help drive CI. 114 | 115 | * Simplify release process using `versioneer`_. Release process is now as simple as 116 | tagging a release; there is no need to manually update version in ``__init__.py``. 117 | 118 | * Remove use of miniconda and instead use `manylinux`_ docker images. 119 | 120 | * Fix installation of the cli on all platforms leveraging `entry_points`_. 121 | See :issue:`420`. Contributed by :user:`jcfr`. 122 | 123 | .. _manylinux: https://www.python.org/dev/peps/pep-0571/ 124 | .. _PyPI: https://pypi.org/project/wordcloud 125 | .. _scikit-ci: http://scikit-ci.readthedocs.io 126 | .. _scikit-ci-addons: http://scikit-ci-addons.readthedocs.io 127 | .. _scikit-ci.yml: https://github.com/amueller/word_cloud/blob/master/scikit-ci.yml 128 | .. _versioneer: https://github.com/warner/python-versioneer/ 129 | .. 
_entry_points: https://setuptools.readthedocs.io/en/latest/setuptools.html#automatic-script-creation 130 | 131 | Bug fixes 132 | --------- 133 | 134 | * :class:`wordcloud.WordCloud` API 135 | 136 | * Fix coloring with black image. Contributed by :user:`amueller`. 137 | 138 | * Improve error message when there is no space on canvas. Contributed by :user:`amueller`. 139 | 140 | * :ref:`wordcloud_cli` 141 | 142 | * Fix handling of invalid `regexp` parameter. Contributed by :user:`jcfr`. 143 | 144 | Documentation 145 | ------------- 146 | 147 | * Update :class:`wordcloud.WordCloud` ``color_func`` keyword argument documentation 148 | explaining how to create single color word cloud. 149 | Fix :issue:`185`. Contributed by :user:`maifeng`. 150 | 151 | * Simplify and improve `README `_. 152 | Contributed by :user:`amueller`. 153 | 154 | * Add :ref:`wordcloud_cli` document. Contributed by :user:`amueller`. 155 | 156 | * Add :ref:`making_a_release` and :ref:`changelog` documents. Contributed by :user:`jcfr`. 157 | 158 | * Improve sphinx gallery integration. Contributed by :user:`jcfr`. 159 | 160 | Website 161 | ------- 162 | 163 | * Setup automatic deployment of the website each time the `master` branch is updated. 164 | Contributed by :user:`jcfr`. 165 | 166 | * Update `website `_ to use `Read the Docs Sphinx Theme`. 167 | Contributed by :user:`amueller`. 168 | 169 | Test 170 | ---- 171 | 172 | * Update testing infrastructure. Contributed by :user:`jcfr`. 173 | 174 | * Switch testing framework from nose to `pytest `_. 175 | 176 | * Enforce coding style by running `flake8 `_ 177 | each time a Pull Request is proposed or the `master` branch updated. 178 | 179 | * Support generating html coverage report locally running ``pytest``, ``coverage html`` and 180 | opening ``htmlcov/index.html`` document. 181 | 182 | 183 | WordCloud 1.4.1 184 | =============== 185 | 186 | Bug fixes 187 | --------- 188 | 189 | * Improve stopwords list. Contributed by :user:`xuhdev`. 
190 | 191 | 192 | Test 193 | ---- 194 | 195 | * Remove outdated channel and use conda-forge. Contributed by :user:`amueller`. 196 | 197 | * Add test for the command line utility. Contributed by :user:`xuhdev`. 198 | 199 | 200 | WordCloud 1.4.0 201 | =============== 202 | 203 | See https://github.com/amueller/word_cloud/compare/1.3.3...1.4 204 | 205 | 206 | WordCloud 1.3.3 207 | =============== 208 | 209 | See https://github.com/amueller/word_cloud/compare/1.3.2...1.3.3 210 | 211 | 212 | WordCloud 1.3.2 213 | =============== 214 | 215 | See https://github.com/amueller/word_cloud/compare/1.2.2...1.3.2 216 | 217 | 218 | WordCloud 1.2.2 219 | =============== 220 | 221 | See https://github.com/amueller/word_cloud/compare/1.2.1...1.2.2 222 | 223 | 224 | WordCloud 1.2.1 225 | =============== 226 | 227 | See https://github.com/amueller/word_cloud/compare/4c7ebf81...1.2.1 228 | -------------------------------------------------------------------------------- /doc/cli.rst: -------------------------------------------------------------------------------- 1 | .. _wordcloud_cli: 2 | 3 | Command Line Interface 4 | ====================== 5 | 6 | .. argparse:: 7 | :module: wordcloud.wordcloud_cli 8 | :func: make_parser 9 | :prog: wordcloud_cli 10 | -------------------------------------------------------------------------------- /doc/conf.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # 3 | # wordcloud documentation build configuration file, created by 4 | # sphinx-quickstart on Fri May 3 17:14:50 2013. 5 | # 6 | # This file is execfile()d with the current directory set to its containing 7 | # dir. 8 | # 9 | # Note that not all possible configuration values are present in this 10 | # autogenerated file. 11 | # 12 | # All configuration values have a default; values that are commented out 13 | # serve to show the default. 
14 | 15 | import sys 16 | import os 17 | 18 | from datetime import date 19 | 20 | import wordcloud 21 | 22 | # If extensions (or modules to document with autodoc) are in another directory, 23 | # add these directories to sys.path here. If the directory is relative to the 24 | # documentation root, use os.path.abspath to make it absolute, like shown here. 25 | sys.path.insert(0, os.path.abspath('sphinxext')) 26 | 27 | # -- General configuration ---------------------------------------------------- 28 | 29 | # Add any Sphinx extension module names here, as strings. They can be 30 | # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom ones. 31 | extensions = [ 32 | 'sphinx_gallery.gen_gallery', 33 | 'sphinx_issues', 34 | 'sphinx.ext.autodoc', 35 | 'sphinx.ext.autosummary', 36 | 'sphinx.ext.doctest', 37 | 'sphinx.ext.viewcode', 38 | 'sphinxarg.ext', 39 | 'numpydoc' 40 | ] 41 | 42 | autosummary_generate = True 43 | 44 | autodoc_default_flags = ['members', 'inherited-members'] 45 | numpydoc_class_members_toctree = False 46 | 47 | # generate autosummary even if no references 48 | autosummary_generate = True 49 | 50 | # Add any paths that contain templates here, relative to this directory. 51 | templates_path = ['_templates'] 52 | 53 | # The suffix of source filenames. 54 | source_suffix = '.rst' 55 | 56 | # The encoding of source files. 57 | # source_encoding = 'utf-8-sig' 58 | 59 | # The master toctree document. 60 | master_doc = 'index' 61 | 62 | # General information about the project. 
63 | project = u'wordcloud' 64 | copyright = u'%s, Andreas Mueller' % date.today().year 65 | 66 | issues_github_path = 'amueller/word_cloud' 67 | 68 | sphinx_gallery_conf = { 69 | 'backreferences_dir': 'gen_modules/backreferences', 70 | # path to your examples scripts 71 | 'examples_dirs': '../examples', 72 | # path where to save gallery generated examples 73 | 'gallery_dirs': 'auto_examples', 74 | 'filename_pattern': r'/\w+', 75 | 'doc_module': ('wordcloud',), 76 | 'reference_url': { 77 | # The module you locally document uses None 78 | 'wordcloud': None, 79 | } 80 | } 81 | 82 | # The version info for the project you're documenting, acts as replacement for 83 | # |version| and |release|, also used in various other places throughout the 84 | # built documents. 85 | # 86 | # The short X.Y version. 87 | version = wordcloud.__version__ 88 | # The full version, including alpha/beta/rc tags. 89 | release = wordcloud.__version__ 90 | 91 | 92 | # List of patterns, relative to source directory, that match files and 93 | # directories to ignore when looking for source files. 94 | exclude_patterns = ['_build', '_templates', '_themes'] 95 | 96 | 97 | # The name of the Pygments (syntax highlighting) style to use. 98 | pygments_style = 'sphinx' 99 | 100 | # A list of ignored prefixes for module index sorting. 101 | # modindex_common_prefix = [] 102 | 103 | 104 | # -- Options for HTML output -------------------------------------------------- 105 | 106 | # The theme to use for HTML and HTML Help pages. See the documentation for 107 | # a list of builtin themes. 108 | html_theme = 'sphinx_rtd_theme' 109 | html_style = "css/wordcloud.css" 110 | 111 | # Add any paths that contain custom themes here, relative to this directory. 112 | # html_theme_path = sphinx_bootstrap_theme.get_html_theme_path() 113 | 114 | # The name for this set of Sphinx documents. If None, it defaults to 115 | # " v documentation". 116 | # html_title = None 117 | 118 | # A shorter title for the navigation bar. 
Default is the same as html_title. 119 | # html_short_title = None 120 | 121 | # The name of an image file (relative to this directory) to place at the top 122 | # of the sidebar. 123 | # html_logo = None 124 | 125 | # The name of an image file (within the static path) to use as favicon of the 126 | # docs. This file should be a Windows icon file (.ico) being 16x16 or 32x32 127 | # pixels large. 128 | # html_favicon = None 129 | 130 | # Add any paths that contain custom static files (such as style sheets) here, 131 | # relative to this directory. They are copied after the builtin static files, 132 | # so a file named "default.css" will overwrite the builtin "default.css". 133 | html_static_path = ['_static'] 134 | 135 | # If not '', a 'Last updated on:' timestamp is inserted at every page bottom, 136 | # using the given strftime format. 137 | # html_last_updated_fmt = '%b %d, %Y' 138 | 139 | # If true, SmartyPants will be used to convert quotes and dashes to 140 | # typographically correct entities. 141 | # html_use_smartypants = True 142 | 143 | # Custom sidebar templates, maps document names to template names. 144 | # html_sidebars = {} 145 | 146 | # Additional templates that should be rendered to pages, maps page names to 147 | # template names. 148 | # html_additional_pages = {} 149 | 150 | # If false, no module index is generated. 151 | # html_domain_indices = True 152 | 153 | # If false, no index is generated. 154 | # html_use_index = True 155 | 156 | # If true, the index is split into individual pages for each letter. 157 | # html_split_index = False 158 | 159 | # If true, links to the reST sources are added to the pages. 160 | # html_show_sourcelink = True 161 | 162 | # If true, "Created using Sphinx" is shown in the HTML footer. Default is True. 163 | # html_show_sphinx = True 164 | 165 | # If true, "(C) Copyright ..." is shown in the HTML footer. Default is True. 
166 | # html_show_copyright = True 167 | 168 | # If true, an OpenSearch description file will be output, and all pages will 169 | # contain a tag referring to it. The value of this option must be the 170 | # base URL from which the finished HTML is served. 171 | # html_use_opensearch = '' 172 | 173 | # This is the file name suffix for HTML files (e.g. ".xhtml"). 174 | # html_file_suffix = None 175 | 176 | # Output file base name for HTML help builder. 177 | htmlhelp_basename = 'wordclouddoc' 178 | 179 | 180 | # -- Options for LaTeX output ------------------------------------------------- 181 | 182 | latex_elements = { 183 | # The paper size ('letterpaper' or 'a4paper'). 184 | # 'papersize': 'letterpaper', 185 | 186 | # The font size ('10pt', '11pt' or '12pt'). 187 | # 'pointsize': '10pt', 188 | 189 | # Additional stuff for the LaTeX preamble. 190 | # 'preamble': '', 191 | } 192 | 193 | # Grouping the document tree into LaTeX files. List of tuples 194 | # (source start file, target name, title, author, documentclass 195 | # [howto/manual]). 196 | # latex_documents = [('index', 'wordcloud.tex', u'wordcloud Documentation', 197 | # u'Andreas Mueller', 'manual'), ] 198 | 199 | # The name of an image file (relative to this directory) to place at the top of 200 | # the title page. 201 | # latex_logo = None 202 | 203 | # For "manual" documents, if this is true, then toplevel headings are parts, 204 | # not chapters. 205 | # latex_use_parts = False 206 | 207 | # If true, show page references after internal links. 208 | # latex_show_pagerefs = False 209 | 210 | # If true, show URL addresses after external links. 211 | # latex_show_urls = False 212 | 213 | # Documents to append as an appendix to all manuals. 214 | # latex_appendices = [] 215 | 216 | # If false, no module index is generated. 217 | # latex_domain_indices = True 218 | 219 | 220 | # -- Options for manual page output ------------------------------------------- 221 | 222 | # One entry per manual page. 
List of tuples 223 | # (source start file, name, description, authors, manual section). 224 | man_pages = [ 225 | ('index', 'wordcloud', u'wordcloud Documentation', 226 | [u'Andreas Mueller'], 1) 227 | ] 228 | 229 | # If true, show URL addresses after external links. 230 | # man_show_urls = False 231 | 232 | 233 | # -- Options for Texinfo output ----------------------------------------------- 234 | 235 | # Grouping the document tree into Texinfo files. List of tuples 236 | # (source start file, target name, title, author, 237 | # dir menu entry, description, category) 238 | texinfo_documents = [ 239 | ('index', 'wordcloud', u'wordcloud Documentation', u'Andreas Mueller', 240 | 'wordcloud', 'One line description of project.', 'Miscellaneous'), 241 | ] 242 | 243 | # Documents to append as an appendix to all manuals. 244 | # texinfo_appendices = [] 245 | 246 | # If false, no module index is generated. 247 | # texinfo_domain_indices = True 248 | 249 | # How to display URL addresses: 'footnote', 'no', or 'inline'. 250 | # texinfo_show_urls = 'footnote' 251 | 252 | 253 | # Theme options are theme-specific and customize the look and feel of a 254 | # theme further. 
255 | html_theme_options = { 256 | } 257 | 258 | def setup(app): 259 | # a copy button to copy snippet of code from the documentation 260 | app.add_stylesheet("basic.css") 261 | -------------------------------------------------------------------------------- /doc/deploy-website.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | set -e 4 | set -o pipefail 5 | 6 | PROG=$(basename $0) 7 | 8 | BOT_USER_NAME=scikit-build-bot 9 | BOT_USER_EMAIL=scikit-build-bot@scikit-build.org 10 | TARGET_BRANCH=gh-pages 11 | 12 | err() { echo -e >&2 ERROR: $@\\n; } 13 | die() { err $@; exit 1; } 14 | 15 | #------------------------------------------------------------------------------- 16 | help() { 17 | cat >&2 < [--html-dir /path/to/html] 19 | 20 | Publish directory to the $TARGET_BRANCH branch of a GitHub repository. 21 | 22 | Arguments: 23 | repository slug where the files should be pushed. 24 | simple string referencing the repository from which 25 | the files were generated. 26 | 27 | Options: 28 | --html-dir path to directory containing files to publish 29 | (default: doc/_build/html). 30 | 31 | Env. variables: 32 | GITHUB_TOKEN this environment variable is expected to be set. 33 | 34 | 35 | Example: 36 | 37 | GITHUB_TOKEN=xxxx $PROG amueller/word_cloud $(git rev-parse --short HEAD) 38 | 39 | 40 | Notes: 41 | 42 | The username and email associated with the commit correspond to 43 | $BOT_USER_NAME and $BOT_USER_EMAIL. 44 | 45 | ENDHELP 46 | } 47 | 48 | #------------------------------------------------------------------------------- 49 | if [[ -z $GITHUB_TOKEN ]]; then 50 | err "skipping because GITHUB_TOKEN env. 
variable is not set" 51 | help 52 | exit 1 53 | fi 54 | 55 | if [[ $# -lt 2 ]]; then 56 | err "Missing org/name and source_sha parameters" 57 | help 58 | exit 1 59 | fi 60 | 61 | # Parse arguments: the two mandatory positionals come first 62 | repo_slug=$1 63 | source_sha=$2 64 | shift 2 65 | 66 | # Default values 67 | html_dir=doc/_build/html 68 | 69 | # Parse options 70 | while [[ $# != 0 ]]; do 71 | case $1 in 72 | --html-dir) 73 | html_dir=$2 74 | shift 2 75 | ;; 76 | --help|-h) 77 | help 78 | exit 0 79 | ;; 80 | -*) 81 | err Unknown option \"$1\" 82 | help 83 | exit 1 84 | ;; 85 | *) 86 | break 87 | ;; 88 | esac 89 | done 90 | 91 | echo "repo_slug [$repo_slug]" 92 | echo "source_sha [$source_sha]" 93 | echo "html_dir [$html_dir]" 94 | 95 | #------------------------------------------------------------------------------- 96 | cd "$html_dir" 97 | 98 | # download current version of the $TARGET_BRANCH branch (over https: GitHub no longer serves the unauthenticated git:// protocol) 99 | git clone https://github.com/$repo_slug -b $TARGET_BRANCH --depth 1 current_html 100 | 101 | # ... and only keep associated history 102 | mv current_html/.git . 
103 | rm -rf current_html 104 | 105 | # add special file disabling Jekyll processing 106 | touch .nojekyll 107 | 108 | # configure git 109 | pushURL=https://$GITHUB_TOKEN@github.com/$repo_slug 110 | git config --add remote.origin.pushURL $pushURL 111 | git config user.email $BOT_USER_EMAIL 112 | git config user.name $BOT_USER_NAME 113 | 114 | # commit 115 | git add --all 116 | git commit -m "Website update based of $repo_slug@${source_sha} 117 | 118 | It was automatically generated and published using the script '${PROG}' 119 | 120 | [ci skip] 121 | " 122 | 123 | # publish 124 | git push origin HEAD:gh-pages 125 | 126 | -------------------------------------------------------------------------------- /doc/images/a_new_hope.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/amueller/word_cloud/e7753d4b9904a5bc6c96a4705ef43d7169572fda/doc/images/a_new_hope.png -------------------------------------------------------------------------------- /doc/images/no_image.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/amueller/word_cloud/e7753d4b9904a5bc6c96a4705ef43d7169572fda/doc/images/no_image.png -------------------------------------------------------------------------------- /doc/index.rst: -------------------------------------------------------------------------------- 1 | WordCloud for Python documentation 2 | ================================== 3 | 4 | Here you will find instructions on how to create wordclouds with my Python wordcloud project. 5 | Compared to other wordclouds, my algorithm has the advantages of 6 | 7 | * filling all available space. 8 | * being able to use arbitrary masks. 9 | * having a stupid simple algorithm (with an efficient implementation) that can be easily modified. 10 | * being in Python. 11 | 12 | Check out the :ref:`example_gallery`. 13 | 14 | The code of the project is on GitHub: `word_cloud <https://github.com/amueller/word_cloud>`_ 15 | 16 | .. 
figure:: images/a_new_hope.png 17 | :width: 300px 18 | :target: auto_examples/a_new_hope.html 19 | :align: center 20 | 21 | .. toctree:: 22 | :hidden: 23 | :caption: User Documentation 24 | 25 | references 26 | cli 27 | auto_examples/index 28 | changelog 29 | 30 | .. toctree:: 31 | :hidden: 32 | :caption: Contributor Documentation 33 | 34 | make_a_release 35 | -------------------------------------------------------------------------------- /doc/make_a_release.rst: -------------------------------------------------------------------------------- 1 | .. _making_a_release: 2 | 3 | ================ 4 | Making a release 5 | ================ 6 | 7 | This document guides a contributor through creating a release of the wordcloud 8 | Python packages. 9 | 10 | A core developer should follow these steps to trigger the creation and upload of 11 | a release `X.Y.Z` of **wordcloud** on `PyPI`_. 12 | 13 | ------------------------- 14 | Documentation conventions 15 | ------------------------- 16 | 17 | The commands reported below should be evaluated in the same terminal session. 18 | 19 | Commands to evaluate start with a dollar sign. For example:: 20 | 21 | $ echo "Hello" 22 | Hello 23 | 24 | means that ``echo "Hello"`` should be copied and evaluated in the terminal. 25 | 26 | ---------------------- 27 | Setting up environment 28 | ---------------------- 29 | 30 | 1. First, `register for an account on PyPI <https://pypi.org>`_. 31 | 32 | 33 | 2. If not already the case, ask to be added as a ``Package Index Maintainer``. 34 | 35 | 36 | 3. Create a ``~/.pypirc`` file with your login credentials:: 37 | 38 | [distutils] 39 | index-servers = 40 | pypi 41 | pypitest 42 | 43 | [pypi] 44 | username=<your-username> 45 | password=<your-password> 46 | 47 | [pypitest] 48 | repository=https://test.pypi.org/legacy/ 49 | username=<your-username> 50 | password=<your-password> 51 | 52 | where ``<your-username>`` and ``<your-password>`` correspond to your PyPI account. 53 | 54 | 55 | --------------------- 56 | `PyPI`_: Step-by-step 57 | --------------------- 58 | 59 | 1. 
Make sure that all CI tests are passing: `AppVeyor`_, `CircleCI`_ and `Travis CI`_. 60 | 61 | 62 | 2. List all tags sorted by version 63 | 64 | .. code:: 65 | 66 | $ git tag -l | sort -V 67 | 68 | 69 | 3. Choose the next release version number 70 | 71 | .. code:: 72 | 73 | $ release=X.Y.Z 74 | 75 | .. warning:: 76 | 77 | To ensure the packages are uploaded on `PyPI`_, tags must match this regular 78 | expression: ``^[0-9]+(\.[0-9]+)*(\.post[0-9]+)?$``. 79 | 80 | 81 | 4. Download latest sources 82 | 83 | .. code:: 84 | 85 | $ cd /tmp && git clone git@github.com:amueller/word_cloud && cd word_cloud 86 | 87 | 88 | 5. In `doc/changelog.rst` replace the ``Next Release`` section header with 89 | ``WordCloud X.Y.Z`` and commit the changes using the same title 90 | 91 | .. code:: 92 | 93 | $ git add doc/changelog.rst 94 | $ git commit -m "WordCloud ${release}" 95 | 96 | 97 | 6. Tag the release 98 | 99 | .. code:: 100 | 101 | $ git tag --sign -m "WordCloud ${release}" ${release} master 102 | 103 | .. note:: 104 | 105 | We recommend using a GPG key to sign the tag. 106 | 107 | 7. Publish the tag 108 | 109 | .. code:: 110 | 111 | $ git push origin ${release} 112 | 113 | .. note:: This will trigger builds on each CI service and automatically upload the wheels \ 114 | and source distribution to `PyPI`_. 115 | 116 | 8. Check the status of the builds on `AppVeyor`_, `CircleCI`_ and `Travis CI`_. 117 | 118 | 9. Once the builds are completed, check that the distributions are available on `PyPI`_. 119 | 120 | 121 | 10. Create a clean testing environment to test the installation 122 | 123 | .. code:: 124 | 125 | $ mkvirtualenv wordcloud-${release}-install-test && \ 126 | pip install wordcloud && \ 127 | python -c "import wordcloud;print(wordcloud.__version__)" 128 | 129 | .. note:: 130 | 131 | If ``mkvirtualenv`` is not available, you do not have `virtualenvwrapper`_ 132 | installed; in that case, you can either install it or directly use `virtualenv`_ or `venv`_. 
133 | 134 | 11. Cleanup 135 | 136 | .. code:: 137 | 138 | $ deactivate && \ 139 | rm -rf dist/* && \ 140 | rmvirtualenv wordcloud-${release}-install-test 141 | 142 | 143 | 12. Add a ``Next Release`` section back in `doc/changelog.rst`, merge the result 144 | and push local changes:: 145 | 146 | $ git push origin master 147 | 148 | 149 | .. _virtualenvwrapper: https://virtualenvwrapper.readthedocs.io/ 150 | .. _virtualenv: http://virtualenv.readthedocs.io 151 | .. _venv: https://docs.python.org/3/library/venv.html 152 | 153 | .. _AppVeyor: https://ci.appveyor.com/project/amueller/word-cloud/history 154 | .. _CircleCI: https://circleci.com/gh/amueller/word_cloud 155 | .. _Travis CI: https://travis-ci.org/amueller/word_cloud/pull_requests 156 | 157 | .. _PyPI: https://pypi.org/project/wordcloud -------------------------------------------------------------------------------- /doc/references.rst: -------------------------------------------------------------------------------- 1 | API Reference 2 | ============= 3 | All functionality is encapsulated in the WordCloud class. 4 | 5 | .. toctree:: 6 | :maxdepth: 2 7 | 8 | .. automodule:: wordcloud 9 | :no-members: 10 | :no-inherited-members: 11 | 12 | .. currentmodule:: wordcloud 13 | 14 | .. 
autosummary:: 15 | :toctree: generated/ 16 | :template: class.rst 17 | 18 | WordCloud 19 | ImageColorGenerator 20 | 21 | :template: function.rst 22 | 23 | random_color_func 24 | colormap_color_func 25 | get_single_color_func 26 | -------------------------------------------------------------------------------- /doc/requirements-doc.txt: -------------------------------------------------------------------------------- 1 | numpydoc 2 | imageio 3 | sphinx 4 | sphinx_rtd_theme 5 | sphinx_gallery 6 | sphinx-argparse 7 | sphinx-issues 8 | 9 | # 10 | # Example requirements 11 | # 12 | 13 | # all examples 14 | matplotlib 15 | 16 | # frequency example 17 | multidict 18 | 19 | # wordcloud_cn example 20 | jieba 21 | scipy 22 | 23 | # arabic example 24 | python-bidi 25 | arabic_reshaper 26 | -------------------------------------------------------------------------------- /examples/README.txt: -------------------------------------------------------------------------------- 1 | .. _example_gallery: 2 | 3 | Gallery of Examples 4 | =================== 5 | -------------------------------------------------------------------------------- /examples/a_new_hope.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/amueller/word_cloud/e7753d4b9904a5bc6c96a4705ef43d7169572fda/examples/a_new_hope.png -------------------------------------------------------------------------------- /examples/a_new_hope.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | """ 3 | Using custom colors 4 | =================== 5 | 6 | Using the recolor method and custom coloring functions. 
7 | """ 8 | 9 | import numpy as np 10 | from PIL import Image 11 | from os import path 12 | import matplotlib.pyplot as plt 13 | import os 14 | import random 15 | 16 | from wordcloud import WordCloud, STOPWORDS 17 | 18 | 19 | def grey_color_func(word, font_size, position, orientation, random_state=None, 20 | **kwargs): 21 | return "hsl(0, 0%%, %d%%)" % random.randint(60, 100)  # grey "hsl" color string with a random lightness of 60-100%; drawn from the global random module, so the random_state argument is not used 22 | 23 | 24 | # get data directory (using getcwd() is needed to support running example in generated IPython notebook) 25 | d = path.dirname(__file__) if "__file__" in locals() else os.getcwd() 26 | 27 | # read the mask image taken from 28 | # http://www.stencilry.org/stencils/movies/star%20wars/storm-trooper.gif 29 | mask = np.array(Image.open(path.join(d, "stormtrooper_mask.png"))) 30 | 31 | # movie script of "a new hope" 32 | # http://www.imsdb.com/scripts/Star-Wars-A-New-Hope.html 33 | # May the lawyers deem this fair use. 34 | text = open(path.join(d, 'a_new_hope.txt')).read()  # NOTE(review): the file object is never explicitly closed 35 | 36 | # pre-processing the text a little bit 37 | text = text.replace("HAN", "Han") 38 | text = text.replace("LUKE'S", "Luke") 39 | 40 | # adding movie script specific stopwords 41 | stopwords = set(STOPWORDS) 42 | stopwords.add("int") 43 | stopwords.add("ext") 44 | 45 | wc = WordCloud(max_words=1000, mask=mask, stopwords=stopwords, margin=10, 46 | random_state=1).generate(text) 47 | # store default colored image 48 | default_colors = wc.to_array() 49 | plt.title("Custom colors") 50 | plt.imshow(wc.recolor(color_func=grey_color_func, random_state=3), 51 | interpolation="bilinear") 52 | wc.to_file("a_new_hope.png") 53 | plt.axis("off") 54 | plt.figure() 55 | plt.title("Default colors") 56 | plt.imshow(default_colors, interpolation="bilinear") 57 | plt.axis("off") 58 | plt.show() 59 | -------------------------------------------------------------------------------- /examples/a_new_hope_bigrams.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/amueller/word_cloud/e7753d4b9904a5bc6c96a4705ef43d7169572fda/examples/a_new_hope_bigrams.png -------------------------------------------------------------------------------- /examples/alice.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/amueller/word_cloud/e7753d4b9904a5bc6c96a4705ef43d7169572fda/examples/alice.png -------------------------------------------------------------------------------- /examples/alice_color.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/amueller/word_cloud/e7753d4b9904a5bc6c96a4705ef43d7169572fda/examples/alice_color.png -------------------------------------------------------------------------------- /examples/alice_colored.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/amueller/word_cloud/e7753d4b9904a5bc6c96a4705ef43d7169572fda/examples/alice_colored.png -------------------------------------------------------------------------------- /examples/alice_license.txt: -------------------------------------------------------------------------------- 1 | ***** This file should be named 11.txt or 11.zip ***** 2 | This and all associated files of various formats will be found in: 3 | http://www.gutenberg.org/1/11/ 4 | 5 | 6 | 7 | Updated editions will replace the previous one--the old editions 8 | will be renamed. 9 | 10 | Creating the works from public domain print editions means that no 11 | one owns a United States copyright in these works, so the Foundation 12 | (and you!) can copy and distribute it in the United States without 13 | permission and without paying copyright royalties. 
Special rules, 14 | set forth in the General Terms of Use part of this license, apply to 15 | copying and distributing Project Gutenberg-tm electronic works to 16 | protect the PROJECT GUTENBERG-tm concept and trademark. Project 17 | Gutenberg is a registered trademark, and may not be used if you 18 | charge for the eBooks, unless you receive specific permission. If you 19 | do not charge anything for copies of this eBook, complying with the 20 | rules is very easy. You may use this eBook for nearly any purpose 21 | such as creation of derivative works, reports, performances and 22 | research. They may be modified and printed and given away--you may do 23 | practically ANYTHING with public domain eBooks. Redistribution is 24 | subject to the trademark license, especially commercial 25 | redistribution. 26 | 27 | 28 | 29 | *** START: FULL LICENSE *** 30 | 31 | THE FULL PROJECT GUTENBERG LICENSE 32 | PLEASE READ THIS BEFORE YOU DISTRIBUTE OR USE THIS WORK 33 | 34 | To protect the Project Gutenberg-tm mission of promoting the free 35 | distribution of electronic works, by using or distributing this work 36 | (or any other work associated in any way with the phrase "Project 37 | Gutenberg"), you agree to comply with all the terms of the Full Project 38 | Gutenberg-tm License (available with this file or online at 39 | http://gutenberg.org/license). 40 | 41 | 42 | Section 1. General Terms of Use and Redistributing Project Gutenberg-tm 43 | electronic works 44 | 45 | 1.A. By reading or using any part of this Project Gutenberg-tm 46 | electronic work, you indicate that you have read, understand, agree to 47 | and accept all the terms of this license and intellectual property 48 | (trademark/copyright) agreement. If you do not agree to abide by all 49 | the terms of this agreement, you must cease using and return or destroy 50 | all copies of Project Gutenberg-tm electronic works in your possession. 
51 | If you paid a fee for obtaining a copy of or access to a Project 52 | Gutenberg-tm electronic work and you do not agree to be bound by the 53 | terms of this agreement, you may obtain a refund from the person or 54 | entity to whom you paid the fee as set forth in paragraph 1.E.8. 55 | 56 | 1.B. "Project Gutenberg" is a registered trademark. It may only be 57 | used on or associated in any way with an electronic work by people who 58 | agree to be bound by the terms of this agreement. There are a few 59 | things that you can do with most Project Gutenberg-tm electronic works 60 | even without complying with the full terms of this agreement. See 61 | paragraph 1.C below. There are a lot of things you can do with Project 62 | Gutenberg-tm electronic works if you follow the terms of this agreement 63 | and help preserve free future access to Project Gutenberg-tm electronic 64 | works. See paragraph 1.E below. 65 | 66 | 1.C. The Project Gutenberg Literary Archive Foundation ("the Foundation" 67 | or PGLAF), owns a compilation copyright in the collection of Project 68 | Gutenberg-tm electronic works. Nearly all the individual works in the 69 | collection are in the public domain in the United States. If an 70 | individual work is in the public domain in the United States and you are 71 | located in the United States, we do not claim a right to prevent you from 72 | copying, distributing, performing, displaying or creating derivative 73 | works based on the work as long as all references to Project Gutenberg 74 | are removed. Of course, we hope that you will support the Project 75 | Gutenberg-tm mission of promoting free access to electronic works by 76 | freely sharing Project Gutenberg-tm works in compliance with the terms of 77 | this agreement for keeping the Project Gutenberg-tm name associated with 78 | the work. 
You can easily comply with the terms of this agreement by 79 | keeping this work in the same format with its attached full Project 80 | Gutenberg-tm License when you share it without charge with others. 81 | 82 | 1.D. The copyright laws of the place where you are located also govern 83 | what you can do with this work. Copyright laws in most countries are in 84 | a constant state of change. If you are outside the United States, check 85 | the laws of your country in addition to the terms of this agreement 86 | before downloading, copying, displaying, performing, distributing or 87 | creating derivative works based on this work or any other Project 88 | Gutenberg-tm work. The Foundation makes no representations concerning 89 | the copyright status of any work in any country outside the United 90 | States. 91 | 92 | 1.E. Unless you have removed all references to Project Gutenberg: 93 | 94 | 1.E.1. The following sentence, with active links to, or other immediate 95 | access to, the full Project Gutenberg-tm License must appear prominently 96 | whenever any copy of a Project Gutenberg-tm work (any work on which the 97 | phrase "Project Gutenberg" appears, or with which the phrase "Project 98 | Gutenberg" is associated) is accessed, displayed, performed, viewed, 99 | copied or distributed: 100 | 101 | This eBook is for the use of anyone anywhere at no cost and with 102 | almost no restrictions whatsoever. You may copy it, give it away or 103 | re-use it under the terms of the Project Gutenberg License included 104 | with this eBook or online at www.gutenberg.org 105 | 106 | 1.E.2. If an individual Project Gutenberg-tm electronic work is derived 107 | from the public domain (does not contain a notice indicating that it is 108 | posted with permission of the copyright holder), the work can be copied 109 | and distributed to anyone in the United States without paying any fees 110 | or charges. 
If you are redistributing or providing access to a work 111 | with the phrase "Project Gutenberg" associated with or appearing on the 112 | work, you must comply either with the requirements of paragraphs 1.E.1 113 | through 1.E.7 or obtain permission for the use of the work and the 114 | Project Gutenberg-tm trademark as set forth in paragraphs 1.E.8 or 115 | 1.E.9. 116 | 117 | 1.E.3. If an individual Project Gutenberg-tm electronic work is posted 118 | with the permission of the copyright holder, your use and distribution 119 | must comply with both paragraphs 1.E.1 through 1.E.7 and any additional 120 | terms imposed by the copyright holder. Additional terms will be linked 121 | to the Project Gutenberg-tm License for all works posted with the 122 | permission of the copyright holder found at the beginning of this work. 123 | 124 | 1.E.4. Do not unlink or detach or remove the full Project Gutenberg-tm 125 | License terms from this work, or any files containing a part of this 126 | work or any other work associated with Project Gutenberg-tm. 127 | 128 | 1.E.5. Do not copy, display, perform, distribute or redistribute this 129 | electronic work, or any part of this electronic work, without 130 | prominently displaying the sentence set forth in paragraph 1.E.1 with 131 | active links or immediate access to the full terms of the Project 132 | Gutenberg-tm License. 133 | 134 | 1.E.6. You may convert to and distribute this work in any binary, 135 | compressed, marked up, nonproprietary or proprietary form, including any 136 | word processing or hypertext form. 
However, if you provide access to or 137 | distribute copies of a Project Gutenberg-tm work in a format other than 138 | "Plain Vanilla ASCII" or other format used in the official version 139 | posted on the official Project Gutenberg-tm web site (www.gutenberg.org), 140 | you must, at no additional cost, fee or expense to the user, provide a 141 | copy, a means of exporting a copy, or a means of obtaining a copy upon 142 | request, of the work in its original "Plain Vanilla ASCII" or other 143 | form. Any alternate format must include the full Project Gutenberg-tm 144 | License as specified in paragraph 1.E.1. 145 | 146 | 1.E.7. Do not charge a fee for access to, viewing, displaying, 147 | performing, copying or distributing any Project Gutenberg-tm works 148 | unless you comply with paragraph 1.E.8 or 1.E.9. 149 | 150 | 1.E.8. You may charge a reasonable fee for copies of or providing 151 | access to or distributing Project Gutenberg-tm electronic works provided 152 | that 153 | 154 | - You pay a royalty fee of 20% of the gross profits you derive from 155 | the use of Project Gutenberg-tm works calculated using the method 156 | you already use to calculate your applicable taxes. The fee is 157 | owed to the owner of the Project Gutenberg-tm trademark, but he 158 | has agreed to donate royalties under this paragraph to the 159 | Project Gutenberg Literary Archive Foundation. Royalty payments 160 | must be paid within 60 days following each date on which you 161 | prepare (or are legally required to prepare) your periodic tax 162 | returns. Royalty payments should be clearly marked as such and 163 | sent to the Project Gutenberg Literary Archive Foundation at the 164 | address specified in Section 4, "Information about donations to 165 | the Project Gutenberg Literary Archive Foundation." 
166 | 167 | - You provide a full refund of any money paid by a user who notifies 168 | you in writing (or by e-mail) within 30 days of receipt that s/he 169 | does not agree to the terms of the full Project Gutenberg-tm 170 | License. You must require such a user to return or 171 | destroy all copies of the works possessed in a physical medium 172 | and discontinue all use of and all access to other copies of 173 | Project Gutenberg-tm works. 174 | 175 | - You provide, in accordance with paragraph 1.F.3, a full refund of any 176 | money paid for a work or a replacement copy, if a defect in the 177 | electronic work is discovered and reported to you within 90 days 178 | of receipt of the work. 179 | 180 | - You comply with all other terms of this agreement for free 181 | distribution of Project Gutenberg-tm works. 182 | 183 | 1.E.9. If you wish to charge a fee or distribute a Project Gutenberg-tm 184 | electronic work or group of works on different terms than are set 185 | forth in this agreement, you must obtain permission in writing from 186 | both the Project Gutenberg Literary Archive Foundation and Michael 187 | Hart, the owner of the Project Gutenberg-tm trademark. Contact the 188 | Foundation as set forth in Section 3 below. 189 | 190 | 1.F. 191 | 192 | 1.F.1. Project Gutenberg volunteers and employees expend considerable 193 | effort to identify, do copyright research on, transcribe and proofread 194 | public domain works in creating the Project Gutenberg-tm 195 | collection. Despite these efforts, Project Gutenberg-tm electronic 196 | works, and the medium on which they may be stored, may contain 197 | "Defects," such as, but not limited to, incomplete, inaccurate or 198 | corrupt data, transcription errors, a copyright or other intellectual 199 | property infringement, a defective or damaged disk or other medium, a 200 | computer virus, or computer codes that damage or cannot be read by 201 | your equipment. 202 | 203 | 1.F.2. 
LIMITED WARRANTY, DISCLAIMER OF DAMAGES - Except for the "Right 204 | of Replacement or Refund" described in paragraph 1.F.3, the Project 205 | Gutenberg Literary Archive Foundation, the owner of the Project 206 | Gutenberg-tm trademark, and any other party distributing a Project 207 | Gutenberg-tm electronic work under this agreement, disclaim all 208 | liability to you for damages, costs and expenses, including legal 209 | fees. YOU AGREE THAT YOU HAVE NO REMEDIES FOR NEGLIGENCE, STRICT 210 | LIABILITY, BREACH OF WARRANTY OR BREACH OF CONTRACT EXCEPT THOSE 211 | PROVIDED IN PARAGRAPH F3. YOU AGREE THAT THE FOUNDATION, THE 212 | TRADEMARK OWNER, AND ANY DISTRIBUTOR UNDER THIS AGREEMENT WILL NOT BE 213 | LIABLE TO YOU FOR ACTUAL, DIRECT, INDIRECT, CONSEQUENTIAL, PUNITIVE OR 214 | INCIDENTAL DAMAGES EVEN IF YOU GIVE NOTICE OF THE POSSIBILITY OF SUCH 215 | DAMAGE. 216 | 217 | 1.F.3. LIMITED RIGHT OF REPLACEMENT OR REFUND - If you discover a 218 | defect in this electronic work within 90 days of receiving it, you can 219 | receive a refund of the money (if any) you paid for it by sending a 220 | written explanation to the person you received the work from. If you 221 | received the work on a physical medium, you must return the medium with 222 | your written explanation. The person or entity that provided you with 223 | the defective work may elect to provide a replacement copy in lieu of a 224 | refund. If you received the work electronically, the person or entity 225 | providing it to you may choose to give you a second opportunity to 226 | receive the work electronically in lieu of a refund. If the second copy 227 | is also defective, you may demand a refund in writing without further 228 | opportunities to fix the problem. 229 | 230 | 1.F.4. 
Except for the limited right of replacement or refund set forth 231 | in paragraph 1.F.3, this work is provided to you 'AS-IS' WITH NO OTHER 232 | WARRANTIES OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO 233 | WARRANTIES OF MERCHANTIBILITY OR FITNESS FOR ANY PURPOSE. 234 | 235 | 1.F.5. Some states do not allow disclaimers of certain implied 236 | warranties or the exclusion or limitation of certain types of damages. 237 | If any disclaimer or limitation set forth in this agreement violates the 238 | law of the state applicable to this agreement, the agreement shall be 239 | interpreted to make the maximum disclaimer or limitation permitted by 240 | the applicable state law. The invalidity or unenforceability of any 241 | provision of this agreement shall not void the remaining provisions. 242 | 243 | 1.F.6. INDEMNITY - You agree to indemnify and hold the Foundation, the 244 | trademark owner, any agent or employee of the Foundation, anyone 245 | providing copies of Project Gutenberg-tm electronic works in accordance 246 | with this agreement, and any volunteers associated with the production, 247 | promotion and distribution of Project Gutenberg-tm electronic works, 248 | harmless from all liability, costs and expenses, including legal fees, 249 | that arise directly or indirectly from any of the following which you do 250 | or cause to occur: (a) distribution of this or any Project Gutenberg-tm 251 | work, (b) alteration, modification, or additions or deletions to any 252 | Project Gutenberg-tm work, and (c) any Defect you cause. 253 | 254 | 255 | Section 2. Information about the Mission of Project Gutenberg-tm 256 | 257 | Project Gutenberg-tm is synonymous with the free distribution of 258 | electronic works in formats readable by the widest variety of computers 259 | including obsolete, old, middle-aged and new computers. It exists 260 | because of the efforts of hundreds of volunteers and donations from 261 | people in all walks of life. 
262 | 263 | Volunteers and financial support to provide volunteers with the 264 | assistance they need, is critical to reaching Project Gutenberg-tm's 265 | goals and ensuring that the Project Gutenberg-tm collection will 266 | remain freely available for generations to come. In 2001, the Project 267 | Gutenberg Literary Archive Foundation was created to provide a secure 268 | and permanent future for Project Gutenberg-tm and future generations. 269 | To learn more about the Project Gutenberg Literary Archive Foundation 270 | and how your efforts and donations can help, see Sections 3 and 4 271 | and the Foundation web page at http://www.pglaf.org. 272 | 273 | 274 | Section 3. Information about the Project Gutenberg Literary Archive 275 | Foundation 276 | 277 | The Project Gutenberg Literary Archive Foundation is a non profit 278 | 501(c)(3) educational corporation organized under the laws of the 279 | state of Mississippi and granted tax exempt status by the Internal 280 | Revenue Service. The Foundation's EIN or federal tax identification 281 | number is 64-6221541. Its 501(c)(3) letter is posted at 282 | http://pglaf.org/fundraising. Contributions to the Project Gutenberg 283 | Literary Archive Foundation are tax deductible to the full extent 284 | permitted by U.S. federal laws and your state's laws. 285 | 286 | The Foundation's principal office is located at 4557 Melan Dr. S. 287 | Fairbanks, AK, 99712., but its volunteers and employees are scattered 288 | throughout numerous locations. Its business office is located at 289 | 809 North 1500 West, Salt Lake City, UT 84116, (801) 596-1887, email 290 | business@pglaf.org. Email contact links and up to date contact 291 | information can be found at the Foundation's web site and official 292 | page at http://pglaf.org 293 | 294 | For additional contact information: 295 | Dr. Gregory B. Newby 296 | Chief Executive and Director 297 | gbnewby@pglaf.org 298 | 299 | 300 | Section 4. 
Information about Donations to the Project Gutenberg 301 | Literary Archive Foundation 302 | 303 | Project Gutenberg-tm depends upon and cannot survive without wide 304 | spread public support and donations to carry out its mission of 305 | increasing the number of public domain and licensed works that can be 306 | freely distributed in machine readable form accessible by the widest 307 | array of equipment including outdated equipment. Many small donations 308 | ($1 to $5,000) are particularly important to maintaining tax exempt 309 | status with the IRS. 310 | 311 | The Foundation is committed to complying with the laws regulating 312 | charities and charitable donations in all 50 states of the United 313 | States. Compliance requirements are not uniform and it takes a 314 | considerable effort, much paperwork and many fees to meet and keep up 315 | with these requirements. We do not solicit donations in locations 316 | where we have not received written confirmation of compliance. To 317 | SEND DONATIONS or determine the status of compliance for any 318 | particular state visit http://pglaf.org 319 | 320 | While we cannot and do not solicit contributions from states where we 321 | have not met the solicitation requirements, we know of no prohibition 322 | against accepting unsolicited donations from donors in such states who 323 | approach us with offers to donate. 324 | 325 | International donations are gratefully accepted, but we cannot make 326 | any statements concerning tax treatment of donations received from 327 | outside the United States. U.S. laws alone swamp our small staff. 328 | 329 | Please check the Project Gutenberg Web pages for current donation 330 | methods and addresses. Donations are accepted in a number of other 331 | ways including checks, online payments and credit card donations. 332 | To donate, please visit: http://pglaf.org/donate 333 | 334 | 335 | Section 5. General Information About Project Gutenberg-tm electronic 336 | works. 
337 | 338 | Professor Michael S. Hart is the originator of the Project Gutenberg-tm 339 | concept of a library of electronic works that could be freely shared 340 | with anyone. For thirty years, he produced and distributed Project 341 | Gutenberg-tm eBooks with only a loose network of volunteer support. 342 | 343 | 344 | Project Gutenberg-tm eBooks are often created from several printed 345 | editions, all of which are confirmed as Public Domain in the U.S. 346 | unless a copyright notice is included. Thus, we do not necessarily 347 | keep eBooks in compliance with any particular paper edition. 348 | 349 | 350 | Most people start at our Web site which has the main PG search facility: 351 | 352 | http://www.gutenberg.org 353 | 354 | This Web site includes information about Project Gutenberg-tm, 355 | including how to make donations to the Project Gutenberg Literary 356 | Archive Foundation, how to help produce our new eBooks, and how to 357 | subscribe to our email newsletter to hear about new eBooks. 
358 | 359 | -------------------------------------------------------------------------------- /examples/alice_mask.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/amueller/word_cloud/e7753d4b9904a5bc6c96a4705ef43d7169572fda/examples/alice_mask.png -------------------------------------------------------------------------------- /examples/arabic.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | """ 3 | Create wordcloud with Arabic 4 | =============== 5 | Generating a wordcloud from Arabic text 6 | 7 | Dependencies: 8 | - bidi.algorithm 9 | - arabic_reshaper 10 | 11 | Dependencies installation: 12 | pip install python-bidi arabic-reshaper 13 | """ 14 | 15 | import os 16 | import codecs 17 | from wordcloud import WordCloud 18 | import arabic_reshaper 19 | from bidi.algorithm import get_display 20 | 21 | # get data directory (using getcwd() is needed to support running example in generated IPython notebook) 22 | d = os.path.dirname(__file__) if "__file__" in locals() else os.getcwd() 23 | 24 | # Read the whole text. 
25 | f = codecs.open(os.path.join(d, 'arabicwords.txt'), 'r', 'utf-8') 26 | 27 | # Make text readable for a non-Arabic library like wordcloud 28 | text = arabic_reshaper.reshape(f.read()) 29 | text = get_display(text) 30 | 31 | # Generate a word cloud image 32 | wordcloud = WordCloud(font_path='fonts/NotoNaskhArabic/NotoNaskhArabic-Regular.ttf').generate(text) 33 | 34 | # Export to an image 35 | wordcloud.to_file("arabic_example.png") 36 | -------------------------------------------------------------------------------- /examples/arabic_example.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/amueller/word_cloud/e7753d4b9904a5bc6c96a4705ef43d7169572fda/examples/arabic_example.png -------------------------------------------------------------------------------- /examples/arabicwords.txt: -------------------------------------------------------------------------------- 1 | اللُّغَة العَرَبِيّة هي أكثر اللغات تحدثاً ونطقاً ضمن مجموعة اللغات السامية، وإحدى أكثر اللغات انتشاراً في العالم، يتحدثها أكثر من 422 مليون نسمة،[4](1) ويتوزع متحدثوها في الوطن العربي، بالإضافة إلى العديد من المناطق الأخرى المجاورة كالأحواز وتركيا وتشاد ومالي والسنغال وإرتيريا و إثيوبيا و جنوب السودان و إيران. اللغة العربية ذات أهمية قصوى لدى المسلمين، فهي لغة مقدسة (لغة القرآن)، ولا تتم الصلاة (وعبادات أخرى) في الإسلام إلا بإتقان بعض من كلماتها.[5][6] العربية هي أيضاً لغة شعائرية رئيسية لدى عدد من الكنائس المسيحية في الوطن العربي، كما كتبت بها كثير من أهم الأعمال الدينية والفكرية اليهودية في العصور الوسطى. 
وأثّر انتشار الإسلام، وتأسيسه دولاً، في ارتفاع مكانة اللغة العربية، وأصبحت لغة السياسة والعلم والأدب لقرون طويلة في الأراضي التي حكمها المسلمون، وأثرت العربية تأثيراً مباشراً أو غير مباشر على كثير من اللغات الأخرى في العالم الإسلامي، كالتركية والفارسية والأمازيغية والكردية والأردوية والماليزية والإندونيسية والألبانية وبعض اللغات الإفريقية الأخرى مثل الهاوسا والسواحيلية والتجرية والأمهرية و الصومالية، وبعض اللغات الأوروبية وخاصةً المتوسطية كالإسبانية والبرتغالية والمالطية والصقلية. كما أنها تُدرَّس بشكل رسمي أو غير رسمي في الدول الإسلامية والدول الإفريقية المحاذية للوطن العربي. 2 | العربية لغة رسمية في كل دول الوطن العربي إضافة إلى كونها لغة رسمية في تشاد وإريتريا وإسرائيل. وهي إحدى اللغات الرسمية الست في منظمة الأمم المتحدة، ويُحتفل باليوم العالمي للغة العربية في 18 ديسمبر كذكرى اعتماد العربية بين لغات العمل في الأمم المتحدة.[7] 3 | واللغة العربية من أغزر اللغات من حيث المادة اللغوية، فعلى سبيل المثال يحوي معجم لسان العرب لابن منظور من القرن الثالث عشر أكثر من 80 ألف مادة، بينما في اللغة الإنجليزية فإن قاموس صموئيل جونسون - وهو من أوائل من وضع قاموساً إنجليزياً من القرن الثامن عشر-[8] يحتوي على 42 ألف كلمة.[9] 4 | تحتوي العربية على 28 حرفاً مكتوباً. ويرى بعض اللغويين أنه يجب إضافة حرف الهمزة إلى حروف العربية، ليصبح عدد الحروف 29. تُكتب العربية من اليمين إلى اليسار - ومثلها اللغة الفارسية والعبرية على عكس كثير من اللغات العالمية - ومن أعلى الصفحة إلى أسفلها. 5 | حيث ذهب به إلى أنها للعرب خاصة.[10] غير أن الضاد المقصودة هنا ليست الضاد المعروفة والمستخدمة اليوم في دول مثل جمهورية مصر العربية، والتي هي عبارة عن دال مفخمة، وهي التي لاتُستحسن قراءة القرآن أو الشعر العربي بها، أما الضاد العربية القديمة فهي صوتٌ آخر مزيجٌ بين الظاء واللام، واندمج هذا الصوت مع الظاء في الجزيرة العربية. ولأن الظاء هي ذال مفخمة، أي أنها حرف ما - بين - أسناني، فقد تحولت بدورها في الحواضر إلى دال مفخمة كتحول الثاء إلى تاء والذال إلى دال، وصارت هذه الدال المفخمة هي الضاد الحديثة. فالدال المفخمة ليست خاصة بالعربية، بل هي في الواقع موجودة في لغات كثيرة. 
وهي ليست الضاد الأصلية التي كان يعنيها المتنبي وابن منظور صاحب لسان العرب وغيرهم. 6 | تنتمي اللغة العربية إلى أسرة اللغات السامية المتفرعة من مجموعة اللغات الإفريقية الآسيوية. وتضم مجموعة اللغات السامية لغات حضارة الهلال الخصيب القديمة، مثل الأكادية والكنعانية والآرامية واللغة الصيهدية (جنوب الجزيرة العربية) واللغات العربية الشمالية القديمة وبعض لغات القرن الإفريقي كالأمهرية. وعلى وجه التحديد، يضع اللغويون اللغة العربية في المجموعة السامية الوسطى من اللغات السامية الغربية. 7 | والعربية من أحدث هذه اللغات نشأة وتاريخاً، ولكن يعتقد البعض أنها الأقرب إلى اللغة السامية الأم التي انبثقت منها اللغات السامية الأخرى، وذلك لاحتباس العرب في جزيرة العرب فلم تتعرض لما تعرضت له باقي اللغات السامية من اختلاط.[15] ولكن هناك من يخالف هذا الرأي بين علماء اللسانيات، حيث أن تغير اللغة هو عملية مستمرة عبر الزمن والانعزال الجغرافي قد يزيد من حدة هذا التغير حيث يبدأ نشوء أيّة لغة جديدة بنشوء لهجة جديدة في منطقة منعزلة جغرافياً. بالإضافة لافتراض وجود لغة سامية أم لا يعني وجودها بالمعنى المفهوم للغة الواحدة بل هي تعبير مجازي قصد به الإفصاح عن تقارب مجموعة من اللغات [16] فقد كان علماء اللسانيات يعتمدون على قرب لغة وعقلية من يرونه مرشحاً لعضوية عائلة اللغات السامية وبُنيت دراساتهم على أسس جغرافية وسياسية وليس على أُسس عرقية ولا علاقة لها بنظرة التوراة لأبناء سام[17] وكثرة قواعد اللغة العربية ترجح أنها طرأت عليها في فترات لاحقة وأنها مرت بأطوار عديدة مما يضعف فرضية أن هذه اللغة أقرب لما عُرف اصطلاحاً باللغة السامية الأم هذه، ولا توجد لغة في العالم تستطيع الادعاء أنها نقية وصافية من عوامل ومؤثرات خارجية 8 | هنالك العديد من الآراء حول أصل العربية لدى قدامى اللغويين العرب منها أن اللغة العربية أقدم من العرب أنفسهم فقالو أنها لغة آدم في الجنة، ولعب التنافس القبلي في عصر الخلافة العباسية دوراً كبيراً في نُشوء هذه النظريات، فزعم بعضهم أن يعرب بن قحطان كان أول من تكلم هذه العربية، وفريق ذهب أن إسماعيل هو أول من تكلم بها، وأنه نسي لسان أبيه [19]،(2)، إلا أنه لا وجود لبراهين علمية تُثبت أياً من هذه النظريات، فجنوب الجزيرة العربية، موطن يعرب المفترض كان يتحدث بعربية مختلفة لها قواعدها، وعثر في مواضع 
مُتعدّدة في شمال شبه الجزيرة العربية كذلك على كتابات قديمة بلغات متباينة ومختلفة عن عربية القرآن أو الشعر الجاهلي بل هي مختلفة عن بعضها البعض [20]، ولم يهتم اللغويون العرب القدماء بهذه اللغات واعتبروها لغات "رديئة"، فقد اعتبروا اللغة العربية لغة القرآن هي الأصل رغم أن تلك اللغات العربية الجنوبية والشمالية أقدم من عربية القرآن [21] وبعضهم كان يرى أن دراسة وبحث تلك اللغات واللهجات مضيعة للوقت وإحياء للجاهلية [22] فقد كانوا مُدركين أن ألسنة العرب متباينة ومختلفة ، فقد قال محمد بن جرير الطبري [23] : 9 | « كانت العرب وإن جمع جميعها اسم أنهم عرب، فهم مختلفو الألسن بالبيان متباينو المنطق والكلام» 10 | ومنهم من يرى أنها لغة قريش خاصة ويؤيد هذا الرأي أن أقدم النصوص المتوفرة بهذه اللغة هو القرآن والنبي محمد قُرشي وأول دعوته كانت بينهم وهو الرأي الذي أجمع عليه غالب اللغويين العرب القدماء [24] ومنهم من يرى أنها لهجة عربية تطورت في مملكة كندة في منتصف القرن السادس الميلادي بسبب إغداق ملوك تلك المملكة المال على الشعراء فأدى لتنافسهم وتوحد لهجة شعرية بينهم وهم أقدم من قريش [25] وأيد ذلك العديد من المستشرقين فرجّحوا وجود ما أسموه بـ"اللغة العالية" وهي لغة شعرية خاصة بالإضافة للهجات محلية فاعتبروا تلك اللغة لغة رفيعة تظهر مدارك الشاعر وثقافته أمام الملك [26] والرأي القائل أنها لغة قريش أقوى لأن أقدم النصوص بهذه اللغة هو القرآن فالشعر الجاهلي، إن كان جاهليًا حقًا، دُوّن بعد الإسلام ولا يملك الباحثون نسخة أصلية لمُعلّقة أو قصيدة جاهلية ليُحدّد تاريخها بشكل دقيق. 
11 | ولكن هناك فجوات عديدة تنخر بهذا الرأي إذ توجه العلماء الأقدمون إلى القول بأن مكة كانت "مهوى أفئدة العرب" وأنهم كانوا يعرضون لغتهم على قريش وأن تلك القبيلة كانت تختار الأصلح فتأخذه وتترك الرديء حتى غلبت لغتهم شبه الجزيرة بكاملها قبل الإسلام [27] يٌفنّد هذا الرأي الكتابات التي لا تبعد عن الإسلام بكثير وهي مكتوبة بلهجة مختلفة عن عربية القرآن فلم يُعثر على دليل أو أثر أن أحدًا من العرب قٌبيل الإسلام دوّن بهذه اللغة وأقرب الكتابات لها هي خمسة نصوص كُتبت بعربية نبطية وهي لغة مُتحكمة في أسلوبها وقواعدها والكثرة الغالبة من كلماتها تمنعها أن تعد في عداد عربية القرآن [28] وسيادة اللغة ترتبط غالبًا بسيادة سياسية ولا يوجد دليل قطعي على هذه السيادة القُرشية المزعومة على القبائل قبل الإسلام فقد كانت العرب قبل الإسلام تعتبر قريشًا تُجّارًا وليسوا مقاتلين [29] ويُرجّح عدد من الباحثين أن كل الوارد أنها لهجة قريش كان من باب تفضيل النبي محمد أو هو نتاج التنافس بين الأنصار والمهاجرين [30] ولم يرد في القرآن أنها لغة قريش بل وردت آيات تحدي أن يأتوا بمثله [31] فهذا التحدي أن يأتوا بمثله وبنفس لسانه "العربي المبين " دليل أنه أكمل الألسنة العربية وليس لسان بعض العرب على غيرهم بل إن المسلمين يعتبرون القرآن معجزة بحد ذاتها [29] أما أصل هذه اللغة ففيه اختلاف بين العلماء فكل الوارد عن أنها لهجة قريش سببه عدم العثور على أثر يسبق الإسلام مُدوّن بهذه اللغة ومصدر الباحثين الوحيد هو المصادر الإسلامية لاستنباط رأي علمي مقبول 12 | قسّم علماء الآثار اللغات العربية إلى قسمين عربية جنوبية قديمة وتشمل لغة سبئية وقتبانية وحضرمية ومعينية والقسم الآخر هو عربية شمالية قديمة وتشمل الحسائية والصفائية ولغة لحيانية/ديدانية وثمودية (لا علاقة لها بثمود إنما هي تسمية اصطلاحية) والتيمائية كان العرب الجنوبيون يستعملون الحرف نون كأداة للتعريف ويضعونه آخر الكلمة بينما العرب الشماليون استعملوا الحرف هاء كأداة للتعريف ومايُميّز العربية "الفصحى" عن هذه اللغات هو استعمالها لأداة التعريف "ال" [32] أقرب النصوص القديمة لهذه العربية هو نقش النمارة الذي اُكتُشِف بجبل الدروز وهو نص مؤرخ بتاريخ 328م ومكتوب بنوع من الخط النبطي القريب من الخط العربي الحالي، وهو عبارة عن رسم لضريح ملك مملكة الحيرة امرئ القيس بن عمرو وصف فيه 
بأنه "ملك العرب" فالسلطة السياسية متوفرة والنص مكتوب بعربية هي الأقرب لعربية القرآن [32] وهناك نقوش أخرى في قرية الفاو عاصمة مملكة كندة وقد كتبت بالخط المسند وتعود إلى القرن الأول قبل الميلاد ووصف الباحثون لغة قرية الفاو بأنها "شبه سبئية" ومع ذلك فإنهم استخدموا الألف والميم كأداة للتعريف [33]، ونقش عين عبدات في صحراء النقب، ويعود تاريخه إلى القرن الأول أو الثاني بعد الميلاد، وقد كتب بالحرف النبطي ونقش آخر لا يبتعد كثيرًا عن الإسلام إذ أنه دُوّن قبل مولد النبي محمد بسنتين وجاء فيه : 13 | « أنا شرحيل بر ظلمو بنيت ذا المرطول سنت 463 بعد مفسد خيبر بعام» 14 | فهو نص بعربية مفهومة ولكنها ليست عربية القرآن [32] وقد كان لممالك الحيرة وكندة والغساسنة سلطة سياسية مُثبَتة بدراسات أثرية وكتابات قديمة لليونانيين لم تكن موجودة لقريش فلا يوجد دليل على أن هذه الممالك كانت تتبع قريشا سياسيا أو دينيا حتى بل العكس، تجار قريش من كان يتودد إليهم وكانت مضارب أولئك الملوك مقصد الشعراء لا مكة [34] وعثر على كتابات قريبة من مكة تعود لفترة قريبة من الإسلام دُوّنت بلسان وخط مختلف عن الخط الذي دُوّن به القرآن ففرضية تغلب لسان قريش على العرب قبل الإسلام فندتها الاكتشافات الأثرية وأغلب الظن أنها ظهرت تعصبًا للنبي محمد ورغبة من اللغويين القدماء رفع شأن قبيلته والتي كانت صاحبة السلطة السياسية بعد الإسلام لأمد طويل [35] فاللغة العربية مرت بعدة أطوار ويمكن اعتبار لهجة بادية الشام والعراق القديمة أقرب اللهجات العربية إلى عربية القرآن [36] 15 | لم يُعرَف على وجه الدقة متى ظهرت كلمة العرب؛ وكذلك جميع المفردات المشتقة من الأصل المشتمل على أحرف العين والراء والباء، مثل كلمات: عربية وأعراب وغيرها، وأقدم نص أثري ورد فيه اسم العرب هو اللوح المسماري المنسوب للملك الآشوري شلمنصر الثالث في القرن التاسع قبل الميلاد، ذكر فيه انتصاره على تحالف ملوك آرام ضده بزعامة ملك دمشق، وأنه غنم ألف جمل من جنديبو من بلاد العرب، ويذكر البعض - من علماء اللغات[37] - أن كلمة عرب وجدت في بعض القصص والأوصاف اليونانية والفارسية وكان يقصد بها أعراب الجزيرة العربية، ولم يكن هناك لغة عربية معينة، لكن جميع اللغات التي تكلمت بها القبائل والأقوام التي كانت تسكن الجزيرة العربية سُمّيت لغات عربية نسبة إلى الجزيرة العربية. 
16 | اللغة العربية من اللغات السامية التي شهدت تطورًا كبيرًا وتغيرًا في مراحلها الداخلية، وللقرآن فضل عظيم على اللغة العربية حيث بسببه أصبحت هذه اللغة الفرع الوحيد من اللغات السامية الذي حافظ على توهجه وعالميته، في حين اندثرت معظم اللغات السامية، وما بقي منها عدا لغات محلية ذات نطاق ضيق مثل: العبرية والأمهرية (لغة أهل الحبشة، أي ما يُعرف اليوم بإثيوبيا)،. يتحدث اللغة العربية حاليًا قُرابة 422 مليون نسمة كلغة أم، كما يتحدث بها من المسلمين غير العرب قرابة العدد نفسه كلغة ثانية. 17 | فصّل اللغويون اللغة العربية إلى ثلاثة أصناف رئيسية، وهي: التقليدي أو العربي القياسي، والرسمي، والمنطوقة أو لغة عربية عامية. بين الثلاثة، العربي التقليدي هو الشكل للغة العربية الذي يوجد بشكل حرفي في القرآن، من ذلك اسم الصنف. العربية القرآنية استعملت فقط في المؤسسات الدينية وأحيانا في التعليم، لكنها لم تتكلّم عمومًا. العربية القياسية من الناحية الأخرى هي اللغة الرسمية في الوطن العربي وهي مستعملة في الأدب غير الديني، مثل مؤسسات، عربي عامي "اللغة العاميّة"، يتكلّمها أغلبية الناس كلهجتهم اليومية. العربية العامية مختلفة من منطقة إلى منطقة، تقريبا مثل أيّة لهجة مُماثلة لأيّة لغة أخرى. 18 | انحدارها من اللغات السامية[عدل] 19 | يقول البعض إن اللغة العربية هي أقرب اللغات السامية إلى "اللغة السامية الأم"، وذلك لأنها احتفظت بعناصر قديمة تعود إلى اللغة السامية الأم أكثر من أي لغة سامية أخرى. 
ففيها أصوات ليست موجودة في أيّ من اللغات السامية الأخرى، بالإضافة إلى وجود نظام الإعراب والعديد من الصيغ لجموع التكسير والعديد من الظواهر اللغوية الأخرى التي كانت موجودة في اللغة السامية الأم.[38] وتُعد اللغة العربية "الشمالية"، أقرب اللغات إلى الأصل الذي تفرّعت منه اللغات الساميّة، لأن عرب الشمال لم يمتزجوا كثيرًا بغيرهم من الأمم، ولم تخضعهم أمم أخرى لحكمهم كما كان الشأن في كثير من الأمم السابقة الأخرى كالعبرانيين والبابليين والآشوريين، فحفظتهم الصحراء من غزو الأعداء وحكم الأمم الأجنبية، كما حفظت لغتهم من أن تتأثر تأثرًا كبيرًا بغيرهم.[39] كذلك فإن العربية هي أكثر اللغات السامية احتفاظًا بسمات السامية الأولى فقد احتفظت بمعظم أصوات اللغة السامية وخصائصها النحوية والصرفية،[40] فقد احتفظت بأصوات فقدتها بعض اللغات مثل: غ، ح، خ، ض، ظ، ث، ذ. ولا ينافسها في هذه المحافظة إلا العربية الجنوبية، واحتفظت أيضًا بعلامات الإعراب بينما فقدتها اللغات السامية الأخرى، وبمعظم الصيغ الاشتقاقية للسامية الأم: اسم الفاعل، المفعول، وتصريف الضمائر مع الأسماء والأفعال: بيتي، بيتك، بيته، رأيته، رآني. واحتفظت العربية بمعظم الصيغ الأصلية للضمائر وأسماء الإشارة والأسماء الموصولة. 
وبما أن معجم العربية الفصحى يُعتبر ثروة لفظية ضخمة لا يعادلها أي معجم سامي آخر، فإنها أصبحت عونًا لعلماء الساميات في إجراء المقارنات اللغوية أو قراءة النصوص السامية القديمة كنصوص الآثار الأكادية والفينيقية والأوغاريتية وحتى نصوص التوراة العبرية.[40] 20 | ينقض هذا الرأي فرضية أن هذه العربية هي العربية الصحيحة والسليمة وماسواها فاسد وردئ، فاللحيانيون والأنباط والسبئيين كانوا يكتبون ويدونون بعربية مختلفة وهي "فصحى" بالنسبة لهم فإن عرفوا هذه العربية أو اعتقدوا أنها أفصح وأفضل من لغاتهم لدونوا بها [41] كما أن لغات الأنباط والسبئيين موجودة قبل أن يوجد أي تدخل أجنبي في بلدانهم وإن كانت التجارة "تفسد" اللغة وفق منطق لغويي العصور الوسطى، لأنطبق منطقهم على قريش كونهم تجار وأهل حاضرة ولم يكونوا أعراباً[42] 21 | كما أن تسمية لغة المناذرة وكندة (الذين تركوا أقرب النصوص لهذه العربية) بالـ"عدنانية" خاطئ فهم لم يعرفوا التسمية قبل الإسلام ولم يدعوا النسبة بعده فكل هذه النظريات أنتجتها العصبيات التي ظهرت في عصر الخلافة العباسية [43] ويناقض أهل الأخبار أنفسهم لإنهم يلجئون للرواية الشفهية لا النقل عن مصدر وسند مكتوب [44] إذ يناقض رواية عدم احتكاك "عرب الشمال" بأحد روايات الإخباريين عن استعانة قصي بن كلاب بالروم لطرد الأزد من مكة [45] وإن شكك أحد في هذه الرواية فإن كتابات اليونانيين فصلت في أحوال شبه الجزيرة العربية منها سيطرة الإمبراطورية الرومانية وإخضاعها لشمال الجزيرة العربية مراراً [46] وذكر اليونان أن ساحل كنانة (القبيلة التي تتفرع منها قريش حسب النسابة) كان خاضعا للأحباش في القرن الأول قبل الميلاد [47][48] فهذه كتابات كلاسيكية واكتشافات أثرية تضعف الروايات التي ظهرت نتيجة العصبيات بين يثرب ومكة فحرصت قبيلة قريش أن تجعل من نفسها تاجرة جزيرة العرب، وزعيمتها في اللغة وأنها موطن الفصاحة والبيان التي يذهب إليها علماء اللغة ليقرروا عنهم الفصيح والردئ من الكلام فيصبح ملكهم بعد الإسلام أصيلاً مجيداً تليدا، ونتيجة طبيعية لما كانوا عليه قبله [43] 22 | توحيد اللهجات العربية[عدل] 23 | مقالة مفصلة: لغة عربية فصحى 24 | زعم أهل الأخبار أن هذه العربية هي عربية قريش وأنها لغة الأدب عند الجاهليين مستشهدين بالشعر الجاهلي لإثبات ذلك وزعموا أنه لم يكن من شاعر إلا وعرض قصيدته على قريش لتقرر سلامتها 
اللغوية عنه [49] وقد فندت الاكتشافات الأثرية وكتابات المؤرخين المعاصرة لتلك الفترات نظرية تغلب لسان قريش على العرب وأن كعبة مكة كانت محط رحال القبائل بل كتابات الإخباريين واللغويون القدماء تناقض نفسها لاعتمادهم على الروايات واللجوء للوضع والكذب لإثبات آرائهم [50] فلغويو العرب القدماء أرادوا رفع شأن قبيلة النبي محمد ومع ذلك يناقضون أنفسهم حين يذكرون أن النبي محمد كان يخاطب وفود العرب على اختلاف شعوبهم وقبائلهم وعلى مافي لغاتهم من اختلاف [51] منها ماورد عن علي بن أبي طالب عند قدوم وفد من قبائل نهد وتعجب علي من قدرة النبي على مخاطبة العرب بكل لهجاتهم ففي هذا تناقض صريح عن ما أورده الأخباريين انفسهم عن توحد لهجات العرب قبل الإسلام ودلالة أن اختلاف اللهجات لدرجة أنها قد لا تكون مفهومة كان امرا طبيعيا وشائعا بين العرب في تلك الأزمان [52] أما الوارد بشأن دور سوق عكاظ في تهذيب اللغة فضعيف فعمر السوق لا يتجاوز الخمسة عشر سنة قبل الإسلام وحتى لو كان له الدور المزعوم في كتابات الإخباريين، فانه لا يعتبر دلالة قطعية على دور قريش قبل الإسلام في توحيد لهجات العرب فهم كانوا مثل غيرهم من قصاد ذلك السوق [53] كذلك استفسار صحابة قرشيين عن ألفاظ وكلمات واردة في القرآن يضعف أنها لغة قريش ودأب المفسرون على الاستشهاد بلغات العرب وسؤالهم لمعرفة مااشكل عليهم فهمه من كلمات القرآن ونادراً مااستشهدوا بقريش [54] فدور قريش المزعوم في تهذيب اللغة العربية وأن لغتهم كانت لغة القرآن فرضية تنخرها التناقضات من كل جانب في كتابات اللغوييين العرب القدماء أنفسهم بالإضافة للشواهد الأثرية التي لا تبتعد عن الإسلام كثيراً وهي كتابات مدونة بعربية مختلفة عن عربية القرآن في جنوب وشمال الجزيرة [55] 25 | ولكن يبقى السؤال عن أصل هذه اللغة فإن لم تكن لغة قريش فهي ليست لغة اللحيانيين والأنباط وليست بلغة الحميريين بالتأكيد [56] وهناك رأي آخر ظهر في كتابات الإخباريين وهي أن هذه العربية هي عربية مضر وخصصوا مضر دون ربيعة [57] مع أن غالب من يسمون أنفسهم "علماء الأنساب" جعلوا ربيعة شخصاً وزعموا أنه أخ مضر ومع ذلك لم يتحدثوا عن لغة ربيعة ولم يترك أبناء ربيعة كتابة جاهلية بلغة كانوا يتحدثون ويكتبون بها تمكن الباحثين على الوقوف على لغتهم وما إذا كانت عربيتهم عربية القرآن ونظرية عدنان وأبنائه عصبية ظهرت بعد الإسلام ولا وجود 
لأثر لها قبله [58] هذه اللغة العربية هي عربية كل القبائل التي كانت تستخدم أداة التعريف "ال" عوضا عن الحرف (ن) في آخر الكلمة كما كان يفعل المتحدثين بالعربية الجنوبية القديمة أو (ها) وفق منطق المتحدثين بالعربية الشمالية القديمة. مع العلم أن كندة والمناذرة كانوا الوحيدين الذين تركوا كتابات جاهلية بعربية "ال" هذه دون سائر القبائل وهي ليست قبائل "عدنانية" ولم تدعي ذلك بعد الإسلام [59] في نفس الوقت، فإنه لا يجعلها عربية قحطانية وإن كانت قبائل كندة والمناذرة "قحطانية" في كتابات أهل الأخبار. وإن لم يعرف الباحثون أصل "عدنان" فإنهم يعرفون من أين أخذ النسابة والأخباريين قحطانهم فمصدرهم كان التوراة بشكل رئيسي وورد نص سبئي واحد عن أرض اسمها "قحطن" يملكها ملك مملكة كندة المدعو ربيعة آل ثور في أواخر القرن الثاني قبل الميلاد [60][61] ومع ذلك فإن كتابات كندة ونجد في تلك الفترة لم تكن بعربية القرآن بل كانت لغتهم "شبه سبئية" وإن استعملوا "ال" للتعريف وكتابة ملك المناذرة في بادية الشام تحوي ألفاظاً ومصطلحات تمنعها أن تعد من عربية القرآن [62] توحيد اللهجات حدث بعد تمكن الإسلام من العرب ودعوته إلى توحيد صفوفهم ونبذ الشرك، أصبح للعرب لغة واحدة تجمعهم وأصبح واجباً عليهم تعلم عربية القرآن والاهتمام بها فتغلبت لغة القرآن على ماسواها وهدم الإسلام ما كان قبله فتغيرت أسمائهم ولغاتهم بتغير دينهم ولا عبرة لكل الوارد عن وجود لعربية القرآن أو تغلبها عليهم قبل الإسلام فلا دليل أثري يثبت ذلك بصورة قطعية 26 | كان للفتوحات الإسلامية بعد وفاة النبي محمد كبير الأثر في نشر اللغة العربية في أصقاع مختلفة خارج شبه الجزيرة العربية، فبعد أن اعتنق كثير من السريان والأقباط والروم والأمازيغ والآشوريين الدين الإسلامي، أصبحوا عربًا باللغة كذلك الأمر، لسببين رئيسيين، منها أن اللغة الجديدة كانت لغة الدين حديث النشأة، وهي لغة مصدر التشريع الأساسي في الإسلام (القرآن، والأحاديث النبوية)، ولأن الصلاة وبعض العبادات أخرى، لا تتم إلا بإتقان بعض كلمات من هذه اللغة، وأيضًا لتعريب دواوين الأمصار حديثة الفتح، في عهد الخليفة الأموي عبد الملك بن مروان، وهكذا أصبحت العربية لغة السياسة والإدارة بعد أن نُقلت إليها المصطلحات الفنيّة في الإدارة والحساب.[64] وعلى الرغم من أن كثير من الأمم الأعجمية بقيت على هويتها ولم تتقبل الهوية 
العربية، مثل قسم كبير من الأمازيغ والترك والكرد والفرس وبعض الآشوريين والسريان، فإنها تلقنت اللغة العربية وتكلمتها بطلاقة إلى جانب لغتها الأم، وذلك لأن بعضها اعتنق الإسلام مثل الأكراد والفرس والأتراك، وحتى الذين بقوا على الدين المسيحي أو اليهودي أو المندائي الصابئي، تكلموا العربية كلغة رئيسية إلى جانب لغتهم الأم، بعد أن أصبحت لغة العلم والأدب خلال العصر الذهبي للدولة الإسلامية، تحت ظل الخلافة العباسيّة، بل أن تلك الشعوب اقتبست الأبجدية العربية في كتابة لغتها.[65] ومع مرور الوقت أصبحت اللغة العربية لغة الشعائر لعدد كبير من الكنائس المسيحية في الوطن العربي، مثل كنائس الروم الأرثوذكس، والروم الكاثوليك، والسريان، كما كتبت بها كثير من الأعمال الدينية والفكرية اليهودية في العصور الوسطى. 27 | ساهم عدد من الأعاجم في تطوير اللغة العربية ومصطلحاتها خلال العصرين الأموي والعباسي بفضل ما نقلوه إلى العربية من علوم مترجمة عن لغتهم الأم، فبرز في العربية كلمات ومصطلحات جديدة لم تكن معهودة من قبل، مثل "بيمارستان"، المأخوذة من الفارسية، وخلال العصر الذهبي بلغت اللغة العربية أقصى درجات الازدهار، حيث عبّر الأدباء والشعراء والعلماء العرب والعجم عن أفكارهم بهذه اللغة، فكُتبت آلاف المجلدات والمؤلفات والمخطوطات حول مختلف المواضيع بلسان العرب.[65] وكان من أهمية اللغة العربية في المجال العلمي والثقافي، أن اقتبست بعض اللغات الأوروبيّة كلمات منها أثناء العهد الصليبي في المشرق، أو عن طريق التثاقف والاختلاط مع عرب الأندلس، ومن أبرز اللغات التي تأثرت بالعربية: الإنكليزية والفرنسية والإسبانية والإيطالية والألمانية. 
28 | عهد الركود[عدل] 29 | خلال القرن الثالث عشر اجتاح الشرق العربي المغول بقيادة هولاكو خان، فأمعنوا في معالم الثقافة والحضارة تدميرًا وتخريبًا، الأمر الذي ترك المسلمين في حال تصفها المستشرقة كارين آرمسترونغ باليتم، ففقهاء وعلماء العصر المملوكي لم يكونوا مهتمين بتطوير الفتاوي والاجتهادات الفقهية والعلوم المختلفة بقدر ما كانوا مهتمين بإعادة تجميع ما قد ضاع وفقد منها،[66] لكن على الرغم من ذلك فإن اللغة العربية استمرت لغة مهمة في البلدان الإسلامية، إلا أنها أخذت بالانحسار في شبه الجزيرة الأيبيرية مع قيام القشتاليين بإسقاط المدن الأندلسية شيئاً فشيئًا وقتل أو نفي أهلها المسلمين، كذلك فقد أخذت أهميتها العلمية تتراجع بعد ركود الاكتشافات العلمية العربية، وبدء انتقال شعلة الحضارة إلى أوروبا. 30 | بالمقابل أخذت اللغة العربية تجد موطئ قدم لها، كلغة دين بشكل أساسي، في الأناضول وبلاد البلقان بفضل الفتوحات العثمانية في تلك النواحي، واعتناق عدد من السكان للإسلام، ومن أبرز الأدلّة على انتشار اللغة العربية في تلك الأصقاع الحجة المؤسسة لمدينة سراييفو في سنة 1462، والتي كُتبت باللغة العربية بعد أن خضعت للحكم العثماني.[67] أصبحت اللغة العربية اللغة الرسمية الثانية في الدولة الإسلامية عند انتقال الخلافة إلى بني عثمان، وبحلول القرن السادس عشر كانت اللغة العربية قد استحالت لغة الدين الإسلامي فقط، وقلّت أهميتها بالنسبة للعلوم والآداب، إذ أن العهد العثماني لا يتسم بمنجزات علمية أو ثقافية ذات شأن، كما كان الحال في العهد العبّاسي، وخلال هذا العهد أخذت مسافة الخلاف تتسع بين اللهجات العربية حتى أصبح بعضها غريبًا عن بعض في النطق والتعبير. 31 | بعد أن سيطر على اللغة العربية شيءٌ من الركود طيلة ما يقرب من 400 سنة، أخذت في أواخر القرن التاسع عشر تشهد بعض الانتعاش. 
تجلّى هذا الانتعاش بنهضة ثقافية في بلاد الشام ومصر بسبب ازدياد نسبة المتعلمين وافتتاح كثير من المطابع التي قامت بتجميع الحروف العربية، ونشرت الصحف الحديثة بهذه اللغة لأول مرة، كذلك ظهرت عدّة جمعيات أدبيّة وأدباء وشعراء كبار ساهموا في إحياء اللغة العربية الفصحى، ومن هؤلاء: أحمد شوقي الملقب بأمير الشعراء، الشيخ ناصيف اليازجي، المعلّم بطرس البستاني، أمين الريحاني، وجبران خليل جبران.[68] وقد أسس هؤلاء الأدباء القواميس والمعاجم الحديثة مثل دائرة المعارف وقاموس محيط المحيط، ووفروا مؤلفات قيّمة في مختلف فنون المعرفة، بعد أن ترجموا واقتبسوا روائع الفكر الغربي، كذلك يسّر الأدباء العرب في تلك الفترة اللغة العربية وقواعدها، فوضعوا لها المعاجم الحديثة التي لا تزال متداولة حتى الآن، وتأسست الصحافة العربية لتعيد إحياء الفكر العربي وتوقظ القرّاء على أخبار بلادهم المحلية والأخبار العالميّة.[68] ومن أبرز المدارس الفكرية العربية التي برزت في ذلك العهد مدرسة أدب المهجر، وهو الأدب الذي أنشأه العرب الذين هاجروا من بلاد الشام إلى أمريكا الشمالية والجنوبية، وكونوا جاليات عربية، وروابط أدبية أخرجت صحفًا ومجلات تهتم بشؤونهم وأدبهم، وأنشأ أتباعها عدّة نقابات أبرزها الرابطة القلمية.[69] 32 | يُلاحظ أن هذا الانتعاش للغة العربية كان انتعاشًا في الحقل الأدبي فحسب، أما في الحقل العلمي فلم تلعب اللغة العربية دورًا كبيرًا كما في السابق، ولم تكن في أغلب الأحيان إلا لغة تلقين مواد علمية في بعض المدارس والجامعات، وقد تراجع دورها هذا بشكل كبير حتى، خصوصًا بعد نهاية الحرب الباردة بين المعسكرين الشيوعي والرأسمالي في أواخر القرن العشرين، واتجاه العالم نحو نظام الكون الواحد، حيث انتشرت اللغة الإنكليزية في أغلب الدول العربية، وغدا كثيرون يتكلمونها كلغة ثانية، خصوصًا بعد أن أصبحت هي لغة العلم والتجارة المتداولة. 33 | يتحدث العربية اليوم أكثر من 422 مليون نسمة،(1) ويتوزع متحدثوها بشكل رئيسي في المنطقة المعروفة باسم الوطن العربي، بالإضافة إلى العديد من المناطق الأخرى المجاورة له كالأحواز وتركيا وتشاد ومالي والسنغال وإرتيريا.
كما أنها تُدرّس بشكل رسمي أو غير رسمي في الدول الإسلامية والدول الإفريقية المحاذية للوطن العربي، إلا أن عدد اللغات التي تستخدم الأبجدية العربية تراجع بعض الشيء، كون عدد من الدول مثل أذربيجان وتركيا عدل عن استخدام تلك الأبجدية واستعاض 34 | تعدد اللهجات كان موجودا عند العرب من أيام الجاهلية، حيث كانت هناك لهجة لكل قبيلة من القبائل. وقد استمر الوضع هكذا بعد مجيء الإسلام. ومن أبرز الأسباب التي أدّت لولادة لهجات عربية مختلفة في القِدم هو أن العرب كانوا في بداية عهدهم أميين لا تربطهم تجارة ولا إمارة ولا دين، فكان من الطبيعي أن ينشأ من ذلك ومن اختلاف الوضع والارتجال، ومن كثرة الحل والترحال، وتأثير الخلطة والاعتزال، اضطراب في اللغة كالترادف، واختلاف اللهجات في الإبدال والإعلال والبناء والإعراب.[70] ومن أبرز اللهجات والألفاظ: عجعجة قُضاعة أي قلب الياء جيمًا بعد العين وبعد الياء المشددة، مثل راعي يقولون فيها: راعج. وفي كرسي كرسج، وطمطمانية حِمْير وهي جعل "إم" بدل "أل" في التعريف، فيقولون في البر: أمبر، وفي الصيام أمصيام، وفحفحة هذيل أي جعل الحاء عينًا، مثل: أحل إليه فيقولون أعل إليه، وعنعنة تميم وهي إبدال العين في الهمزة إذا وقعت في أول الكلمة، فيقولون في أمان: عمان، وكشكشة أسد أي جعل الكاف شينًا مثل "عليك" فيقولونها: "عليش"، وقطْعةِ طيئ وهي حذف آخر الكلمة، مثل قولهم: يا أبا الحسن، تصبح: يا أبا الحسا، وغير ذلك مما باعد بين الألسنة وأوشك أن يقسم اللغة إلى لغات لا يتفاهم أهلها ولا يتقارب أصلها.[70] 35 | وقد كان التواصل بين أفراد القبيلة الواحدة يَتم بواسطة لهجتها الخاصة، أما عندما يَخطب شخص ما أو يَتحدث إلى أشخاص من قبائل أخرى فيستعمل حينها اللغة الواحدة المشتركة. وقد استمر الوضع هكذا بعد مجيء الإسلام. ويُرجح أن العامية الحديثة بدأت حين الفتوحات الإسلامية، حيث أن المسلمين الجدد في بلاد الأعاجم (والتي أصبح العديد منها اليوم من البلدان العربية) بدؤوا بتعلم العربية لكنهم - وبشكل طبيعي - لم يَستطيعوا تحدثها كما يتحدثها العرب بالضبط، وبالتالي فقد حرّفت قليلاً. وفي ذلك الوقت لم يَكن الفرق واضحاً كثيراً، لكن بالتدريج حرفت العربية وتغيرت صفاتها الصوتية وتركيب الجمل فيها إلخ.. حتى تحوّلت إلى اللهجات العامية الحديثة.
-------------------------------------------------------------------------------- /examples/colored.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | """ 3 | Image-colored wordcloud 4 | ======================= 5 | 6 | You can color a word-cloud by using an image-based coloring strategy 7 | implemented in ImageColorGenerator. It uses the average color of the region 8 | occupied by the word in a source image. You can combine this with masking - 9 | pure-white will be interpreted as 'don't occupy' by the WordCloud object when 10 | passed as mask. 11 | If you want white as a legal color, you can just pass a different image to 12 | "mask", but make sure the image shapes line up. 13 | """ 14 | 15 | from os import path 16 | from PIL import Image 17 | import numpy as np 18 | import matplotlib.pyplot as plt 19 | import os 20 | 21 | from wordcloud import WordCloud, STOPWORDS, ImageColorGenerator 22 | 23 | # get data directory (using getcwd() is needed to support running example in generated IPython notebook) 24 | d = path.dirname(__file__) if "__file__" in locals() else os.getcwd() 25 | 26 | # Read the whole text. 
27 | text = open(path.join(d, 'alice.txt')).read() 28 | 29 | # read the mask / color image taken from 30 | # http://jirkavinse.deviantart.com/art/quot-Real-Life-quot-Alice-282261010 31 | alice_coloring = np.array(Image.open(path.join(d, "alice_color.png"))) 32 | stopwords = set(STOPWORDS) 33 | stopwords.add("said") 34 | 35 | wc = WordCloud(background_color="white", max_words=2000, mask=alice_coloring, 36 | stopwords=stopwords, max_font_size=40, random_state=42) 37 | # generate word cloud 38 | wc.generate(text) 39 | 40 | # create coloring from image 41 | image_colors = ImageColorGenerator(alice_coloring) 42 | 43 | # show 44 | fig, axes = plt.subplots(1, 3) 45 | axes[0].imshow(wc, interpolation="bilinear") 46 | # recolor wordcloud and show 47 | # we could also give color_func=image_colors directly in the constructor 48 | axes[1].imshow(wc.recolor(color_func=image_colors), interpolation="bilinear") 49 | axes[2].imshow(alice_coloring, cmap=plt.cm.gray, interpolation="bilinear") 50 | for ax in axes: 51 | ax.set_axis_off() 52 | plt.show() 53 | -------------------------------------------------------------------------------- /examples/colored_by_group.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/amueller/word_cloud/e7753d4b9904a5bc6c96a4705ef43d7169572fda/examples/colored_by_group.png -------------------------------------------------------------------------------- /examples/colored_by_group.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | """ 3 | Colored by Group Example 4 | ======================== 5 | 6 | Generating a word cloud that assigns colors to words based on 7 | a predefined mapping from colors to words 8 | """ 9 | 10 | from wordcloud import (WordCloud, get_single_color_func) 11 | import matplotlib.pyplot as plt 12 | 13 | 14 | class SimpleGroupedColorFunc(object): 15 | """Create a color function object which assigns EXACT 
colors 16 | to certain words based on the color to words mapping 17 | 18 | Parameters 19 | ---------- 20 | color_to_words : dict(str -> list(str)) 21 | A dictionary that maps a color to the list of words. 22 | 23 | default_color : str 24 | Color that will be assigned to a word that's not a member 25 | of any value from color_to_words. 26 | """ 27 | 28 | def __init__(self, color_to_words, default_color): 29 | self.word_to_color = {word: color 30 | for (color, words) in color_to_words.items() 31 | for word in words} 32 | 33 | self.default_color = default_color 34 | 35 | def __call__(self, word, **kwargs): 36 | return self.word_to_color.get(word, self.default_color) 37 | 38 | 39 | class GroupedColorFunc(object): 40 | """Create a color function object which assigns DIFFERENT SHADES of 41 | specified colors to certain words based on the color to words mapping. 42 | 43 | Uses wordcloud.get_single_color_func 44 | 45 | Parameters 46 | ---------- 47 | color_to_words : dict(str -> list(str)) 48 | A dictionary that maps a color to the list of words. 49 | 50 | default_color : str 51 | Color that will be assigned to a word that's not a member 52 | of any value from color_to_words. 53 | """ 54 | 55 | def __init__(self, color_to_words, default_color): 56 | self.color_func_to_words = [ 57 | (get_single_color_func(color), set(words)) 58 | for (color, words) in color_to_words.items()] 59 | 60 | self.default_color_func = get_single_color_func(default_color) 61 | 62 | def get_color_func(self, word): 63 | """Returns a single_color_func associated with the word""" 64 | try: 65 | color_func = next( 66 | color_func for (color_func, words) in self.color_func_to_words 67 | if word in words) 68 | except StopIteration: 69 | color_func = self.default_color_func 70 | 71 | return color_func 72 | 73 | def __call__(self, word, **kwargs): 74 | return self.get_color_func(word)(word, **kwargs) 75 | 76 | 77 | text = """The Zen of Python, by Tim Peters 78 | Beautiful is better than ugly. 
79 | Explicit is better than implicit. 80 | Simple is better than complex. 81 | Complex is better than complicated. 82 | Flat is better than nested. 83 | Sparse is better than dense. 84 | Readability counts. 85 | Special cases aren't special enough to break the rules. 86 | Although practicality beats purity. 87 | Errors should never pass silently. 88 | Unless explicitly silenced. 89 | In the face of ambiguity, refuse the temptation to guess. 90 | There should be one-- and preferably only one --obvious way to do it. 91 | Although that way may not be obvious at first unless you're Dutch. 92 | Now is better than never. 93 | Although never is often better than *right* now. 94 | If the implementation is hard to explain, it's a bad idea. 95 | If the implementation is easy to explain, it may be a good idea. 96 | Namespaces are one honking great idea -- let's do more of those!""" 97 | 98 | # Since the text is small collocations are turned off and text is lower-cased 99 | wc = WordCloud(collocations=False).generate(text.lower()) 100 | 101 | color_to_words = { 102 | # words below will be colored with a green single color function 103 | '#00ff00': ['beautiful', 'explicit', 'simple', 'sparse', 104 | 'readability', 'rules', 'practicality', 105 | 'explicitly', 'one', 'now', 'easy', 'obvious', 'better'], 106 | # will be colored with a red single color function 107 | 'red': ['ugly', 'implicit', 'complex', 'complicated', 'nested', 108 | 'dense', 'special', 'errors', 'silently', 'ambiguity', 109 | 'guess', 'hard'] 110 | } 111 | 112 | # Words that are not in any of the color_to_words values 113 | # will be colored with a grey single color function 114 | default_color = 'grey' 115 | 116 | # Create a color function with single tone 117 | # grouped_color_func = SimpleGroupedColorFunc(color_to_words, default_color) 118 | 119 | # Create a color function with multiple tones 120 | grouped_color_func = GroupedColorFunc(color_to_words, default_color) 121 | 122 | # Apply our color function 123 
| wc.recolor(color_func=grouped_color_func) 124 | 125 | # Plot 126 | plt.figure() 127 | plt.imshow(wc, interpolation="bilinear") 128 | plt.axis("off") 129 | plt.show() 130 | -------------------------------------------------------------------------------- /examples/constitution.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/amueller/word_cloud/e7753d4b9904a5bc6c96a4705ef43d7169572fda/examples/constitution.png -------------------------------------------------------------------------------- /examples/constitution.txt: -------------------------------------------------------------------------------- 1 | We the People of the United States, in Order to form a more perfect Union, establish Justice, insure domestic Tranquility, provide for the common defence, promote the general Welfare, and secure the Blessings of Liberty to ourselves and our Posterity, do ordain and establish this Constitution for the United States of America. 2 | 3 | Article. I. 4 | 5 | Section. 1. 6 | 7 | All legislative Powers herein granted shall be vested in a Congress of the United States, which shall consist of a Senate and House of Representatives. 8 | 9 | Section. 2. 10 | 11 | The House of Representatives shall be composed of Members chosen every second Year by the People of the several States, and the Electors in each State shall have the Qualifications requisite for Electors of the most numerous Branch of the State Legislature. 12 | 13 | No Person shall be a Representative who shall not have attained to the Age of twenty five Years, and been seven Years a Citizen of the United States, and who shall not, when elected, be an Inhabitant of that State in which he shall be chosen. 
14 | 15 | Representatives and direct Taxes shall be apportioned among the several States which may be included within this Union, according to their respective Numbers, which shall be determined by adding to the whole Number of free Persons, including those bound to Service for a Term of Years, and excluding Indians not taxed, three fifths of all other Persons. The actual Enumeration shall be made within three Years after the first Meeting of the Congress of the United States, and within every subsequent Term of ten Years, in such Manner as they shall by Law direct. The Number of Representatives shall not exceed one for every thirty Thousand, but each State shall have at Least one Representative; and until such enumeration shall be made, the State of New Hampshire shall be entitled to chuse three, Massachusetts eight, Rhode-Island and Providence Plantations one, Connecticut five, New-York six, New Jersey four, Pennsylvania eight, Delaware one, Maryland six, Virginia ten, North Carolina five, South Carolina five, and Georgia three. 16 | 17 | When vacancies happen in the Representation from any State, the Executive Authority thereof shall issue Writs of Election to fill such Vacancies. 18 | 19 | The House of Representatives shall chuse their Speaker and other Officers; and shall have the sole Power of Impeachment. 20 | 21 | Section. 3. 22 | 23 | The Senate of the United States shall be composed of two Senators from each State, chosen by the Legislature thereof for six Years; and each Senator shall have one Vote. 24 | 25 | Immediately after they shall be assembled in Consequence of the first Election, they shall be divided as equally as may be into three Classes. 
The Seats of the Senators of the first Class shall be vacated at the Expiration of the second Year, of the second Class at the Expiration of the fourth Year, and of the third Class at the Expiration of the sixth Year, so that one third may be chosen every second Year; and if Vacancies happen by Resignation, or otherwise, during the Recess of the Legislature of any State, the Executive thereof may make temporary Appointments until the next Meeting of the Legislature, which shall then fill such Vacancies. 26 | 27 | No Person shall be a Senator who shall not have attained to the Age of thirty Years, and been nine Years a Citizen of the United States, and who shall not, when elected, be an Inhabitant of that State for which he shall be chosen. 28 | 29 | The Vice President of the United States shall be President of the Senate, but shall have no Vote, unless they be equally divided. 30 | 31 | The Senate shall chuse their other Officers, and also a President pro tempore, in the Absence of the Vice President, or when he shall exercise the Office of President of the United States. 32 | 33 | The Senate shall have the sole Power to try all Impeachments. When sitting for that Purpose, they shall be on Oath or Affirmation. When the President of the United States is tried, the Chief Justice shall preside: And no Person shall be convicted without the Concurrence of two thirds of the Members present. 34 | 35 | Judgment in Cases of Impeachment shall not extend further than to removal from Office, and disqualification to hold and enjoy any Office of honor, Trust or Profit under the United States: but the Party convicted shall nevertheless be liable and subject to Indictment, Trial, Judgment and Punishment, according to Law. 36 | 37 | Section. 4. 
38 | 39 | The Times, Places and Manner of holding Elections for Senators and Representatives, shall be prescribed in each State by the Legislature thereof; but the Congress may at any time by Law make or alter such Regulations, except as to the Places of chusing Senators. 40 | 41 | The Congress shall assemble at least once in every Year, and such Meeting shall be on the first Monday in December, unless they shall by Law appoint a different Day. 42 | 43 | Section. 5. 44 | 45 | Each House shall be the Judge of the Elections, Returns and Qualifications of its own Members, and a Majority of each shall constitute a Quorum to do Business; but a smaller Number may adjourn from day to day, and may be authorized to compel the Attendance of absent Members, in such Manner, and under such Penalties as each House may provide. 46 | 47 | Each House may determine the Rules of its Proceedings, punish its Members for disorderly Behaviour, and, with the Concurrence of two thirds, expel a Member. 48 | 49 | Each House shall keep a Journal of its Proceedings, and from time to time publish the same, excepting such Parts as may in their Judgment require Secrecy; and the Yeas and Nays of the Members of either House on any question shall, at the Desire of one fifth of those Present, be entered on the Journal. 50 | 51 | Neither House, during the Session of Congress, shall, without the Consent of the other, adjourn for more than three days, nor to any other Place than that in which the two Houses shall be sitting. 52 | 53 | Section. 6. 54 | 55 | The Senators and Representatives shall receive a Compensation for their Services, to be ascertained by Law, and paid out of the Treasury of the United States. 
They shall in all Cases, except Treason, Felony and Breach of the Peace, be privileged from Arrest during their Attendance at the Session of their respective Houses, and in going to and returning from the same; and for any Speech or Debate in either House, they shall not be questioned in any other Place. 56 | 57 | No Senator or Representative shall, during the Time for which he was elected, be appointed to any civil Office under the Authority of the United States, which shall have been created, or the Emoluments whereof shall have been encreased during such time; and no Person holding any Office under the United States, shall be a Member of either House during his Continuance in Office. 58 | 59 | Section. 7. 60 | 61 | All Bills for raising Revenue shall originate in the House of Representatives; but the Senate may propose or concur with Amendments as on other Bills. 62 | 63 | Every Bill which shall have passed the House of Representatives and the Senate, shall, before it become a Law, be presented to the President of the United States: If he approve he shall sign it, but if not he shall return it, with his Objections to that House in which it shall have originated, who shall enter the Objections at large on their Journal, and proceed to reconsider it. If after such Reconsideration two thirds of that House shall agree to pass the Bill, it shall be sent, together with the Objections, to the other House, by which it shall likewise be reconsidered, and if approved by two thirds of that House, it shall become a Law. But in all such Cases the Votes of both Houses shall be determined by yeas and Nays, and the Names of the Persons voting for and against the Bill shall be entered on the Journal of each House respectively. 
If any Bill shall not be returned by the President within ten Days (Sundays excepted) after it shall have been presented to him, the Same shall be a Law, in like Manner as if he had signed it, unless the Congress by their Adjournment prevent its Return, in which Case it shall not be a Law. 64 | 65 | Every Order, Resolution, or Vote to which the Concurrence of the Senate and House of Representatives may be necessary (except on a question of Adjournment) shall be presented to the President of the United States; and before the Same shall take Effect, shall be approved by him, or being disapproved by him, shall be repassed by two thirds of the Senate and House of Representatives, according to the Rules and Limitations prescribed in the Case of a Bill. 66 | 67 | Section. 8. 68 | 69 | The Congress shall have Power To lay and collect Taxes, Duties, Imposts and Excises, to pay the Debts and provide for the common Defence and general Welfare of the United States; but all Duties, Imposts and Excises shall be uniform throughout the United States; 70 | 71 | To borrow Money on the credit of the United States; 72 | 73 | To regulate Commerce with foreign Nations, and among the several States, and with the Indian Tribes; 74 | 75 | To establish an uniform Rule of Naturalization, and uniform Laws on the subject of Bankruptcies throughout the United States; 76 | 77 | To coin Money, regulate the Value thereof, and of foreign Coin, and fix the Standard of Weights and Measures; 78 | 79 | To provide for the Punishment of counterfeiting the Securities and current Coin of the United States; 80 | 81 | To establish Post Offices and post Roads; 82 | 83 | To promote the Progress of Science and useful Arts, by securing for limited Times to Authors and Inventors the exclusive Right to their respective Writings and Discoveries; 84 | 85 | To constitute Tribunals inferior to the supreme Court; 86 | 87 | To define and punish Piracies and Felonies committed on the high Seas, and Offences against the 
Law of Nations; 88 | 89 | To declare War, grant Letters of Marque and Reprisal, and make Rules concerning Captures on Land and Water; 90 | 91 | To raise and support Armies, but no Appropriation of Money to that Use shall be for a longer Term than two Years; 92 | 93 | To provide and maintain a Navy; 94 | 95 | To make Rules for the Government and Regulation of the land and naval Forces; 96 | 97 | To provide for calling forth the Militia to execute the Laws of the Union, suppress Insurrections and repel Invasions; 98 | 99 | To provide for organizing, arming, and disciplining, the Militia, and for governing such Part of them as may be employed in the Service of the United States, reserving to the States respectively, the Appointment of the Officers, and the Authority of training the Militia according to the discipline prescribed by Congress; 100 | 101 | To exercise exclusive Legislation in all Cases whatsoever, over such District (not exceeding ten Miles square) as may, by Cession of particular States, and the Acceptance of Congress, become the Seat of the Government of the United States, and to exercise like Authority over all Places purchased by the Consent of the Legislature of the State in which the Same shall be, for the Erection of Forts, Magazines, Arsenals, dock-Yards, and other needful Buildings;--And 102 | 103 | To make all Laws which shall be necessary and proper for carrying into Execution the foregoing Powers, and all other Powers vested by this Constitution in the Government of the United States, or in any Department or Officer thereof. 104 | 105 | Section. 9. 106 | 107 | The Migration or Importation of such Persons as any of the States now existing shall think proper to admit, shall not be prohibited by the Congress prior to the Year one thousand eight hundred and eight, but a Tax or duty may be imposed on such Importation, not exceeding ten dollars for each Person. 
108 | 109 | The Privilege of the Writ of Habeas Corpus shall not be suspended, unless when in Cases of Rebellion or Invasion the public Safety may require it. 110 | 111 | No Bill of Attainder or ex post facto Law shall be passed. 112 | 113 | No Capitation, or other direct, Tax shall be laid, unless in Proportion to the Census or enumeration herein before directed to be taken. 114 | 115 | No Tax or Duty shall be laid on Articles exported from any State. 116 | 117 | No Preference shall be given by any Regulation of Commerce or Revenue to the Ports of one State over those of another; nor shall Vessels bound to, or from, one State, be obliged to enter, clear, or pay Duties in another. 118 | 119 | No Money shall be drawn from the Treasury, but in Consequence of Appropriations made by Law; and a regular Statement and Account of the Receipts and Expenditures of all public Money shall be published from time to time. 120 | 121 | No Title of Nobility shall be granted by the United States: And no Person holding any Office of Profit or Trust under them, shall, without the Consent of the Congress, accept of any present, Emolument, Office, or Title, of any kind whatever, from any King, Prince, or foreign State. 122 | 123 | Section. 10. 124 | 125 | No State shall enter into any Treaty, Alliance, or Confederation; grant Letters of Marque and Reprisal; coin Money; emit Bills of Credit; make any Thing but gold and silver Coin a Tender in Payment of Debts; pass any Bill of Attainder, ex post facto Law, or Law impairing the Obligation of Contracts, or grant any Title of Nobility. 
126 | 127 | No State shall, without the Consent of the Congress, lay any Imposts or Duties on Imports or Exports, except what may be absolutely necessary for executing it's inspection Laws: and the net Produce of all Duties and Imposts, laid by any State on Imports or Exports, shall be for the Use of the Treasury of the United States; and all such Laws shall be subject to the Revision and Controul of the Congress. 128 | 129 | No State shall, without the Consent of Congress, lay any Duty of Tonnage, keep Troops, or Ships of War in time of Peace, enter into any Agreement or Compact with another State, or with a foreign Power, or engage in War, unless actually invaded, or in such imminent Danger as will not admit of delay. 130 | 131 | Article. II. 132 | 133 | Section. 1. 134 | 135 | The executive Power shall be vested in a President of the United States of America. He shall hold his Office during the Term of four Years, and, together with the Vice President, chosen for the same Term, be elected, as follows: 136 | 137 | Each State shall appoint, in such Manner as the Legislature thereof may direct, a Number of Electors, equal to the whole Number of Senators and Representatives to which the State may be entitled in the Congress: but no Senator or Representative, or Person holding an Office of Trust or Profit under the United States, shall be appointed an Elector. 138 | 139 | The Electors shall meet in their respective States, and vote by Ballot for two Persons, of whom one at least shall not be an Inhabitant of the same State with themselves. And they shall make a List of all the Persons voted for, and of the Number of Votes for each; which List they shall sign and certify, and transmit sealed to the Seat of the Government of the United States, directed to the President of the Senate. The President of the Senate shall, in the Presence of the Senate and House of Representatives, open all the Certificates, and the Votes shall then be counted. 
The Person having the greatest Number of Votes shall be the President, if such Number be a Majority of the whole Number of Electors appointed; and if there be more than one who have such Majority, and have an equal Number of Votes, then the House of Representatives shall immediately chuse by Ballot one of them for President; and if no Person have a Majority, then from the five highest on the List the said House shall in like Manner chuse the President. But in chusing the President, the Votes shall be taken by States, the Representation from each State having one Vote; A quorum for this purpose shall consist of a Member or Members from two thirds of the States, and a Majority of all the States shall be necessary to a Choice. In every Case, after the Choice of the President, the Person having the greatest Number of Votes of the Electors shall be the Vice President. But if there should remain two or more who have equal Votes, the Senate shall chuse from them by Ballot the Vice President. 140 | 141 | The Congress may determine the Time of chusing the Electors, and the Day on which they shall give their Votes; which Day shall be the same throughout the United States. 142 | 143 | No Person except a natural born Citizen, or a Citizen of the United States, at the time of the Adoption of this Constitution, shall be eligible to the Office of President; neither shall any Person be eligible to that Office who shall not have attained to the Age of thirty five Years, and been fourteen Years a Resident within the United States. 
144 | 145 | In Case of the Removal of the President from Office, or of his Death, Resignation, or Inability to discharge the Powers and Duties of the said Office, the Same shall devolve on the Vice President, and the Congress may by Law provide for the Case of Removal, Death, Resignation or Inability, both of the President and Vice President, declaring what Officer shall then act as President, and such Officer shall act accordingly, until the Disability be removed, or a President shall be elected. 146 | 147 | The President shall, at stated Times, receive for his Services, a Compensation, which shall neither be increased nor diminished during the Period for which he shall have been elected, and he shall not receive within that Period any other Emolument from the United States, or any of them. 148 | 149 | Before he enter on the Execution of his Office, he shall take the following Oath or Affirmation:--"I do solemnly swear (or affirm) that I will faithfully execute the Office of President of the United States, and will to the best of my Ability, preserve, protect and defend the Constitution of the United States." 150 | 151 | Section. 2. 152 | 153 | The President shall be Commander in Chief of the Army and Navy of the United States, and of the Militia of the several States, when called into the actual Service of the United States; he may require the Opinion, in writing, of the principal Officer in each of the executive Departments, upon any Subject relating to the Duties of their respective Offices, and he shall have Power to grant Reprieves and Pardons for Offences against the United States, except in Cases of Impeachment. 
154 | 155 | He shall have Power, by and with the Advice and Consent of the Senate, to make Treaties, provided two thirds of the Senators present concur; and he shall nominate, and by and with the Advice and Consent of the Senate, shall appoint Ambassadors, other public Ministers and Consuls, Judges of the supreme Court, and all other Officers of the United States, whose Appointments are not herein otherwise provided for, and which shall be established by Law: but the Congress may by Law vest the Appointment of such inferior Officers, as they think proper, in the President alone, in the Courts of Law, or in the Heads of Departments. 156 | 157 | The President shall have Power to fill up all Vacancies that may happen during the Recess of the Senate, by granting Commissions which shall expire at the End of their next Session. 158 | 159 | Section. 3. 160 | 161 | He shall from time to time give to the Congress Information of the State of the Union, and recommend to their Consideration such Measures as he shall judge necessary and expedient; he may, on extraordinary Occasions, convene both Houses, or either of them, and in Case of Disagreement between them, with Respect to the Time of Adjournment, he may adjourn them to such Time as he shall think proper; he shall receive Ambassadors and other public Ministers; he shall take Care that the Laws be faithfully executed, and shall Commission all the Officers of the United States. 162 | 163 | Section. 4. 164 | 165 | The President, Vice President and all civil Officers of the United States, shall be removed from Office on Impeachment for, and Conviction of, Treason, Bribery, or other high Crimes and Misdemeanors. 166 | 167 | Article III. 168 | 169 | Section. 1. 170 | 171 | The judicial Power of the United States shall be vested in one supreme Court, and in such inferior Courts as the Congress may from time to time ordain and establish. 
The Judges, both of the supreme and inferior Courts, shall hold their Offices during good Behaviour, and shall, at stated Times, receive for their Services a Compensation, which shall not be diminished during their Continuance in Office. 172 | 173 | Section. 2. 174 | 175 | The judicial Power shall extend to all Cases, in Law and Equity, arising under this Constitution, the Laws of the United States, and Treaties made, or which shall be made, under their Authority;--to all Cases affecting Ambassadors, other public Ministers and Consuls;--to all Cases of admiralty and maritime Jurisdiction;--to Controversies to which the United States shall be a Party;--to Controversies between two or more States;-- between a State and Citizens of another State,--between Citizens of different States,--between Citizens of the same State claiming Lands under Grants of different States, and between a State, or the Citizens thereof, and foreign States, Citizens or Subjects. 176 | 177 | In all Cases affecting Ambassadors, other public Ministers and Consuls, and those in which a State shall be Party, the supreme Court shall have original Jurisdiction. In all the other Cases before mentioned, the supreme Court shall have appellate Jurisdiction, both as to Law and Fact, with such Exceptions, and under such Regulations as the Congress shall make. 178 | 179 | The Trial of all Crimes, except in Cases of Impeachment, shall be by Jury; and such Trial shall be held in the State where the said Crimes shall have been committed; but when not committed within any State, the Trial shall be at such Place or Places as the Congress may by Law have directed. 180 | 181 | Section. 3. 182 | 183 | Treason against the United States, shall consist only in levying War against them, or in adhering to their Enemies, giving them Aid and Comfort. No Person shall be convicted of Treason unless on the Testimony of two Witnesses to the same overt Act, or on Confession in open Court. 
184 | 185 | The Congress shall have Power to declare the Punishment of Treason, but no Attainder of Treason shall work Corruption of Blood, or Forfeiture except during the Life of the Person attainted. 186 | 187 | Article. IV. 188 | 189 | Section. 1. 190 | 191 | Full Faith and Credit shall be given in each State to the public Acts, Records, and judicial Proceedings of every other State. And the Congress may by general Laws prescribe the Manner in which such Acts, Records and Proceedings shall be proved, and the Effect thereof. 192 | 193 | Section. 2. 194 | 195 | The Citizens of each State shall be entitled to all Privileges and Immunities of Citizens in the several States. 196 | 197 | A Person charged in any State with Treason, Felony, or other Crime, who shall flee from Justice, and be found in another State, shall on Demand of the executive Authority of the State from which he fled, be delivered up, to be removed to the State having Jurisdiction of the Crime. 198 | 199 | No Person held to Service or Labour in one State, under the Laws thereof, escaping into another, shall, in Consequence of any Law or Regulation therein, be discharged from such Service or Labour, but shall be delivered up on Claim of the Party to whom such Service or Labour may be due. 200 | 201 | Section. 3. 202 | 203 | New States may be admitted by the Congress into this Union; but no new State shall be formed or erected within the Jurisdiction of any other State; nor any State be formed by the Junction of two or more States, or Parts of States, without the Consent of the Legislatures of the States concerned as well as of the Congress. 204 | 205 | The Congress shall have Power to dispose of and make all needful Rules and Regulations respecting the Territory or other Property belonging to the United States; and nothing in this Constitution shall be so construed as to Prejudice any Claims of the United States, or of any particular State. 206 | 207 | Section. 4. 
208 | 209 | The United States shall guarantee to every State in this Union a Republican Form of Government, and shall protect each of them against Invasion; and on Application of the Legislature, or of the Executive (when the Legislature cannot be convened), against domestic Violence. 210 | 211 | Article. V. 212 | 213 | The Congress, whenever two thirds of both Houses shall deem it necessary, shall propose Amendments to this Constitution, or, on the Application of the Legislatures of two thirds of the several States, shall call a Convention for proposing Amendments, which, in either Case, shall be valid to all Intents and Purposes, as Part of this Constitution, when ratified by the Legislatures of three fourths of the several States, or by Conventions in three fourths thereof, as the one or the other Mode of Ratification may be proposed by the Congress; Provided that no Amendment which may be made prior to the Year One thousand eight hundred and eight shall in any Manner affect the first and fourth Clauses in the Ninth Section of the first Article; and that no State, without its Consent, shall be deprived of its equal Suffrage in the Senate. 214 | 215 | Article. VI. 216 | 217 | All Debts contracted and Engagements entered into, before the Adoption of this Constitution, shall be as valid against the United States under this Constitution, as under the Confederation. 218 | 219 | This Constitution, and the Laws of the United States which shall be made in Pursuance thereof; and all Treaties made, or which shall be made, under the Authority of the United States, shall be the supreme Law of the Land; and the Judges in every State shall be bound thereby, any Thing in the Constitution or Laws of any State to the Contrary notwithstanding. 
220 | 221 | The Senators and Representatives before mentioned, and the Members of the several State Legislatures, and all executive and judicial Officers, both of the United States and of the several States, shall be bound by Oath or Affirmation, to support this Constitution; but no religious Test shall ever be required as a Qualification to any Office or public Trust under the United States. 222 | 223 | Article. VII. 224 | 225 | The Ratification of the Conventions of nine States, shall be sufficient for the Establishment of this Constitution between the States so ratifying the Same. 226 | 227 | The Word, "the," being interlined between the seventh and eighth Lines of the first Page, the Word "Thirty" being partly written on an Erazure in the fifteenth Line of the first Page, The Words "is tried" being interlined between the thirty second and thirty third Lines of the first Page and the Word "the" being interlined between the forty third and forty fourth Lines of the second Page. 228 | 229 | Attest William Jackson Secretary 230 | 231 | done in Convention by the Unanimous Consent of the States present the Seventeenth Day of September in the Year of our Lord one thousand seven hundred and Eighty seven and of the Independance of the United States of America the Twelfth In witness whereof We have hereunto subscribed our Names, 232 | -------------------------------------------------------------------------------- /examples/emoji.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | """ 3 | Emoji Example 4 | =============== 5 | A simple example that shows how to include emoji. Note that this example does not seem to work on OS X, but does 6 | work correctly in Ubuntu. 7 | 8 | There are 3 important steps to follow to include emoji: 9 | 1) Read the text input with io.open instead of the built in open. 
This ensures that it is loaded as UTF-8 10 | 2) Override the regular expression used by word cloud to parse the text into words. The default expression 11 | will only match ascii words 12 | 3) Override the default font to something that supports emoji. The included Symbola font includes black and 13 | white outlines for most emoji. There are currently issues with the PIL/Pillow library that seem to prevent 14 | it from functioning correctly on OS X (https://github.com/python-pillow/Pillow/issues/1774), so try this 15 | on ubuntu if you are having problems. 16 | """ 17 | import io 18 | import os 19 | import string 20 | from os import path 21 | from wordcloud import WordCloud 22 | 23 | # get data directory (using getcwd() is needed to support running example in generated IPython notebook) 24 | d = path.dirname(__file__) if "__file__" in locals() else os.getcwd() 25 | 26 | # It is important to use io.open to correctly load the file as UTF-8 27 | text = io.open(path.join(d, 'happy-emoji.txt')).read() 28 | 29 | # the regex used to detect words is a combination of normal words, ascii art, and emojis 30 | # 2+ consecutive letters (also include apostrophes), e.x It's 31 | normal_word = r"(?:\w[\w']+)" 32 | # 2+ consecutive punctuations, e.x. :) 33 | ascii_art = r"(?:[{punctuation}][{punctuation}]+)".format(punctuation=string.punctuation) 34 | # a single character that is not alpha_numeric or other ascii printable 35 | emoji = r"(?:[^\s])(? .08] = 255 37 | 38 | # create wordcloud. 
A bit sluggish, you can subsample more strongly for quicker rendering 39 | # relative_scaling=0 means the frequencies in the data are reflected less 40 | # accurately but it makes a better picture 41 | wc = WordCloud(max_words=2000, mask=parrot_mask, max_font_size=40, random_state=42, relative_scaling=0) 42 | 43 | # generate word cloud 44 | wc.generate(text) 45 | plt.imshow(wc) 46 | 47 | # create coloring from image 48 | image_colors = ImageColorGenerator(parrot_color) 49 | wc.recolor(color_func=image_colors) 50 | plt.figure(figsize=(10, 10)) 51 | plt.imshow(wc, interpolation="bilinear") 52 | wc.to_file("parrot_new.png") 53 | 54 | plt.figure(figsize=(10, 10)) 55 | plt.title("Original Image") 56 | plt.imshow(parrot_color) 57 | 58 | plt.figure(figsize=(10, 10)) 59 | plt.title("Edge map") 60 | plt.imshow(edges) 61 | plt.show() 62 | -------------------------------------------------------------------------------- /examples/parrot_new.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/amueller/word_cloud/e7753d4b9904a5bc6c96a4705ef43d7169572fda/examples/parrot_new.png -------------------------------------------------------------------------------- /examples/simple.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | """ 3 | Minimal Example 4 | =============== 5 | 6 | Generating a square wordcloud from the US constitution using default arguments. 7 | """ 8 | 9 | import os 10 | 11 | from os import path 12 | from wordcloud import WordCloud 13 | 14 | # get data directory (using getcwd() is needed to support running example in generated IPython notebook) 15 | d = path.dirname(__file__) if "__file__" in locals() else os.getcwd() 16 | 17 | # Read the whole text.
18 | text = open(path.join(d, 'constitution.txt')).read() 19 | 20 | # Generate a word cloud image 21 | wordcloud = WordCloud().generate(text) 22 | 23 | # Display the generated image: 24 | # the matplotlib way: 25 | import matplotlib.pyplot as plt 26 | plt.imshow(wordcloud, interpolation='bilinear') 27 | plt.axis("off") 28 | 29 | # lower max_font_size 30 | wordcloud = WordCloud(max_font_size=40).generate(text) 31 | plt.figure() 32 | plt.imshow(wordcloud, interpolation="bilinear") 33 | plt.axis("off") 34 | plt.show() 35 | 36 | # The pil way (if you don't have matplotlib) 37 | # image = wordcloud.to_image() 38 | # image.show() 39 | -------------------------------------------------------------------------------- /examples/single_word.py: -------------------------------------------------------------------------------- 1 | """ 2 | Single Word 3 | =========== 4 | 5 | Make a word cloud with a single word that's repeated. 6 | """ 7 | 8 | import numpy as np 9 | import matplotlib.pyplot as plt 10 | from wordcloud import WordCloud 11 | 12 | text = "square" 13 | 14 | x, y = np.ogrid[:300, :300] 15 | 16 | mask = (x - 150) ** 2 + (y - 150) ** 2 > 130 ** 2 17 | mask = 255 * mask.astype(int) 18 | 19 | 20 | wc = WordCloud(background_color="white", repeat=True, mask=mask) 21 | wc.generate(text) 22 | 23 | plt.axis("off") 24 | plt.imshow(wc, interpolation="bilinear") 25 | plt.show() 26 | -------------------------------------------------------------------------------- /examples/stormtrooper_mask.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/amueller/word_cloud/e7753d4b9904a5bc6c96a4705ef43d7169572fda/examples/stormtrooper_mask.png -------------------------------------------------------------------------------- /examples/wc_cn/LuXun.jpg: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/amueller/word_cloud/e7753d4b9904a5bc6c96a4705ef43d7169572fda/examples/wc_cn/LuXun.jpg -------------------------------------------------------------------------------- /examples/wc_cn/LuXun_black.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/amueller/word_cloud/e7753d4b9904a5bc6c96a4705ef43d7169572fda/examples/wc_cn/LuXun_black.jpg -------------------------------------------------------------------------------- /examples/wc_cn/LuXun_black_colored.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/amueller/word_cloud/e7753d4b9904a5bc6c96a4705ef43d7169572fda/examples/wc_cn/LuXun_black_colored.jpg -------------------------------------------------------------------------------- /examples/wc_cn/LuXun_color.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/amueller/word_cloud/e7753d4b9904a5bc6c96a4705ef43d7169572fda/examples/wc_cn/LuXun_color.jpg -------------------------------------------------------------------------------- /examples/wc_cn/LuXun_colored.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/amueller/word_cloud/e7753d4b9904a5bc6c96a4705ef43d7169572fda/examples/wc_cn/LuXun_colored.jpg -------------------------------------------------------------------------------- /examples/wc_cn/wc_cn_license.txt: -------------------------------------------------------------------------------- 1 | LICENSE 2 | 3 | * Call to Arms (Lu Xun), collection of short stories. 
4 | 5 | * LuXun_black : Thousand pointing fingers, willing ox bow (Zhao Yannian's Woodcut 1961) 6 | 7 | * LuXun_color : Thousand pointing fingers, willing ox bow (Yin Shoushi's Chinese Ink 1973)(after image processing) 8 | 9 | According to the copyright law of People's Republic of China, the use of the above three works of art 10 | in scientific research and study is allowed. Because it is out of copyright in the copyright law of the 11 | Republic of People's Republic of China, so Chinese law is no longer on the "Call to Arms" for copyright 12 | protection. But if you want to use two pieces of art in business, because the time limit for copyright 13 | is not over, you have to get the consent of the artist. 14 | 15 | * stopwords: Free to use, without authorization 16 | 17 | Shandong University of Science and Technology (Ji'nan campus) Software Alliance Laboratory(2017) Font Tian -------------------------------------------------------------------------------- /examples/wordcloud_cn.py: -------------------------------------------------------------------------------- 1 | # - * - coding: utf - 8 -*- 2 | """ 3 | create wordcloud with chinese 4 | ============================= 5 | 6 | Wordcloud is a very good tool, but if you want to create 7 | a Chinese wordcloud, wordcloud alone is not enough. This file 8 | shows how to use wordcloud with Chinese. First, you need a 9 | Chinese word segmentation library, jieba; jieba is now the 10 | most elegant and most popular Chinese word segmentation tool in Python. 11 | You can install it with 'pip install jieba'.
As you can see, 12 | using wordcloud together with jieba is very convenient 13 | """ 14 | 15 | import jieba 16 | jieba.enable_parallel(4) 17 | # Set up 4 parallel processes; note that this is unable to run on Windows 18 | from os import path 19 | from imageio import imread 20 | import matplotlib.pyplot as plt 21 | import os 22 | # jieba.load_userdict("txt\userdict.txt") 23 | # add userdict by load_userdict() 24 | from wordcloud import WordCloud, ImageColorGenerator 25 | 26 | # get data directory (using getcwd() is needed to support running example in generated IPython notebook) 27 | d = path.dirname(__file__) if "__file__" in locals() else os.getcwd() 28 | 29 | stopwords_path = d + '/wc_cn/stopwords_cn_en.txt' 30 | # Chinese fonts must be set 31 | font_path = d + '/fonts/SourceHanSerif/SourceHanSerifK-Light.otf' 32 | 33 | # the paths to save the wordcloud images 34 | imgname1 = d + '/wc_cn/LuXun.jpg' 35 | imgname2 = d + '/wc_cn/LuXun_colored.jpg' 36 | # read the mask / color image 37 | back_coloring = imread(path.join(d, d + '/wc_cn/LuXun_color.jpg')) 38 | 39 | # Read the whole text.
40 | text = open(path.join(d, d + '/wc_cn/CalltoArms.txt')).read() 41 | 42 | # custom words to add to jieba's dictionary; 43 | # they are registered below with add_word() 44 | userdict_list = ['阿Q', '孔乙己', '单四嫂子'] 45 | 46 | 47 | # The function for processing text with Jieba 48 | def jieba_processing_txt(text): 49 | for word in userdict_list: 50 | jieba.add_word(word) 51 | 52 | mywordlist = [] 53 | seg_list = jieba.cut(text, cut_all=False) 54 | liststr = "/ ".join(seg_list) 55 | 56 | with open(stopwords_path, encoding='utf-8') as f_stop: 57 | f_stop_text = f_stop.read() 58 | f_stop_seg_list = f_stop_text.splitlines() 59 | 60 | for myword in liststr.split('/'): 61 | if not (myword.strip() in f_stop_seg_list) and len(myword.strip()) > 1: 62 | mywordlist.append(myword) 63 | return ' '.join(mywordlist) 64 | 65 | 66 | wc = WordCloud(font_path=font_path, background_color="white", max_words=2000, mask=back_coloring, 67 | max_font_size=100, random_state=42, width=1000, height=860, margin=2,) 68 | 69 | 70 | wc.generate(jieba_processing_txt(text)) 71 | 72 | # create coloring from image 73 | image_colors_default = ImageColorGenerator(back_coloring) 74 | 75 | plt.figure() 76 | # show the wordcloud (not yet recolored) 77 | plt.imshow(wc, interpolation="bilinear") 78 | plt.axis("off") 79 | plt.show() 80 | 81 | # save wordcloud 82 | wc.to_file(path.join(d, imgname1)) 83 | 84 | # create coloring from image 85 | image_colors_byImg = ImageColorGenerator(back_coloring) 86 | 87 | # recolor from the image and show 88 | # we could also give color_func=image_colors_byImg directly in the constructor 89 | plt.imshow(wc.recolor(color_func=image_colors_byImg), interpolation="bilinear") 90 | plt.axis("off") 91 | plt.figure() 92 | plt.imshow(back_coloring, interpolation="bilinear") 93 | plt.axis("off") 94 | plt.show() 95 | 96 | # save wordcloud 97 | wc.to_file(path.join(d, imgname2)) 98 | -------------------------------------------------------------------------------- /pyproject.toml:
-------------------------------------------------------------------------------- 1 | [build-system] 2 | requires = ["setuptools", "Cython>=0.29.33", "setuptools_scm"] 3 | build-backend = "setuptools.build_meta" 4 | 5 | [project] 6 | name = "wordcloud" 7 | authors = [{ name = "Andreas Mueller", email = "t3kcit+wordcloud@gmail.com" }] 8 | description = "A little word cloud generator" 9 | readme = { file = "README.md", content-type = "text/markdown" } 10 | requires-python = ">=3.7" 11 | license = { text = "MIT License" } 12 | dependencies = ["numpy>=1.6.1", "pillow", "matplotlib"] 13 | dynamic = ["version"] 14 | 15 | [project.urls] 16 | Homepage = "https://github.com/amueller/word_cloud" 17 | 18 | [project.scripts] 19 | wordcloud_cli = "wordcloud.__main__:main" 20 | 21 | [tool.setuptools] 22 | packages = ["wordcloud"] 23 | 24 | [tool.setuptools_scm] 25 | write_to = "wordcloud/_version.py" 26 | -------------------------------------------------------------------------------- /requirements-dev.txt: -------------------------------------------------------------------------------- 1 | codecov 2 | coverage 3 | flake8>=3.8.0 4 | pytest 5 | pytest-cov 6 | pytest-sugar 7 | setuptools>=28.0.0 8 | twine 9 | wheel>=0.38.1 10 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | matplotlib>=1.5.3 2 | numpy>=1.6.1 3 | pillow 4 | cython -------------------------------------------------------------------------------- /setup.cfg: -------------------------------------------------------------------------------- 1 | [coverage:run] 2 | branch = True 3 | source = wordcloud 4 | 5 | [coverage:xml] 6 | output = test/coverage.xml 7 | 8 | [flake8] 9 | ignore = 10 | E402 # module level import not at top of file 11 | W503 # line break before binary operator 12 | max-line-length = 120 13 | # Whether to display the pep8 instructions on failure (can be quite verbose) 14 
| show-pep8 = False 15 | # Whether to show source code for each failure 16 | show-source = True 17 | # Maximum cyclomatic complexity allowed 18 | max-complexity = 14 19 | format = pylint 20 | exclude = .git,.idea,.eggs,__pycache__,dist,doc/_build,doc/auto_examples,doc/conf.py,build,wordcloud/_version.py,versioneer.py 21 | 22 | [tool:pytest] 23 | addopts = -v --cov --cov-report xml --tb=short 24 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | from setuptools import setup 2 | from Cython.Build import cythonize 3 | 4 | setup(ext_modules=cythonize("wordcloud/query_integral_image.pyx")) 5 | -------------------------------------------------------------------------------- /test/conftest.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | 3 | 4 | @pytest.fixture() 5 | def tmp_text_file(tmpdir_factory): 6 | fn = tmpdir_factory.mktemp("data").join("empty.txt") 7 | fn.write(b'') 8 | return fn 9 | 10 | 11 | @pytest.fixture 12 | def no_cover_compat(request): 13 | """A pytest fixture to disable coverage. 14 | 15 | .. note:: 16 | 17 | After the next version of ``pytest-cov`` is released, it will be possible to directly 18 | use the ``no_cover`` fixture or marker. 19 | """ 20 | 21 | # Check with hasplugin to avoid getplugin exception in older pytest. 
22 | if request.config.pluginmanager.hasplugin('_cov'): 23 | plugin = request.config.pluginmanager.getplugin('_cov') 24 | if plugin.cov_controller: 25 | plugin.cov_controller.cov.stop() 26 | plugin.cov_controller.unset_env() 27 | yield plugin.cov_controller 28 | plugin.cov_controller.set_env() 29 | plugin.cov_controller.cov.start() 30 | -------------------------------------------------------------------------------- /test/test_wordcloud.py: -------------------------------------------------------------------------------- 1 | from wordcloud import WordCloud, get_single_color_func, ImageColorGenerator 2 | 3 | import numpy as np 4 | import pytest 5 | 6 | from random import Random 7 | from numpy.testing import assert_array_equal 8 | from PIL import Image 9 | import xml.etree.ElementTree as ET 10 | 11 | import matplotlib 12 | matplotlib.use('Agg') 13 | 14 | THIS = """The Zen of Python, by Tim Peters 15 | 16 | Beautiful is better than ugly. 17 | Explicit is better than implicit. 18 | Simple is better than complex. 19 | Complex is better than complicated. 20 | Flat is better than nested. 21 | Sparse is better than dense. 22 | Readability counts. 23 | Special cases aren't special enough to break the rules. 24 | Although practicality beats purity. 25 | Errors should never pass silently. 26 | Unless explicitly silenced. 27 | In the face of ambiguity, refuse the temptation to guess. 28 | There should be one-- and preferably only one --obvious way to do it. 29 | Although that way may not be obvious at first unless you're Dutch. 30 | Now is better than never. 31 | Although never is often better than *right* now. 32 | If the implementation is hard to explain, it's a bad idea. 33 | If the implementation is easy to explain, it may be a good idea. 34 | Namespaces are one honking great idea -- let's do more of those! 35 | 36 | 3 . 
14 15 92 65 35 89 79 32 38 46 26 433 37 | 83 27 95 02 88 41 97 16 93 99 37 510 38 | 58 20 97 49 44 59 23 07 81 64 06 286 39 | 20 89 98 62 80 34 82 53 42 11 70 679 40 | 82 14 80 86 51 32 82 30 66 47 09 384 41 | 46 09 55 05 82 23 17 25 35 94 08 128 42 | """ 43 | 44 | STOPWORDED_COLLOCATIONS = """ 45 | thank you very much 46 | thank you very much 47 | thank you very much 48 | thanks 49 | """ 50 | 51 | STOPWORDED_COLLOCATIONS_UPPERCASE = """ 52 | Thank you very much 53 | Thank you very much 54 | Thank you very much 55 | thank you very much 56 | hi There 57 | Hi there 58 | Hi There 59 | thanks 60 | """ 61 | 62 | SMALL_CANVAS = """ 63 | better late than never someone will say 64 | """ 65 | 66 | 67 | def test_collocations(): 68 | wc = WordCloud(collocations=False, stopwords=set()) 69 | wc.generate(THIS) 70 | 71 | wc2 = WordCloud(collocations=True, stopwords=set()) 72 | wc2.generate(THIS) 73 | 74 | assert "is better" in wc2.words_ 75 | assert "is better" not in wc.words_ 76 | assert "way may" not in wc2.words_ 77 | 78 | 79 | def test_collocation_stopwords(): 80 | wc = WordCloud(collocations=True, stopwords={"you", "very"}, collocation_threshold=9) 81 | wc.generate(STOPWORDED_COLLOCATIONS) 82 | 83 | assert "thank you" not in wc.words_ 84 | assert "very much" not in wc.words_ 85 | assert "thank" in wc.words_ 86 | # a bigram of all stopwords will be removed 87 | assert "you very" not in wc.words_ 88 | 89 | 90 | def test_collocation_stopwords_uppercase(): 91 | wc = WordCloud(collocations=True, stopwords={"thank", "hi", "there"}, collocation_threshold=9) 92 | wc.generate(STOPWORDED_COLLOCATIONS_UPPERCASE) 93 | 94 | assert "Thank you" not in wc.words_ 95 | assert "thank you" not in wc.words_ 96 | assert "Thank" not in wc.words_ 97 | # a bigram of all stopwords will be removed 98 | assert "hi There" not in wc.words_ 99 | assert "Hi there" not in wc.words_ 100 | assert "Hi There" not in wc.words_ 101 | 102 | 103 | def test_plurals_numbers(): 104 | text = THIS + "\n" + "1 idea 2 
ideas three ideas although many Ideas" 105 | wc = WordCloud(stopwords=[]).generate(text) 106 | # not capitalized usually 107 | assert "Ideas" not in wc.words_ 108 | # plural removed 109 | assert "ideas" not in wc.words_ 110 | # usually capitalized 111 | assert "although" not in wc.words_ 112 | assert "idea" in wc.words_ 113 | assert "Although" in wc.words_ 114 | assert "better than" in wc.words_ 115 | 116 | 117 | def test_multiple_s(): 118 | text = 'flo flos floss flosss' 119 | wc = WordCloud(stopwords=[]).generate(text) 120 | assert "flo" in wc.words_ 121 | assert "flos" not in wc.words_ 122 | assert "floss" in wc.words_ 123 | assert "flosss" in wc.words_ 124 | # not normalizing means that the one with just one s is kept 125 | wc = WordCloud(stopwords=[], normalize_plurals=False).generate(text) 126 | assert "flo" in wc.words_ 127 | assert "flos" in wc.words_ 128 | assert "floss" in wc.words_ 129 | assert "flosss" in wc.words_ 130 | 131 | 132 | def test_empty_text(): 133 | # test originally empty text raises an exception 134 | wc = WordCloud(stopwords=[]) 135 | with pytest.raises(ValueError): 136 | wc.generate('') 137 | 138 | # test empty-after-filtering text raises an exception 139 | wc = WordCloud(stopwords=['a', 'b']) 140 | with pytest.raises(ValueError): 141 | wc.generate('a b a') 142 | 143 | 144 | def test_default(): 145 | # test that default word cloud creation and conversions work 146 | wc = WordCloud(max_words=50) 147 | wc.generate(THIS) 148 | 149 | # check for proper word extraction 150 | assert len(wc.words_) == wc.max_words 151 | 152 | # check that we got enough words 153 | assert len(wc.layout_) == wc.max_words 154 | 155 | # check image export 156 | wc_image = wc.to_image() 157 | assert wc_image.size == (wc.width, wc.height) 158 | 159 | # check that numpy conversion works 160 | wc_array = np.array(wc) 161 | assert_array_equal(wc_array, wc.to_array()) 162 | 163 | # check size 164 | assert wc_array.shape == (wc.height, wc.width, 3) 165 | 166 | 167 | def 
test_stopwords_lowercasing(): 168 | # test that capitalized stopwords work. 169 | wc = WordCloud(stopwords=["Beautiful"]) 170 | processed = wc.process_text(THIS) 171 | words = [count[0] for count in processed] 172 | assert "Beautiful" not in words 173 | 174 | 175 | def test_writing_to_file(tmpdir): 176 | wc = WordCloud() 177 | wc.generate(THIS) 178 | 179 | # check writing to file 180 | filename = str(tmpdir.join("word_cloud.png")) 181 | wc.to_file(filename) 182 | loaded_image = Image.open(filename) 183 | assert loaded_image.size == (wc.width, wc.height) 184 | 185 | 186 | def test_check_errors(): 187 | wc = WordCloud() 188 | 189 | try: 190 | np.array(wc) 191 | raise AssertionError("np.array(wc) didn't raise") 192 | except ValueError as e: 193 | assert "call generate" in str(e) 194 | 195 | try: 196 | wc.recolor() 197 | raise AssertionError("wc.recolor didn't raise") 198 | except ValueError as e: 199 | assert "call generate" in str(e) 200 | 201 | 202 | def test_svg_syntax(): 203 | wc = WordCloud() 204 | wc.generate(THIS) 205 | svg = wc.to_svg() 206 | ET.fromstring(svg) 207 | 208 | 209 | def test_recolor(): 210 | wc = WordCloud(max_words=50, colormap="jet") 211 | wc.generate(THIS) 212 | array_before = wc.to_array() 213 | wc.recolor() 214 | array_after = wc.to_array() 215 | # check that the same places are filled 216 | assert_array_equal(array_before.sum(axis=-1) != 0, 217 | array_after.sum(axis=-1) != 0) 218 | # check that they are not the same 219 | assert np.abs(array_before - array_after).sum() > 10000 220 | 221 | # check that recoloring is deterministic 222 | wc.recolor(random_state=10) 223 | wc_again = wc.to_array() 224 | assert_array_equal(wc_again, wc.recolor(random_state=10)) 225 | 226 | 227 | def test_random_state(): 228 | # check that random state makes everything deterministic 229 | wc = WordCloud(random_state=0) 230 | wc2 = WordCloud(random_state=0) 231 | wc.generate(THIS) 232 | wc2.generate(THIS) 233 | assert_array_equal(wc, wc2) 234 | 235 | 236 | def 
def test_mask_contour():
    # test mask contour is created, learn more at:
    # https://github.com/amueller/word_cloud/pull/348#issuecomment-370883873
    mask = np.zeros((234, 456), dtype=int)
    mask[100:150, 300:400] = 255

    def render(**options):
        # build a blue-contoured cloud on the shared mask and return its pixels
        cloud = WordCloud(mask=mask, contour_color='blue', **options)
        cloud.generate(THIS)
        return np.array(cloud)

    def masked_total(pixels):
        # sum of all channel values inside the masked rectangle
        return pixels[100:150, 300:400].sum()

    sm_array = render(contour_width=1)
    lg_array = render(contour_width=20)
    sc_array = render(contour_width=1, scale=2)

    sm_total = masked_total(sm_array)

    # test `contour_width`
    assert masked_total(lg_array) > sm_total

    # test contour varies with `scale`
    assert masked_total(sc_array) > sm_total

    # test `contour_color`
    assert all(sm_array[100, 300] == [0, 0, 255])
def test_single_color_func_grey():
    # grey is special as it's a corner case
    rng = Random(42)
    grey_function = get_single_color_func('darkgrey')

    # consecutive draws from the same seeded generator
    for expected in ('rgb(181, 181, 181)', 'rgb(56, 56, 56)'):
        assert grey_function(random_state=rng) == expected
def test_unicode_stopwords():
    """``u''``-prefixed and plain string stopwords must filter text identically."""
    # The former ``unicode(THIS)`` call relied on a NameError to fall back on
    # Python 3, where the name does not exist; call process_text directly.
    wc_unicode = WordCloud(stopwords=[u'Beautiful'])
    words_unicode = wc_unicode.process_text(THIS)

    wc_str = WordCloud(stopwords=['Beautiful'])
    words_str = wc_str.process_text(str(THIS))

    assert words_unicode == words_str
def test_repeat():
    """``repeat=True`` pads the layout with repeated words up to ``max_words``."""
    short_text = "Some short text"
    wc = WordCloud(stopwords=[]).generate(short_text)
    assert len(wc.layout_) == 3
    wc = WordCloud(max_words=50, stopwords=[], repeat=True).generate(short_text)
    # multiple of word count larger than max_words
    assert len(wc.layout_) == 51
    # relative scaling doesn't work well with repeat
    assert wc.relative_scaling == 0
    # all frequencies are 1
    assert len(wc.words_) == 3
    assert_array_equal(list(wc.words_.values()), 1)
    frequencies = [w[0][1] for w in wc.layout_]
    assert_array_equal(frequencies, 1)

    repetition_text = "Some short text with text"
    wc = WordCloud(max_words=52, stopwords=[], repeat=True)
    wc.generate(repetition_text)
    assert len(wc.words_) == 4
    # normalized frequencies
    assert wc.words_['text'] == 1
    assert wc.words_['with'] == .5
    # Was ``assert len(wc.layout_), wc.max_words``, which only checked that the
    # layout is non-empty and used max_words as the failure message.
    # NOTE(review): assumes all 52 padded entries fit on the default canvas
    # (52 is a multiple of the 4 distinct words) -- confirm by running.
    assert len(wc.layout_) == wc.max_words
    frequencies = [w[0][1] for w in wc.layout_]
    # check that frequencies are sorted
    assert np.all(np.diff(frequencies) <= 0)
def test_max_font_size_as_mask_height():
    # test if max font size will respect the mask height
    text = '''hello hello hello
    bye'''

    # Work on a canvas twice the default size in each dimension so it is
    # certain to be larger than the default canvas.
    reference = WordCloud()
    height, width = reference.height * 2, reference.width * 2

    # using mask, all drawable (an all-zero mask)
    masked = WordCloud(mask=np.zeros((height, width), dtype=int), random_state=42)
    masked.generate(text)

    # no mask
    unmasked = WordCloud(width=width, height=height, random_state=42)
    unmasked.generate(text)

    # Check if the biggest element has the same font size
    assert masked.layout_[0][1] == unmasked.layout_[0][1]
# Flag-style CLI options, i.e. options passed without a value.  Each row is
# (cli flag name, key expected in the parsed arguments, fail_value); the
# pass_value is True for every flag.
ARGUMENT_SPEC_UNARY = [
    ArgOption(cli_name=cli_name, init_name=init_name, pass_value=True, fail_value=fail_value)
    for cli_name, init_name, fail_value in (
        ('no_collocations', 'collocations', 1),
        ('include_numbers', 'include_numbers', 2),
        ('no_normalize_plurals', 'normalize_plurals', 3),
        ('repeat', 'repeat', 4),
    )
]
def all_arguments():
    """Return a new list of every ArgOption spec: typed, then unary, then remaining."""
    return [
        *ARGUMENT_SPEC_TYPED,
        *ARGUMENT_SPEC_UNARY,
        *ARGUMENT_SPEC_REMAINING,
    ]
@pytest.mark.parametrize("option", all_arguments())
def test_parse_args_are_passed_along(option, tmpdir, tmp_text_file):
    """Each CLI option must show up in the parsed arguments under its init name."""
    text_path = str(tmp_text_file)

    # flags are passed without a value
    if option.cli_name in ARGUMENT_CLI_NAMES_UNARY:
        check_argument_unary(text_path, option.cli_name, option.init_name)
        return

    # the 'mask' option is not exercised by this test
    if option.cli_name == 'mask':
        return

    value = option.pass_value
    if isinstance(value, PassFile):
        # file-valued options get the path of an empty placeholder file
        placeholder = tmpdir.join("%s_file" % option.cli_name)
        placeholder.write(b"")
        value = str(placeholder)
    check_argument(text_path, option.cli_name, option.init_name, value)
@pytest.mark.parametrize("command,expected_output, expected_exit_code", [
    ("wordcloud_cli --help", "usage: wordcloud_cli", 0),
    ("%s -m wordcloud --help" % sys.executable, "usage: __main__", 0),
    ("%s %s/../wordcloud/wordcloud_cli.py --help" % (sys.executable, os.path.dirname(__file__)), "To execute the CLI", 1),
])
def test_cli_as_executable(command, expected_output, expected_exit_code, tmpdir, capfd, no_cover_compat):
    """Run each CLI entry point in a subprocess and check its output and exit code.

    The expected text is looked up in stdout when the command succeeds and in
    stderr when it exits with a non-zero status.
    """
    # subprocess.call returns the exit status directly instead of raising
    ret_code = subprocess.call(
        command,
        shell=True,
        cwd=str(tmpdir)
    )

    out, err = capfd.readouterr()
    # Parenthesised on purpose: without the parentheses the expression parses
    # as ``(expected_output in out) if ret_code == 0 else err`` and the failing
    # case would merely assert that stderr is non-empty.
    assert expected_output in (out if ret_code == 0 else err)

    assert ret_code == expected_exit_code
5 | * redo examples 6 | * examples 7 | -------------------------------------------------------------------------------- /wordcloud/__init__.py: -------------------------------------------------------------------------------- 1 | from .wordcloud import (WordCloud, STOPWORDS, random_color_func, 2 | get_single_color_func) 3 | from .color_from_image import ImageColorGenerator 4 | 5 | __all__ = ['WordCloud', 'STOPWORDS', 'random_color_func', 6 | 'get_single_color_func', 'ImageColorGenerator', 7 | '__version__'] 8 | 9 | from ._version import __version__ 10 | -------------------------------------------------------------------------------- /wordcloud/__main__.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """Command line tool to generate word clouds 3 | 4 | The name ``__main__.py`` is important as it enables execution 5 | of the module using ``python -m wordcloud`` syntax. 6 | 7 | Usage: 8 | 9 | * using ``wordcloud_cli`` executable:: 10 | 11 | $ cat word.txt | wordcloud_cli 12 | 13 | $ wordcloud_cli --text=words.txt --stopwords=stopwords.txt 14 | 15 | * using ``wordcloud`` module:: 16 | 17 | $ cat word.txt | python -m wordcloud 18 | 19 | $ python -m wordcloud --text=words.txt --stopwords=stopwords.txt 20 | """ 21 | 22 | import sys 23 | 24 | from .wordcloud_cli import main as wordcloud_cli_main 25 | from .wordcloud_cli import parse_args as wordcloud_cli_parse_args 26 | 27 | 28 | def main(): 29 | """The main entry point to wordcloud_cli``. 30 | 31 | This is installed as the script entry point. 
class ImageColorGenerator(object):
    """Color generator based on a color image.

    A word is colored with the mean color of the image patch that starts at
    the word's position and has the extent of the word's bounding box.

    After construction, the object acts as a callable that can be passed as
    color_func to the word cloud constructor or to the recolor method.

    Parameters
    ----------
    image : nd-array, shape (height, width, 3)
        Image to use to generate word colors. Alpha channels are ignored.
        This should be the same size as the canvas for the wordcloud.
    default_color : tuple or None, default=None
        Fallback colour to use if the canvas is larger than the image,
        in the format (r, g, b). If None, raise ValueError instead.
    """

    def __init__(self, image, default_color=None):
        n_dims = image.ndim
        if n_dims != 2 and n_dims != 3:
            raise ValueError("ImageColorGenerator needs an image with ndim 2 or"
                             " 3, got %d" % image.ndim)
        if n_dims == 3:
            n_channels = image.shape[2]
            if n_channels != 3 and n_channels != 4:
                raise ValueError("A color image needs to have 3 or 4 channels, got %d"
                                 % image.shape[2])
        self.image = image
        self.default_color = default_color

    def __call__(self, word, font_size, font_path, position, orientation, **kwargs):
        """Generate a color for a given word using a fixed image.

        Returns an ``"rgb(r, g, b)"`` string holding the mean color of the
        patch under the word, or ``default_color`` when the patch is empty.

        Raises
        ------
        NotImplementedError
            If the patch is two-dimensional (gray-scale image).
        ValueError
            If the patch is empty and no ``default_color`` was given.
        """
        # measure the word as it will be drawn (font + orientation)
        base_font = ImageFont.truetype(font_path, font_size)
        oriented_font = ImageFont.TransposedFont(base_font,
                                                 orientation=orientation)
        bbox = oriented_font.getbbox(word)

        # the patch starts at `position` and extends by the bounding box
        start0 = position[0]
        start1 = position[1]
        region = self.image[start0:start0 + bbox[2], start1:start1 + bbox[3]]
        if region.ndim == 3:
            # drop alpha channel if any
            region = region[:, :, :3]
        if region.ndim == 2:
            raise NotImplementedError("Gray-scale images TODO")

        pixels = region.reshape(-1, 3)
        if np.all(pixels.shape):
            # at least one pixel was selected: average it per channel
            return "rgb(%d, %d, %d)" % tuple(np.mean(pixels, axis=0))

        # a zero-length dimension means the slice selected no pixels at all
        if self.default_color is None:
            raise ValueError('ImageColorGenerator is smaller than the canvas')
        return "rgb(%d, %d, %d)" % tuple(self.default_color)
size_x, int 9 | size_y, random_state): 10 | cdef int x = integral_image.shape[0] 11 | cdef int y = integral_image.shape[1] 12 | cdef int area, i, j 13 | cdef int hits = 0 14 | 15 | # count how many possible locations 16 | for i in xrange(x - size_x): 17 | for j in xrange(y - size_y): 18 | area = integral_image[i, j] + integral_image[i + size_x, j + size_y] 19 | area -= integral_image[i + size_x, j] + integral_image[i, j + size_y] 20 | if not area: 21 | hits += 1 22 | if not hits: 23 | # no room left 24 | return None 25 | # pick a location at random 26 | cdef int goal = random_state.randint(0, hits) 27 | hits = 0 28 | for i in xrange(x - size_x): 29 | for j in xrange(y - size_y): 30 | area = integral_image[i, j] + integral_image[i + size_x, j + size_y] 31 | area -= integral_image[i + size_x, j] + integral_image[i, j + size_y] 32 | if not area: 33 | hits += 1 34 | if hits == goal: 35 | return i, j 36 | -------------------------------------------------------------------------------- /wordcloud/stopwords: -------------------------------------------------------------------------------- 1 | a 2 | about 3 | above 4 | after 5 | again 6 | against 7 | all 8 | also 9 | am 10 | an 11 | and 12 | any 13 | are 14 | aren't 15 | as 16 | at 17 | be 18 | because 19 | been 20 | before 21 | being 22 | below 23 | between 24 | both 25 | but 26 | by 27 | can 28 | can't 29 | cannot 30 | com 31 | could 32 | couldn't 33 | did 34 | didn't 35 | do 36 | does 37 | doesn't 38 | doing 39 | don't 40 | down 41 | during 42 | each 43 | else 44 | ever 45 | few 46 | for 47 | from 48 | further 49 | get 50 | had 51 | hadn't 52 | has 53 | hasn't 54 | have 55 | haven't 56 | having 57 | he 58 | he'd 59 | he'll 60 | he's 61 | hence 62 | her 63 | here 64 | here's 65 | hers 66 | herself 67 | him 68 | himself 69 | his 70 | how 71 | how's 72 | however 73 | http 74 | i 75 | i'd 76 | i'll 77 | i'm 78 | i've 79 | if 80 | in 81 | into 82 | is 83 | isn't 84 | it 85 | it's 86 | its 87 | itself 88 | just 89 | k 90 | 
def l(k, n, x):  # noqa: E741, E743
    """Return ``k * log(x) + (n - k) * log(1 - x)``.

    This is the log-likelihood term of Dunning's likelihood ratio, with
    notation from http://nlp.stanford.edu/fsnlp/promo/colloc.pdf p162.
    Both logarithm arguments are clamped to at least 1e-10 so that x == 0
    and x == 1 do not raise.
    """
    floor = 1e-10
    log_hit = log(max(x, floor))
    log_miss = log(max(1 - x, floor))
    return log_hit * k + log_miss * (n - k)
def pairwise(iterable):
    """Return an iterator over consecutive overlapping pairs.

    s -> (s0, s1), (s1, s2), (s2, s3), ...  (from the itertools recipes)
    """
    leading, trailing = tee(iterable)
    # advance the second copy by one element (if any) to offset the two
    for _ in trailing:
        break
    return zip(leading, trailing)
def process_tokens(words, normalize_plurals=True):
    """Normalize cases and remove plurals.

    Each word is represented by its most common spelling (capitalization).
    If a word appears both with and without a trailing "s", the version
    with "s" is assumed to be a plural and merged into the version without
    it (words ending in "ss" are never treated as plurals).

    Parameters
    ----------
    words : iterable of strings
        Words to count. Can be either unigrams or bigrams.

    normalize_plurals : bool, default=True
        Whether to try and detect plurals and remove trailing "s".

    Returns
    -------
    counts : dict from string to int
        Counts for each unique word, with cases represented by the most common
        case, and plurals removed.

    standard_forms : dict from string to string
        For each lower-case word the standard capitalization.
    """
    # lower-cased word -> {spelling as seen: number of occurrences}
    spellings_by_lower = defaultdict(dict)
    for token in words:
        spellings = spellings_by_lower[token.lower()]
        spellings[token] = spellings.get(token, 0) + 1

    # lower-cased plural -> lower-cased singular it was merged into
    plural_to_singular = {}
    if normalize_plurals:
        # iterate over a snapshot of the keys because entries are removed
        for lowered in list(spellings_by_lower):
            if not lowered.endswith('s') or lowered.endswith("ss"):
                continue
            stem = lowered[:-1]
            if stem not in spellings_by_lower:
                continue
            target = spellings_by_lower[stem]
            # move every plural spelling, minus its last character, into
            # the singular entry
            for spelling, occurrences in spellings_by_lower.pop(lowered).items():
                trimmed = spelling[:-1]
                target[trimmed] = target.get(trimmed, 0) + occurrences
            plural_to_singular[lowered] = stem

    by_count = itemgetter(1)
    fused_cases = {}
    standard_cases = {}
    for lowered, spellings in spellings_by_lower.items():
        # the most frequent spelling represents the word; on ties the
        # spelling encountered first wins
        dominant, _ = max(spellings.items(), key=by_count)
        fused_cases[dominant] = sum(spellings.values())
        standard_cases[lowered] = dominant

    # merged plurals resolve to the standard form of their singular
    for plural, singular in plural_to_singular.items():
        standard_cases[plural] = standard_cases[singular.lower()]
    return fused_cases, standard_cases
class FileType(object):
    """Factory for creating file objects from command-line arguments.

    Adapted from ``argparse.FileType``; unlike the original, files opened in
    text mode are always decoded/encoded as UTF-8.

    Instances of FileType are typically passed as type= arguments to the
    ArgumentParser add_argument() method.

    Keyword Arguments:
        - mode -- A string indicating how the file is to be opened. Accepts the
            same values as the builtin open() function.
        - bufsize -- The file's desired buffer size. Accepts the same values as
            the builtin open() function.
    """

    def __init__(self, mode='r', bufsize=-1):
        self._mode = mode
        self._bufsize = bufsize

    def __call__(self, string):
        """Open ``string`` (or a standard stream for "-") in the stored mode.

        Raises ValueError when "-" is combined with a mode that is neither
        a read nor a write mode, and argparse.ArgumentTypeError when the
        named file cannot be opened.
        """
        binary = 'b' in self._mode

        # the special argument "-" means sys.std{in,out}
        if string == '-':
            if 'r' in self._mode:
                # Binary readers need the underlying byte stream, exactly
                # as binary writers already get sys.stdout.buffer.
                return sys.stdin.buffer if binary else sys.stdin
            if 'w' in self._mode:
                return sys.stdout.buffer if binary else sys.stdout
            msg = 'argument "-" with mode %r' % self._mode
            raise ValueError(msg)

        # all other arguments are used as file names
        try:
            # An encoding must not be passed for binary files.
            encoding = None if binary else "UTF-8"
            return io.open(string, self._mode, self._bufsize,
                           encoding=encoding)
        except IOError as e:
            message = "can't open '%s': %s"
            raise argparse.ArgumentTypeError(message % (string, e))

    def __repr__(self):
        args = self._mode, self._bufsize
        # The default bufsize (-1) is omitted from the representation.
        args_str = ', '.join(repr(arg) for arg in args if arg != -1)
        return '%s(%s)' % (type(self).__name__, args_str)


class RegExpAction(argparse.Action):
    """argparse action that stores a value only if it compiles as a regex."""

    def __init__(self, option_strings, dest, **kwargs):
        super(RegExpAction, self).__init__(option_strings, dest, **kwargs)

    def __call__(self, parser, namespace, values, option_string=None):
        # The pattern is only validated here; the raw string is what gets
        # stored on the namespace.
        try:
            re.compile(values)
        except re.error as e:
            raise argparse.ArgumentError(
                self, 'Invalid regular expression: ' + str(e))
        setattr(namespace, self.dest, values)


def main(args, text, imagefile):
    """Generate a word cloud from ``text`` and write it as PNG.

    ``args`` is passed as keyword arguments to ``wc.WordCloud``;
    ``imagefile`` is a writable binary file object which is closed once the
    image has been saved.
    """
    wordcloud = wc.WordCloud(**args)
    wordcloud.generate(text)
    image = wordcloud.to_image()

    with imagefile:
        image.save(imagefile, format='png', optimize=True)


def make_parser():
    """Build the argparse parser describing every CLI option."""
    description = 'A simple command line interface for wordcloud module.'
    parser = argparse.ArgumentParser(description=description)
    parser.add_argument(
        '--text', metavar='file', type=FileType(), default='-',
        help='specify file of words to build the word cloud (default: stdin)')
    parser.add_argument(
        '--regexp', metavar='regexp', default=None, action=RegExpAction,
        help='override the regular expression defining what constitutes a word')
    parser.add_argument(
        '--stopwords', metavar='file', type=FileType(),
        help='specify file of stopwords (containing one word per line)'
             ' to remove from the given text after parsing')
    parser.add_argument(
        '--imagefile', metavar='file', type=FileType('wb'),
        default='-',
        help='file the completed PNG image should be written to'
             ' (default: stdout)')
    parser.add_argument(
        '--fontfile', metavar='path', dest='font_path',
        help='path to font file you wish to use (default: DroidSansMono)')
    # The mask options use the local FileType like every other file option,
    # rather than argparse.FileType (deprecated since Python 3.14).
    parser.add_argument(
        '--mask', metavar='file', type=FileType('rb'),
        help='mask to use for the image form')
    parser.add_argument(
        '--colormask', metavar='file', type=FileType('rb'),
        help='color mask to use for image coloring')
    parser.add_argument(
        '--contour_width', metavar='width', default=0, type=float,
        dest='contour_width',
        help='if greater than 0, draw mask contour (default: 0)')
    parser.add_argument(
        '--contour_color', metavar='color', default='black', type=str,
        dest='contour_color',
        help='use given color as mask contour color -'
             ' accepts any value from PIL.ImageColor.getcolor')
    parser.add_argument(
        '--relative_scaling', type=float, default=0,
        metavar='rs', help=' scaling of words by frequency (0 - 1)')
    parser.add_argument(
        '--margin', type=int, default=2,
        metavar='width', help='spacing to leave around words')
    parser.add_argument(
        '--width', type=int, default=400,
        metavar='width', help='define output image width')
    parser.add_argument(
        '--height', type=int, default=200,
        metavar='height', help='define output image height')
    parser.add_argument(
        '--color', metavar='color',
        help='use given color as coloring for the image -'
             ' accepts any value from PIL.ImageColor.getcolor')
    parser.add_argument(
        '--background', metavar='color', default='black', type=str,
        dest='background_color',
        help='use given color as background color for the image -'
             ' accepts any value from PIL.ImageColor.getcolor')
    parser.add_argument(
        '--no_collocations', action='store_false', dest='collocations',
        help='do not add collocations (bigrams) to word cloud '
             '(default: add unigrams and bigrams)')
    parser.add_argument(
        '--include_numbers',
        action='store_true',
        dest='include_numbers',
        help='include numbers in wordcloud?')
    parser.add_argument(
        '--min_word_length',
        type=int,
        default=0,
        metavar='min_word_length',
        dest='min_word_length',
        help='only include words with more than X letters')
    parser.add_argument(
        '--prefer_horizontal',
        type=float, default=.9, metavar='ratio',
        help='ratio of times to try horizontal fitting as opposed to vertical')
    parser.add_argument(
        '--scale',
        type=float, default=1, metavar='scale',
        help='scaling between computation and drawing')
    parser.add_argument(
        '--colormap',
        type=str, default='viridis', metavar='map',
        help='matplotlib colormap name')
    parser.add_argument(
        '--mode',
        type=str, default='RGB', metavar='mode',
        help='use RGB or RGBA for transparent background')
    parser.add_argument(
        '--max_words',
        type=int, default=200, metavar='N',
        help='maximum number of words')
    parser.add_argument(
        '--min_font_size',
        type=int, default=4, metavar='size',
        help='smallest font size to use')
    parser.add_argument(
        '--max_font_size',
        type=int, default=None, metavar='size',
        help='maximum font size for the largest word')
    parser.add_argument(
        '--font_step',
        type=int, default=1, metavar='step',
        help='step size for the font')
    parser.add_argument(
        '--random_state',
        type=int, default=None, metavar='seed',
        help='random seed')
    parser.add_argument(
        '--no_normalize_plurals',
        action='store_false',
        dest='normalize_plurals',
        help='whether to remove trailing \'s\' from words')
    parser.add_argument(
        '--repeat',
        action='store_true',
        dest='repeat',
        help='whether to repeat words and phrases')
    parser.add_argument(
        '--version', action='version',
        version='%(prog)s {version}'.format(version=__version__))
    return parser


def parse_args(arguments):
    """Parse CLI arguments into the inputs expected by ``main``.

    Parameters
    ----------
    arguments : list of strings
        Command-line arguments, without the program name.

    Returns
    -------
    args : dict
        Remaining options, ready to be passed as keyword arguments to
        ``wc.WordCloud``. File-valued options have been consumed: the
        stopwords file becomes a set of stripped lines, the mask becomes an
        array, and ``color_func`` is derived from --colormask / --color.

    text : string
        Full content of the --text input.

    imagefile : file object
        Open binary output for the image; the caller must close it.

    Raises
    ------
    ValueError
        If both a color mask and a single color are given.
    """
    parser = make_parser()
    args = parser.parse_args(arguments)
    # The literal string 'None' on the command line requests no background.
    if args.background_color == 'None':
        args.background_color = None

    if args.colormask and args.color:
        raise ValueError('specify either a color mask or a color function')

    args = vars(args)

    with args.pop('text') as f:
        text = f.read()

    if args['stopwords']:
        with args.pop('stopwords') as f:
            args['stopwords'] = {line.strip() for line in f}

    if args['mask']:
        # np.array() pulls the pixel data into memory, so the handle can be
        # released as soon as the conversion is done.
        with args.pop('mask') as mask_file:
            args['mask'] = np.array(Image.open(mask_file))

    color_func = wc.random_color_func
    colormask = args.pop('colormask')
    color = args.pop('color')
    if colormask:
        with colormask:
            image = np.array(Image.open(colormask))
        color_func = wc.ImageColorGenerator(image)
    if color:
        color_func = wc.get_single_color_func(color)
    args['color_func'] = color_func

    imagefile = args.pop('imagefile')

    return args, text, imagefile