├── word_cloud
    ├── __init__.py
    └── word_cloud_generator.py
├── _config.yml
├── word_cloud.gif
├── .gitignore
├── setup.py
├── README.md
├── Word Cloud Examples.ipynb
└── Example word clouds.ipynb


/word_cloud/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/_config.yml:
--------------------------------------------------------------------------------
1 | theme: jekyll-theme-hacker


--------------------------------------------------------------------------------
/word_cloud.gif:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kavgan/word_cloud/HEAD/word_cloud.gif


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | *.xml
2 | *.iml
3 | .ipynb_checkpoints/Word Cloud Examples-checkpoint.ipynb
4 | *.pyc
5 | 


--------------------------------------------------------------------------------
/setup.py:
--------------------------------------------------------------------------------
 1 | from setuptools import setup, find_packages
 2 | import io,os,sys
 3 | 
 4 | def tag():
 5 |     return os.getenv("version")
 6 | 
 7 | 
 8 | def read_text_lines(fname):
 9 |     with io.open(fname) as fd:
10 |         lines=fd.readlines()
11 |         return ''.join(lines)
12 | 
13 | 
# Package metadata. The version and the download URL both come from tag(),
# i.e. from the ``version`` environment variable, so that variable has to be
# exported before building a release.
setup(
    name="word_cloud",
    version=tag(),
    packages=find_packages(),
    description='Word cloud of data scientist',
    # read through the helper so the README file handle is closed right away
    long_description=read_text_lines("README.md"),
    classifiers=[
        'Programming Language :: Python :: 3.5',
        'Programming Language :: Python :: 3.6',
        'Topic :: Scientific/Engineering :: Artificial Intelligence',
        'Topic :: Scientific/Engineering :: Information Analysis',
        'Topic :: Text Processing :: Linguistic'
    ],
    author='kavgan',
    author_email='ganesan.kavita@gmail.com',
    license='Apache',
    url='https://github.com/kavgan/word_cloud',
    download_url='https://github.com/kavgan/word_cloud/archive/{0}.tar.gz'.format(tag()),
    keywords=['word cloud','visualization','text mining'],
    install_requires=[
        'scikit-learn>=0.19.1',
        'pandas>=0.20.3'
    ],
    include_package_data=True,
    entry_points={

    }
)
42 | 
43 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | # word_cloud
 2 | Library for word cloud visualization for data scientists. Use it within a Jupyter notebook, from a web app, etc.
 3 | 
 4 | ![alt text](word_cloud.gif)
 5 | 
 6 | ## Features
 7 | 
 8 | - Generate word cloud for individual documents
 9 | - Generate word cloud using a list of documents
10 | - Generate word cloud for words or phrases that already have scores defined
11 | - Embed in Jupyter Notebook
12 | - Show on an HTML page
13 | - Randomize colors
14 | 
15 | 
16 | ## Quick Start
17 | 
18 | 1. Install with pip
19 | 
20 | ```
21 | pip install git+ssh://git@github.com/kavgan/word_cloud.git
22 | ```
23 | 
24 | 2. Instantiate `WordCloud`, get the word cloud HTML code, and display it!
25 | 
26 | ``` python
27 | from word_cloud.word_cloud_generator import WordCloud
28 | from IPython.core.display import HTML
29 | 
30 | ENGLISH_STOP_WORDS = frozenset([
31 |     "a", "about", "above", "across", "after", "afterwards", "again", "against",
32 |     "all", "almost", "alone", "along", "already", "also", "although", "always",
33 |     "am", "among", "amongst", "amoungst", "amount", "an", "and", "another",
34 |     "any", "anyhow", "anyone", "anything", "anyway", "anywhere", "are",
35 |     "around", "as", "at", "back", "be", "became", "because", "become",
36 |     "becomes", "becoming", "been", "before", "beforehand", "behind", "being",
37 |     "below", "beside", "besides", "between", "beyond", "bill", "both",
38 |     "bottom", "but", "by", "call", "can", "cannot", "cant", "co", "con",
39 |     "could", "couldnt", "cry", "de", "describe", "detail", "do", "done",
40 |     "down", "due", "during", "each", "eg", "eight", "either", "eleven", "else",
41 |     "elsewhere", "empty", "enough", "etc", "even", "ever", "every", "everyone",
42 |     "everything", "everywhere", "except", "few", "fifteen", "fifty", "fill",
43 |     "find", "fire", "first", "five", "for", "former", "formerly", "forty",
44 |     "found", "four", "from", "front", "full", "further", "get", "give", "go",
45 |     "had", "has", "hasnt", "have", "he", "hence", "her", "here", "hereafter",
46 |     "who", "whoever", "whole", "whom", "whose", "why", "will", "with",
47 |     "within", "without", "would", "yet", "said","you", "your", "yours", "yourself",
48 |     "yourselves"])
49 | 
50 | # list of documents
51 | texts=['MEXICO CITY — Newly formed Hurricane Willa rapidly intensified off Mexico\'s Pacific coast','MEXICO CITY — Newly formed Hurricane Willa rapidly intensified off Mexico\'s Pacific coast Sunday and early Monday and became a major Category 5 storm, the U.S. National Hurricane Center said. As of 11 a.m. ET., Willa had maximum sustained winds of 160 mph -- just 3 mph over the threshold for a Category 5.    Willa was "potentially catastrophic," forecasters warned. The hurricane center said it could make landfall along Mexico\'s southwestern coast Tuesday afternoon or evening and bring with it a life-threatening storm surge -- especially near and to the south of where the center of Willa makes landfall.    Near the coast, the surge will be accompanied by large and destructive waves. Willa is also forecast to bring high winds and heavy rainfall.    "Slight weakening is forecast to begin on Tuesday, but Willa is expected to be an extremely dangerous major hurricane when it reaches the coast of Mexico," the center said.    A map made by the U.S. National Hurricane Center shows the projected path for Hurricane Willa as of 11 a.m. ET on Oct. 22, 2018.   A map made by the U.S. National Hurricane Center shows the projected path for Hurricane Willa as of 11 a.m. ET on Oct. 22, 2018. NATIONAL HURRICANE CENTER  The center said Willa was about 175 miles south-southwest of Las Islas Marias, Mexico, and some 135 miles southwest of Cabo Corrientes, Mexico, and was moving north at about 7 mph.    Hurricane-force winds extended outward up to 30 miles from the center and tropical-storm-force winds extended outward up to 105 miles.    A hurricane warning was posted for a stretch of shore between San Blas and Mazatlan. A tropical storm warning was in effect for Playa Perula to San Blas and north of Mazatlan to Bahia Tempehuaya.    
Forecasters said Willa is expected to produce storm total rainfall accumulations of 6 to 12 inches, with local amounts up to 18 inches, across portions of western Jalisco, western Nayarit, and southern Sinaloa in Mexico. The rainfall could cause life-threatening flash flooding and landslides.    Farther inland, Willa is expected to produce rainfall amounts of 2 to 4 inches across portions of Zacateca, Durango, southeast Chihuahua, and Coahuila in Mexico, with local amounts up to 6 inches possible. That could cause life-threatening flash flooding.    After Willa makes its way across Mexico, it could drop between 1 and 3 inches of rain on central and southern Texas during the middle of the week, CBS News contributing meteorologist Jeff Berardelli reports. The additional rainfall could cause additional flooding in already saturated areas.','early Monday and became a major Category 5 storm, the U.S. National Hurricane Center said. As of 11 a.m. ET., Willa had maximum sustained winds of 160 mph -- just 3 mph over the threshold for a Category 5.    Willa was "potentially catastrophic," forecasters warned. The hurricane center said it could make landfall along Mexico\'s southwestern coast Tuesday afternoon or evening and bring with it a life-threatening storm surge -- especially near and to the south of where the center of Willa makes landfall.    Near the coast, the surge will be accompanied by large and destructive waves. Willa is also forecast to bring high winds and heavy rainfall.    "Slight weakening is forecast to begin on Tuesday, but Willa is expected to be an extremely dangerous major hurricane when it reaches the coast of Mexico," the center said.    A map made by the U.S. National Hurricane Center shows the projected path for Hurricane Willa as of 11 a.m. ET on Oct. 22, 2018.   A map made by the U.S. National Hurricane Center shows the projected path for Hurricane Willa as of 11 a.m. ET on Oct. 22, 2018. 
NATIONAL HURRICANE CENTER  The center said Willa was about 175 miles south-southwest of Las Islas Marias, Mexico, and some 135 miles southwest of Cabo Corrientes, Mexico, and was moving north at about 7 mph.    Hurricane-force winds extended outward up to 30 miles from the center and tropical-storm-force winds extended outward up to 105 miles.    A hurricane warning was posted for a stretch of shore between San Blas and Mazatlan. A tropical storm warning was in effect for Playa Perula to San Blas and north of Mazatlan to Bahia Tempehuaya.    Forecasters said Willa is expected to produce storm total rainfall accumulations of 6 to 12 inches, with local amounts up to 18 inches, across portions of western Jalisco, western Nayarit, and southern Sinaloa in Mexico. The rainfall could cause life-threatening flash flooding and landslides.    Farther inland, Willa is expected to produce rainfall amounts of 2 to 4 inches across portions of Zacateca, Durango, southeast Chihuahua, and Coahuila in Mexico, with local amounts up to 6 inches possible. That could cause life-threatening flash flooding.    After Willa makes its way across Mexico, it could drop between 1 and 3 inches of rain on central and southern Texas during the middle of the week, CBS News contributing meteorologist Jeff Berardelli reports. The additional rainfall could cause additional flooding in already saturated areas.']
52 | 
53 | # initialize WordCloud
54 | wc=WordCloud(stopwords=ENGLISH_STOP_WORDS)
55 | 
56 | # get html code
57 | embed_code=wc.get_embed_code(text=texts,random_color=True,topn=40)
58 | 
59 | # display
60 | HTML(embed_code)
61 | 
62 | ```
63 | 
64 | ## More Examples
65 | - [Check out the Jupyter Notebook from this repo](https://github.com/kavgan/word_cloud/blob/master/Example%20word%20clouds.ipynb) (the word cloud only renders if your server is running)
66 | - [Jupyter Notebook on Google's Colaboratory](https://colab.research.google.com/drive/1AkdUKEFmaYom77r6KPh18jdQrplIQbKQ)
67 | - Article about this [Python word cloud](http://kavita-ganesan.com/word-cloud-for-data-scientists/#.W86v6RNKj64) module
68 | 


--------------------------------------------------------------------------------
/word_cloud/word_cloud_generator.py:
--------------------------------------------------------------------------------
  1 | '''
  2 | 
  3 | Generate HTML code for word cloud
  4 | 
  5 | '''
  6 | 
  7 | import pandas as pd
  8 | import logging
  9 | import numpy as np
 10 | import random
 11 | from sklearn.feature_extraction.text import CountVectorizer
 12 | from sklearn.feature_extraction.text import TfidfTransformer
 13 | 
 14 | 
 15 | class WordCloud:
 16 | 
 17 |     def __init__(self, stopwords=[], use_tfidf=False):
 18 | 
 19 |         self.use_tfidf = use_tfidf
 20 |         self.data = []
 21 |         self.color_choices = ['#b82c2c',
 22 |                               '#a55571',
 23 |                               '#bc72d0',
 24 |                               '#8000FF',
 25 |                               '#3498DB',
 26 |                               '#FF5733',
 27 |                               '#223AE6',
 28 |                               '#2ECC71',
 29 |                               '#5F6A6A',
 30 |                               '#6C22E6',
 31 |                               '#CE22E6',
 32 |                               '#ACB02E',
 33 |                               '#B18904',
 34 |                               '#848484',
 35 |                               '#04B404',
 36 |                               '#5882FA',
 37 |                               '#FF0080',
 38 |                               '#0489B1',
 39 |                               '#FA5858',
 40 |                               '#DBA901',
 41 |                               '#00b4ff',
 42 |                               '#008080',
 43 |                               '#003366',
 44 |                               '#725394'
 45 |                               ]
 46 |         self.color_choices = ['#b82c2c',
 47 |                               '#a55571',
 48 |                               '#bc72d0',
 49 |                               '#8000FF',
 50 |                               '#3498DB',
 51 |                               '#FF5733',
 52 |                               '#223AE6',
 53 |                               '#2ECC71',
 54 |                               '#5F6A6A',
 55 |                               '#6C22E6',
 56 |                               '#CE22E6',
 57 |                               '#ACB02E',
 58 |                               '#B18904',
 59 |                               '#848484',
 60 |                               '#04B404',
 61 |                               '#5882FA',
 62 |                               '#FF0080',
 63 |                               '#0489B1',
 64 |                               '#FA5858',
 65 |                               '#DBA901',
 66 |                               '#00b4ff',
 67 |                               '#008080',
 68 |                               '#003366',
 69 |                               '#725394'
 70 |                               ]
 71 | 
 72 |         # load a set of stop words
 73 |         self.stopwords = stopwords
 74 | 
 75 |     def get_color_code(self, score):
 76 |         """Get the appropriate color codes."""
 77 | 
 78 |         step = 0.05
 79 |         current_incremented_score = 0
 80 |         idx = 0
 81 | 
 82 |         while current_incremented_score < 1:
 83 |             if score <= current_incremented_score:
 84 |                 return self.color_choices[idx]
 85 |             idx += 1
 86 |             current_incremented_score = current_incremented_score + step
 87 | 
 88 |         return self.color_choices[0]
 89 | 
 90 | 
 91 |     def get_font_size(self, score: float):
 92 |         """Increment scale until score almost equals current_incremented_score."""
 93 | 
 94 |         # font size start and increment
 95 |         scale = 0.5
 96 |         max_scale = 2.5
 97 |         scale_step = 0.15
 98 | 
 99 |         # score increment
100 |         score_step = 0.05
101 |         current_incremented_score = 0
102 | 
103 |         while current_incremented_score < 1:
104 | 
105 |             # increment scale until score almost equals current_incremented_score
106 |             # the larger the score, the more the scale increment
107 |             if score <= current_incremented_score:
108 |                 return scale
109 | 
110 |             current_incremented_score = current_incremented_score + score_step
111 |             scale += scale_step
112 | 
113 |             #if scale > max_scale:
114 |              #   scale = max_scale
115 | 
116 |         return scale
117 | 
118 |     def get_embed_code(self, text_scores: pd.DataFrame = None, text: list = [], topn=100, random_color=True):
119 | 
120 |         if text_scores is None and len(text) > 0:
121 |             items = self.extract_topn_from_vector(text, topn=topn)
122 |             text_df = pd.DataFrame(items, columns=['words', 'score'])
123 |         elif text_scores is not None:
124 |             text_df = text_scores
125 |             text_df.columns = ['words', 'score']
126 |         else:
127 |             logging.error(
128 |                 "There is a problem with your input text. Did you provide any?")
129 |             return
130 | 
131 |         if random_color:
132 |             random.shuffle(self.color_choices)
133 | 
134 |         word_cloud_items = []
135 | 
136 |         html = [
137 |             "<div align='center' style='width:100%'><div align='center' style='text-align:justify; border-radius: 25px;background: #fff7f7;overflow: auto; width:500px !important; padding:20px; '; text-align: center; word-wrap: break-word;>"]
138 |         for idx, row in text_df.iterrows():
139 |             word = row.words.replace(" ", "-")
140 |             scale = self.get_font_size(row.score)
141 |             color_code = self.get_color_code(row.score)
142 |             word_cloud_items.append(
143 |                 " <span style='color:{0};font-size:{1}em;white-space: normal;font-family:verdana;display: inline-block;line-height:30px'>{2}&nbsp;</span>".format(
144 |                     color_code, scale, word))
145 | 
146 |         random.shuffle(word_cloud_items)
147 |         random.shuffle(word_cloud_items)
148 | 
149 |         html.extend(word_cloud_items)
150 |         html.append("</div></div>")
151 |         return ''.join(html)
152 | 
153 |     def sort_coo(self, coo_matrix):
154 |         tuples = zip(coo_matrix.col, coo_matrix.data)
155 |         return sorted(tuples, key=lambda x: (x[1], x[0]), reverse=True)
156 | 
157 | 
158 |     def get_ranks(self,word_vector):
159 |         """Get normalized tf."""
160 | 
161 |         max = np.max(word_vector)
162 | 
163 |         # normalize raw counts
164 |         word_count_vector = np.multiply(word_vector, 1/(max))
165 | 
166 |         return word_count_vector
167 | 
168 |     def get_normalized_tf(self, cv: CountVectorizer, text: list):
169 |         """Get normalized tf."""
170 | 
171 |         big_text = ' '.join(text)
172 |         word_count_vector = cv.fit_transform([big_text])
173 |         max = np.max(word_count_vector)
174 | 
175 |         # normalize raw counts
176 |         word_count_vector = np.multiply(word_count_vector, 1/(max))
177 | 
178 |         return word_count_vector
179 | 
180 |     def get_tfidf_scores(self, cv: CountVectorizer, text: list):
181 |         """Get tfidf values."""
182 | 
183 |         word_count_vector = cv.fit_transform(text)
184 | 
185 |         big_text = ' '.join(text)
186 | 
187 |         # compute word scores
188 |         tfidf_transformer = TfidfTransformer(
189 |             smooth_idf=False, use_idf=True, norm='l2')
190 |         tfidf_transformer.fit(word_count_vector)
191 |         tf_idf_vector = tfidf_transformer.transform(cv.transform([big_text]))
192 | 
193 |         return tf_idf_vector
194 | 
195 |     def extract_topn_from_vector(self, text: list, topn=10):
196 |         """Extract keywords based on tf-idf score."""
197 | 
198 |         # get word count
199 |         cv = CountVectorizer(stop_words=self.stopwords)
200 | 
201 |         word_scores_vector = None
202 |         if self.use_tfidf:
203 |             word_scores_vector = self.get_tfidf_scores(cv, text)
204 |         else:
205 |             word_scores_vector = self.get_normalized_tf(cv, text)
206 | 
207 |         #word_scores_vector=self.get_ranks(word_scores_vector)
208 | 
209 |         # sort the tf-idf vectors by descending order of scores
210 |         sorted_items = self.sort_coo(word_scores_vector.tocoo())
211 |         sorted_items = sorted_items[:topn]
212 | 
213 | 
214 | 
215 |         final_items = []
216 | 
217 |         # word index and corresponding tf-idf score
218 |         for idx, score in sorted_items:
219 |             final_items.append([cv.get_feature_names()[idx], score])
220 | 
221 |         return final_items
222 | 


--------------------------------------------------------------------------------
/Word Cloud Examples.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "code",
  5 |    "execution_count": 4,
  6 |    "metadata": {},
  7 |    "outputs": [
  8 |     {
  9 |      "data": {
 10 |       "text/html": [
 11 |        "<div align='center' style='width:100%'><div align='center' style='text-align:justify; border-radius: 25px;background: #fff7f7;overflow: auto; width:500px !important; padding:20px; '; text-align: center; word-wrap: break-word;> <span style='color:#bc72d0;font-size:1.1em;white-space: normal;font-family:verdana;display: inline-block;line-height:30px'>near&nbsp;</span> <span style='color:#bc72d0;font-size:1.1em;white-space: normal;font-family:verdana;display: inline-block;line-height:30px'>forecast&nbsp;</span> <span style='color:#FF5733;font-size:2.0em;white-space: normal;font-family:verdana;display: inline-block;line-height:30px'>rainfall&nbsp;</span> <span style='color:#bc72d0;font-size:1.1em;white-space: normal;font-family:verdana;display: inline-block;line-height:30px'>forecasters&nbsp;</span> <span style='color:#5F6A6A;font-size:2.8999999999999995em;white-space: normal;font-family:verdana;display: inline-block;line-height:30px'>center&nbsp;</span> <span style='color:#8000FF;font-size:1.4000000000000001em;white-space: normal;font-family:verdana;display: inline-block;line-height:30px'>cause&nbsp;</span> <span style='color:#3498DB;font-size:1.7000000000000002em;white-space: normal;font-family:verdana;display: inline-block;line-height:30px'>coast&nbsp;</span> <span style='color:#bc72d0;font-size:1.1em;white-space: normal;font-family:verdana;display: inline-block;line-height:30px'>oct&nbsp;</span> <span style='color:#bc72d0;font-size:1.1em;white-space: normal;font-family:verdana;display: inline-block;line-height:30px'>path&nbsp;</span> <span style='color:#8000FF;font-size:1.4000000000000001em;white-space: normal;font-family:verdana;display: inline-block;line-height:30px'>life&nbsp;</span> <span style='color:#3498DB;font-size:1.7000000000000002em;white-space: normal;font-family:verdana;display: inline-block;line-height:30px'>miles&nbsp;</span> <span style='color:#bc72d0;font-size:1.1em;white-space: normal;font-family:verdana;display: 
inline-block;line-height:30px'>projected&nbsp;</span> <span style='color:#bc72d0;font-size:1.1em;white-space: normal;font-family:verdana;display: inline-block;line-height:30px'>mazatlan&nbsp;</span> <span style='color:#bc72d0;font-size:1.1em;white-space: normal;font-family:verdana;display: inline-block;line-height:30px'>makes&nbsp;</span> <span style='color:#CE22E6;font-size:3.0em;white-space: normal;font-family:verdana;display: inline-block;line-height:30px'>hurricane&nbsp;</span> <span style='color:#bc72d0;font-size:1.1em;white-space: normal;font-family:verdana;display: inline-block;line-height:30px'>produce&nbsp;</span> <span style='color:#bc72d0;font-size:1.1em;white-space: normal;font-family:verdana;display: inline-block;line-height:30px'>shows&nbsp;</span> <span style='color:#ACB02E;font-size:3.0em;white-space: normal;font-family:verdana;display: inline-block;line-height:30px'>willa&nbsp;</span> <span style='color:#bc72d0;font-size:1.1em;white-space: normal;font-family:verdana;display: inline-block;line-height:30px'>category&nbsp;</span> <span style='color:#bc72d0;font-size:1.1em;white-space: normal;font-family:verdana;display: inline-block;line-height:30px'>south&nbsp;</span> <span style='color:#bc72d0;font-size:1.1em;white-space: normal;font-family:verdana;display: inline-block;line-height:30px'>warning&nbsp;</span> <span style='color:#bc72d0;font-size:1.1em;white-space: normal;font-family:verdana;display: inline-block;line-height:30px'>map&nbsp;</span> <span style='color:#bc72d0;font-size:1.1em;white-space: normal;font-family:verdana;display: inline-block;line-height:30px'>surge&nbsp;</span> <span style='color:#8000FF;font-size:1.4000000000000001em;white-space: normal;font-family:verdana;display: inline-block;line-height:30px'>mph&nbsp;</span> <span style='color:#8000FF;font-size:1.4000000000000001em;white-space: normal;font-family:verdana;display: inline-block;line-height:30px'>flooding&nbsp;</span> <span style='color:#bc72d0;font-size:1.1em;white-space: 
normal;font-family:verdana;display: inline-block;line-height:30px'>tuesday&nbsp;</span> <span style='color:#bc72d0;font-size:1.1em;white-space: normal;font-family:verdana;display: inline-block;line-height:30px'>local&nbsp;</span> <span style='color:#bc72d0;font-size:1.1em;white-space: normal;font-family:verdana;display: inline-block;line-height:30px'>north&nbsp;</span> <span style='color:#3498DB;font-size:1.7000000000000002em;white-space: normal;font-family:verdana;display: inline-block;line-height:30px'>winds&nbsp;</span> <span style='color:#bc72d0;font-size:1.1em;white-space: normal;font-family:verdana;display: inline-block;line-height:30px'>landfall&nbsp;</span> <span style='color:#3498DB;font-size:1.7000000000000002em;white-space: normal;font-family:verdana;display: inline-block;line-height:30px'>national&nbsp;</span> <span style='color:#bc72d0;font-size:1.1em;white-space: normal;font-family:verdana;display: inline-block;line-height:30px'>san&nbsp;</span> <span style='color:#bc72d0;font-size:1.1em;white-space: normal;font-family:verdana;display: inline-block;line-height:30px'>southern&nbsp;</span> <span style='color:#bc72d0;font-size:1.1em;white-space: normal;font-family:verdana;display: inline-block;line-height:30px'>outward&nbsp;</span> <span style='color:#bc72d0;font-size:1.1em;white-space: normal;font-family:verdana;display: inline-block;line-height:30px'>portions&nbsp;</span> <span style='color:#8000FF;font-size:1.4000000000000001em;white-space: normal;font-family:verdana;display: inline-block;line-height:30px'>11&nbsp;</span> <span style='color:#bc72d0;font-size:1.1em;white-space: normal;font-family:verdana;display: inline-block;line-height:30px'>southwest&nbsp;</span> <span style='color:#5F6A6A;font-size:2.8999999999999995em;white-space: normal;font-family:verdana;display: inline-block;line-height:30px'>mexico&nbsp;</span> <span style='color:#FF5733;font-size:2.0em;white-space: normal;font-family:verdana;display: 
inline-block;line-height:30px'>storm&nbsp;</span> <span style='color:#8000FF;font-size:1.4000000000000001em;white-space: normal;font-family:verdana;display: inline-block;line-height:30px'>threatening&nbsp;</span> <span style='color:#bc72d0;font-size:1.1em;white-space: normal;font-family:verdana;display: inline-block;line-height:30px'>flash&nbsp;</span> <span style='color:#8000FF;font-size:1.4000000000000001em;white-space: normal;font-family:verdana;display: inline-block;line-height:30px'>expected&nbsp;</span> <span style='color:#FF5733;font-size:2.0em;white-space: normal;font-family:verdana;display: inline-block;line-height:30px'>inches&nbsp;</span> <span style='color:#bc72d0;font-size:1.1em;white-space: normal;font-family:verdana;display: inline-block;line-height:30px'>extended&nbsp;</span> <span style='color:#8000FF;font-size:1.4000000000000001em;white-space: normal;font-family:verdana;display: inline-block;line-height:30px'>et&nbsp;</span> <span style='color:#bc72d0;font-size:1.1em;white-space: normal;font-family:verdana;display: inline-block;line-height:30px'>tropical&nbsp;</span> <span style='color:#bc72d0;font-size:1.1em;white-space: normal;font-family:verdana;display: inline-block;line-height:30px'>force&nbsp;</span> <span style='color:#bc72d0;font-size:1.1em;white-space: normal;font-family:verdana;display: inline-block;line-height:30px'>western&nbsp;</span> <span style='color:#8000FF;font-size:1.4000000000000001em;white-space: normal;font-family:verdana;display: inline-block;line-height:30px'>amounts&nbsp;</span> <span style='color:#bc72d0;font-size:1.1em;white-space: normal;font-family:verdana;display: inline-block;line-height:30px'>major&nbsp;</span></div></div>"
 12 |       ],
 13 |       "text/plain": [
 14 |        "<IPython.core.display.HTML object>"
 15 |       ]
 16 |      },
 17 |      "execution_count": 4,
 18 |      "metadata": {},
 19 |      "output_type": "execute_result"
 20 |     }
 21 |    ],
 22 |    "source": [
 23 |     "from word_cloud.word_cloud import WordCloud\n",
 24 |     "from IPython.core.display import HTML\n",
 25 |     "\n",
 26 |     "ENGLISH_STOP_WORDS = frozenset([\n",
 27 |     "    \"a\", \"about\", \"above\", \"across\", \"after\", \"afterwards\", \"again\", \"against\",\n",
 28 |     "    \"all\", \"almost\", \"alone\", \"along\", \"already\", \"also\", \"although\", \"always\",\n",
 29 |     "    \"am\", \"among\", \"amongst\", \"amoungst\", \"amount\", \"an\", \"and\", \"another\",\n",
 30 |     "    \"any\", \"anyhow\", \"anyone\", \"anything\", \"anyway\", \"anywhere\", \"are\",\n",
 31 |     "    \"around\", \"as\", \"at\", \"back\", \"be\", \"became\", \"because\", \"become\",\n",
 32 |     "    \"becomes\", \"becoming\", \"been\", \"before\", \"beforehand\", \"behind\", \"being\",\n",
 33 |     "    \"below\", \"beside\", \"besides\", \"between\", \"beyond\", \"bill\", \"both\",\n",
 34 |     "    \"bottom\", \"but\", \"by\", \"call\", \"can\", \"cannot\", \"cant\", \"co\", \"con\",\n",
 35 |     "    \"could\", \"couldnt\", \"cry\", \"de\", \"describe\", \"detail\", \"do\", \"done\",\n",
 36 |     "    \"down\", \"due\", \"during\", \"each\", \"eg\", \"eight\", \"either\", \"eleven\", \"else\",\n",
 37 |     "    \"elsewhere\", \"empty\", \"enough\", \"etc\", \"even\", \"ever\", \"every\", \"everyone\",\n",
 38 |     "    \"everything\", \"everywhere\", \"except\", \"few\", \"fifteen\", \"fifty\", \"fill\",\n",
 39 |     "    \"find\", \"fire\", \"first\", \"five\", \"for\", \"former\", \"formerly\", \"forty\",\n",
 40 |     "    \"found\", \"four\", \"from\", \"front\", \"full\", \"further\", \"get\", \"give\", \"go\",\n",
 41 |     "    \"had\", \"has\", \"hasnt\", \"have\", \"he\", \"hence\", \"her\", \"here\", \"hereafter\",\n",
 42 |     "    \"hereby\", \"herein\", \"hereupon\", \"hers\", \"herself\", \"him\", \"himself\", \"his\",\n",
 43 |     "    \"how\", \"however\", \"hundred\", \"i\", \"ie\", \"if\", \"in\", \"inc\", \"indeed\",\n",
 44 |     "    \"interest\", \"into\", \"is\", \"it\", \"its\", \"itself\", \"keep\", \"last\", \"latter\",\n",
 45 |     "    \"latterly\", \"least\", \"less\", \"ltd\", \"made\", \"many\", \"may\", \"me\",\n",
 46 |     "    \"meanwhile\", \"might\", \"mill\", \"mine\", \"more\", \"moreover\", \"most\", \"mostly\",\n",
 47 |     "    \"move\", \"much\", \"must\", \"my\", \"myself\", \"name\", \"namely\", \"neither\",\n",
 48 |     "    \"never\", \"nevertheless\", \"next\", \"nine\", \"no\", \"nobody\", \"none\", \"noone\",\n",
 49 |     "    \"nor\", \"not\", \"nothing\", \"now\", \"nowhere\", \"of\", \"off\", \"often\", \"on\",\n",
 50 |     "    \"once\", \"one\", \"only\", \"onto\", \"or\", \"other\", \"others\", \"otherwise\", \"our\",\n",
 51 |     "    \"ours\", \"ourselves\", \"out\", \"over\", \"own\", \"part\", \"per\", \"perhaps\",\n",
 52 |     "    \"please\", \"put\", \"rather\", \"re\", \"same\", \"see\", \"seem\", \"seemed\",\n",
 53 |     "    \"seeming\", \"seems\", \"serious\", \"several\", \"she\", \"should\", \"show\", \"side\",\n",
 54 |     "    \"since\", \"sincere\", \"six\", \"sixty\", \"so\", \"some\", \"somehow\", \"someone\",\n",
 55 |     "    \"something\", \"sometime\", \"sometimes\", \"somewhere\", \"still\", \"such\",\n",
 56 |     "    \"system\", \"take\", \"ten\", \"than\", \"that\", \"the\", \"their\", \"them\",\n",
 57 |     "    \"themselves\", \"then\", \"thence\", \"there\", \"thereafter\", \"thereby\",\n",
 58 |     "    \"therefore\", \"therein\", \"thereupon\", \"these\", \"they\", \"thick\", \"thin\",\n",
 59 |     "    \"third\", \"this\", \"those\", \"though\", \"three\", \"through\", \"throughout\",\n",
 60 |     "    \"thru\", \"thus\", \"to\", \"together\", \"too\", \"top\", \"toward\", \"towards\",\n",
 61 |     "    \"twelve\", \"twenty\", \"two\", \"un\", \"under\", \"until\", \"up\", \"upon\", \"us\",\n",
 62 |     "    \"very\", \"via\", \"was\", \"we\", \"well\", \"were\", \"what\", \"whatever\", \"when\",\n",
 63 |     "    \"whence\", \"whenever\", \"where\", \"whereafter\", \"whereas\", \"whereby\",\n",
 64 |     "    \"wherein\", \"whereupon\", \"wherever\", \"whether\", \"which\", \"while\", \"whither\",\n",
 65 |     "    \"who\", \"whoever\", \"whole\", \"whom\", \"whose\", \"why\", \"will\", \"with\",\n",
 66 |     "    \"within\", \"without\", \"would\", \"yet\", \"said\",\"you\", \"your\", \"yours\", \"yourself\",\n",
 67 |     "    \"yourselves\"])\n",
 68 |     "\n",
 69 |     "wc=WordCloud(use_tfidf=False,stopwords=ENGLISH_STOP_WORDS)\n",
 70 |     "texts=['MEXICO CITY — Newly formed Hurricane Willa rapidly intensified off Mexico\\'s Pacific coast','MEXICO CITY — Newly formed Hurricane Willa rapidly intensified off Mexico\\'s Pacific coast Sunday and early Monday and became a major Category 5 storm, the U.S. National Hurricane Center said. As of 11 a.m. ET., Willa had maximum sustained winds of 160 mph -- just 3 mph over the threshold for a Category 5.    Willa was \"potentially catastrophic,\" forecasters warned. The hurricane center said it could make landfall along Mexico\\'s southwestern coast Tuesday afternoon or evening and bring with it a life-threatening storm surge -- especially near and to the south of where the center of Willa makes landfall.    Near the coast, the surge will be accompanied by large and destructive waves. Willa is also forecast to bring high winds and heavy rainfall.    \"Slight weakening is forecast to begin on Tuesday, but Willa is expected to be an extremely dangerous major hurricane when it reaches the coast of Mexico,\" the center said.    A map made by the U.S. National Hurricane Center shows the projected path for Hurricane Willa as of 11 a.m. ET on Oct. 22, 2018.   A map made by the U.S. National Hurricane Center shows the projected path for Hurricane Willa as of 11 a.m. ET on Oct. 22, 2018. NATIONAL HURRICANE CENTER  The center said Willa was about 175 miles south-southwest of Las Islas Marias, Mexico, and some 135 miles southwest of Cabo Corrientes, Mexico, and was moving north at about 7 mph.    Hurricane-force winds extended outward up to 30 miles from the center and tropical-storm-force winds extended outward up to 105 miles.    A hurricane warning was posted for a stretch of shore between San Blas and Mazatlan. A tropical storm warning was in effect for Playa Perula to San Blas and north of Mazatlan to Bahia Tempehuaya.    
Forecasters said Willa is expected to produce storm total rainfall accumulations of 6 to 12 inches, with local amounts up to 18 inches, across portions of western Jalisco, western Nayarit, and southern Sinaloa in Mexico. The rainfall could cause life-threatening flash flooding and landslides.    Farther inland, Willa is expected to produce rainfall amounts of 2 to 4 inches across portions of Zacateca, Durango, southeast Chihuahua, and Coahuila in Mexico, with local amounts up to 6 inches possible. That could cause life-threatening flash flooding.    After Willa makes its way across Mexico, it could drop between 1 and 3 inches of rain on central and southern Texas during the middle of the week, CBS News contributing meteorologist Jeff Berardelli reports. The additional rainfall could cause additional flooding in already saturated areas.','early Monday and became a major Category 5 storm, the U.S. National Hurricane Center said. As of 11 a.m. ET., Willa had maximum sustained winds of 160 mph -- just 3 mph over the threshold for a Category 5.    Willa was \"potentially catastrophic,\" forecasters warned. The hurricane center said it could make landfall along Mexico\\'s southwestern coast Tuesday afternoon or evening and bring with it a life-threatening storm surge -- especially near and to the south of where the center of Willa makes landfall.    Near the coast, the surge will be accompanied by large and destructive waves. Willa is also forecast to bring high winds and heavy rainfall.    \"Slight weakening is forecast to begin on Tuesday, but Willa is expected to be an extremely dangerous major hurricane when it reaches the coast of Mexico,\" the center said.    A map made by the U.S. National Hurricane Center shows the projected path for Hurricane Willa as of 11 a.m. ET on Oct. 22, 2018.   A map made by the U.S. National Hurricane Center shows the projected path for Hurricane Willa as of 11 a.m. ET on Oct. 22, 2018. 
NATIONAL HURRICANE CENTER  The center said Willa was about 175 miles south-southwest of Las Islas Marias, Mexico, and some 135 miles southwest of Cabo Corrientes, Mexico, and was moving north at about 7 mph.    Hurricane-force winds extended outward up to 30 miles from the center and tropical-storm-force winds extended outward up to 105 miles.    A hurricane warning was posted for a stretch of shore between San Blas and Mazatlan. A tropical storm warning was in effect for Playa Perula to San Blas and north of Mazatlan to Bahia Tempehuaya.    Forecasters said Willa is expected to produce storm total rainfall accumulations of 6 to 12 inches, with local amounts up to 18 inches, across portions of western Jalisco, western Nayarit, and southern Sinaloa in Mexico. The rainfall could cause life-threatening flash flooding and landslides.    Farther inland, Willa is expected to produce rainfall amounts of 2 to 4 inches across portions of Zacateca, Durango, southeast Chihuahua, and Coahuila in Mexico, with local amounts up to 6 inches possible. That could cause life-threatening flash flooding.    After Willa makes its way across Mexico, it could drop between 1 and 3 inches of rain on central and southern Texas during the middle of the week, CBS News contributing meteorologist Jeff Berardelli reports. The additional rainfall could cause additional flooding in already saturated areas.']\n",
 71 |     "embed_code=wc.get_embed_code(text=texts,random_color=False,topn=50)\n",
 72 |     "HTML(embed_code)"
 73 |    ]
 74 |   },
 75 |   {
 76 |    "cell_type": "code",
 77 |    "execution_count": null,
 78 |    "metadata": {},
 79 |    "outputs": [],
 80 |    "source": []
 81 |   }
 82 |  ],
 83 |  "metadata": {
 84 |   "kernelspec": {
 85 |    "display_name": "Python 3",
 86 |    "language": "python",
 87 |    "name": "python3"
 88 |   },
 89 |   "language_info": {
 90 |    "codemirror_mode": {
 91 |     "name": "ipython",
 92 |     "version": 3
 93 |    },
 94 |    "file_extension": ".py",
 95 |    "mimetype": "text/x-python",
 96 |    "name": "python",
 97 |    "nbconvert_exporter": "python",
 98 |    "pygments_lexer": "ipython3",
 99 |    "version": "3.6.5"
100 |   }
101 |  },
102 |  "nbformat": 4,
103 |  "nbformat_minor": 1
104 | }
105 | 


--------------------------------------------------------------------------------
/Example word clouds.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "code",
  5 |    "execution_count": 1,
  6 |    "metadata": {},
  7 |    "outputs": [],
  8 |    "source": [
  9 |     "from word_cloud.word_cloud_generator import WordCloud\n",
 10 |     "from IPython.core.display import HTML\n",
 11 |     "from nltk.corpus import reuters\n",
 12 |     "import nltk\n",
 13 |     "import pandas as pd"
 14 |    ]
 15 |   },
 16 |   {
 17 |    "cell_type": "code",
 18 |    "execution_count": 2,
 19 |    "metadata": {},
 20 |    "outputs": [],
 21 |    "source": [
 22 |     "ENGLISH_STOP_WORDS = frozenset([\n",
 23 |     "    \"a\", \"about\", \"above\", \"across\", \"after\", \"afterwards\", \"again\", \"against\",\n",
 24 |     "    \"all\", \"almost\", \"alone\", \"along\", \"already\", \"also\", \"although\", \"always\",\n",
 25 |     "    \"am\", \"among\", \"amongst\", \"amoungst\", \"amount\", \"an\", \"and\", \"another\",\n",
 26 |     "    \"any\", \"anyhow\", \"anyone\", \"anything\", \"anyway\", \"anywhere\", \"are\",\n",
 27 |     "    \"around\", \"as\", \"at\", \"back\", \"be\", \"became\", \"because\", \"become\",\n",
 28 |     "    \"becomes\", \"becoming\", \"been\", \"before\", \"beforehand\", \"behind\", \"being\",\n",
 29 |     "    \"below\", \"beside\", \"besides\", \"between\", \"beyond\", \"bill\", \"both\",\n",
 30 |     "    \"bottom\", \"but\", \"by\", \"call\", \"can\", \"cannot\", \"cant\", \"co\", \"con\",\n",
 31 |     "    \"could\", \"couldnt\", \"cry\", \"de\", \"describe\", \"detail\", \"do\", \"done\",\n",
 32 |     "    \"down\", \"due\", \"during\", \"each\", \"eg\", \"eight\", \"either\", \"eleven\", \"else\",\n",
 33 |     "    \"elsewhere\", \"empty\", \"enough\", \"etc\", \"even\", \"ever\", \"every\", \"everyone\",\n",
 34 |     "    \"everything\", \"everywhere\", \"except\", \"few\", \"fifteen\", \"fifty\", \"fill\",\n",
 35 |     "    \"find\", \"fire\", \"first\", \"five\", \"for\", \"former\", \"formerly\", \"forty\",\n",
 36 |     "    \"found\", \"four\", \"from\", \"front\", \"full\", \"further\", \"get\", \"give\", \"go\",\n",
 37 |     "    \"had\", \"has\", \"hasnt\", \"have\", \"he\", \"hence\", \"her\", \"here\", \"hereafter\",\n",
 38 |     "    \"hereby\", \"herein\", \"hereupon\", \"hers\", \"herself\", \"him\", \"himself\", \"his\",\n",
 39 |     "    \"how\", \"however\", \"hundred\", \"i\", \"ie\", \"if\", \"in\", \"inc\", \"indeed\",\n",
 40 |     "    \"interest\", \"into\", \"is\", \"it\", \"its\", \"itself\", \"keep\", \"last\", \"latter\",\n",
 41 |     "    \"latterly\", \"least\", \"less\", \"ltd\", \"made\", \"many\", \"may\", \"me\",\n",
 42 |     "    \"meanwhile\", \"might\", \"mill\", \"mine\", \"more\", \"moreover\", \"most\", \"mostly\",\n",
 43 |     "    \"move\", \"much\", \"must\", \"my\", \"myself\", \"name\", \"namely\", \"neither\",\n",
 44 |     "    \"never\", \"nevertheless\", \"next\", \"nine\", \"no\", \"nobody\", \"none\", \"noone\",\n",
 45 |     "    \"nor\", \"not\", \"nothing\", \"now\", \"nowhere\", \"of\", \"off\", \"often\", \"on\",\n",
 46 |     "    \"once\", \"one\", \"only\", \"onto\", \"or\", \"other\", \"others\", \"otherwise\", \"our\",\n",
 47 |     "    \"ours\", \"ourselves\", \"out\", \"over\", \"own\", \"part\", \"per\", \"perhaps\",\n",
 48 |     "    \"please\", \"put\", \"rather\", \"re\", \"same\", \"see\", \"seem\", \"seemed\",\n",
 49 |     "    \"seeming\", \"seems\", \"serious\", \"several\", \"she\", \"should\", \"show\", \"side\",\n",
 50 |     "    \"since\", \"sincere\", \"six\", \"sixty\", \"so\", \"some\", \"somehow\", \"someone\",\n",
 51 |     "    \"something\", \"sometime\", \"sometimes\", \"somewhere\", \"still\", \"such\",\n",
 52 |     "    \"system\", \"take\", \"ten\", \"than\", \"that\", \"the\", \"their\", \"them\",\n",
 53 |     "    \"themselves\", \"then\", \"thence\", \"there\", \"thereafter\", \"thereby\",\n",
 54 |     "    \"therefore\", \"therein\", \"thereupon\", \"these\", \"they\", \"thick\", \"thin\",\n",
 55 |     "    \"third\", \"this\", \"those\", \"though\", \"three\", \"through\", \"throughout\",\n",
 56 |     "    \"thru\", \"thus\", \"to\", \"together\", \"too\", \"top\", \"toward\", \"towards\",\n",
 57 |     "    \"twelve\", \"twenty\", \"two\", \"un\", \"under\", \"until\", \"up\", \"upon\", \"us\",\n",
 58 |     "    \"very\", \"via\", \"was\", \"we\", \"well\", \"were\", \"what\", \"whatever\", \"when\",\n",
 59 |     "    \"whence\", \"whenever\", \"where\", \"whereafter\", \"whereas\", \"whereby\",\n",
 60 |     "    \"wherein\", \"whereupon\", \"wherever\", \"whether\", \"which\", \"while\", \"whither\",\n",
 61 |     "    \"who\", \"whoever\", \"whole\", \"whom\", \"whose\", \"why\", \"will\", \"with\",\n",
 62 |     "    \"within\", \"without\", \"would\", \"yet\", \"said\",\"you\", \"your\", \"yours\", \"yourself\",\n",
 63 |     "    \"yourselves\"])\n"
 64 |    ]
 65 |   },
 66 |   {
 67 |    "cell_type": "markdown",
 68 |    "metadata": {},
 69 |    "source": [
 70 |     "## Generate word clouds with a single text document\n",
 71 |     "\n",
 72 |     "This example showcases how you can generate word clouds with just one document. While the colors can be randomized, in this example the colors are based on the default color settings. By default, the words are weighted by word counts unless you explicitly ask for `tfidf` weighting. Tfidf weighting makes sense only if you have a lot of documents to start with; otherwise the `idf` values would be incorrect."
 73 |    ]
 74 |   },
 75 |   {
 76 |    "cell_type": "code",
 77 |    "execution_count": 19,
 78 |    "metadata": {},
 79 |    "outputs": [
 80 |     {
 81 |      "data": {
 82 |       "text/html": [
 83 |        "<div align='center' style='width:100%'><div align='center' style='text-align:justify; border-radius: 25px;background: #fff7f7;overflow: auto; width:500px !important; padding:20px; '; text-align: center; word-wrap: break-word;> <span style='color:#848484;font-size:1.8499999999999996em;white-space: normal;font-family:verdana;display: inline-block;line-height:30px'>inches&nbsp;</span> <span style='color:#5F6A6A;font-size:3.4999999999999987em;white-space: normal;font-family:verdana;display: inline-block;line-height:30px'>willa&nbsp;</span> <span style='color:#2ECC71;font-size:2.749999999999999em;white-space: normal;font-family:verdana;display: inline-block;line-height:30px'>center&nbsp;</span> <span style='color:#CE22E6;font-size:1.5499999999999998em;white-space: normal;font-family:verdana;display: inline-block;line-height:30px'>national&nbsp;</span> <span style='color:#FF0080;font-size:1.1em;white-space: normal;font-family:verdana;display: inline-block;line-height:30px'>south&nbsp;</span> <span style='color:#3498DB;font-size:1.25em;white-space: normal;font-family:verdana;display: inline-block;line-height:30px'>cause&nbsp;</span> <span style='color:#FF0080;font-size:1.1em;white-space: normal;font-family:verdana;display: inline-block;line-height:30px'>tropical&nbsp;</span> <span style='color:#223AE6;font-size:3.3499999999999988em;white-space: normal;font-family:verdana;display: inline-block;line-height:30px'>hurricane&nbsp;</span> <span style='color:#3498DB;font-size:1.25em;white-space: normal;font-family:verdana;display: inline-block;line-height:30px'>11&nbsp;</span> <span style='color:#FF0080;font-size:1.1em;white-space: normal;font-family:verdana;display: inline-block;line-height:30px'>path&nbsp;</span> <span style='color:#FF0080;font-size:1.1em;white-space: normal;font-family:verdana;display: inline-block;line-height:30px'>mazatlan&nbsp;</span> <span style='color:#CE22E6;font-size:1.5499999999999998em;white-space: normal;font-family:verdana;display: 
inline-block;line-height:30px'>miles&nbsp;</span> <span style='color:#CE22E6;font-size:1.5499999999999998em;white-space: normal;font-family:verdana;display: inline-block;line-height:30px'>winds&nbsp;</span> <span style='color:#3498DB;font-size:1.25em;white-space: normal;font-family:verdana;display: inline-block;line-height:30px'>threatening&nbsp;</span> <span style='color:#2ECC71;font-size:2.749999999999999em;white-space: normal;font-family:verdana;display: inline-block;line-height:30px'>mexico&nbsp;</span> <span style='color:#FF0080;font-size:1.1em;white-space: normal;font-family:verdana;display: inline-block;line-height:30px'>san&nbsp;</span> <span style='color:#FF0080;font-size:1.1em;white-space: normal;font-family:verdana;display: inline-block;line-height:30px'>warning&nbsp;</span> <span style='color:#FF0080;font-size:1.1em;white-space: normal;font-family:verdana;display: inline-block;line-height:30px'>outward&nbsp;</span> <span style='color:#FF0080;font-size:1.1em;white-space: normal;font-family:verdana;display: inline-block;line-height:30px'>southern&nbsp;</span> <span style='color:#FF0080;font-size:1.1em;white-space: normal;font-family:verdana;display: inline-block;line-height:30px'>southwest&nbsp;</span> <span style='color:#FF0080;font-size:1.1em;white-space: normal;font-family:verdana;display: inline-block;line-height:30px'>tuesday&nbsp;</span> <span style='color:#848484;font-size:1.8499999999999996em;white-space: normal;font-family:verdana;display: inline-block;line-height:30px'>storm&nbsp;</span> <span style='color:#3498DB;font-size:1.25em;white-space: normal;font-family:verdana;display: inline-block;line-height:30px'>mph&nbsp;</span> <span style='color:#3498DB;font-size:1.25em;white-space: normal;font-family:verdana;display: inline-block;line-height:30px'>et&nbsp;</span> <span style='color:#FF0080;font-size:1.1em;white-space: normal;font-family:verdana;display: inline-block;line-height:30px'>surge&nbsp;</span> <span 
style='color:#CE22E6;font-size:1.5499999999999998em;white-space: normal;font-family:verdana;display: inline-block;line-height:30px'>coast&nbsp;</span> <span style='color:#FF0080;font-size:1.1em;white-space: normal;font-family:verdana;display: inline-block;line-height:30px'>oct&nbsp;</span> <span style='color:#FF0080;font-size:1.1em;white-space: normal;font-family:verdana;display: inline-block;line-height:30px'>shows&nbsp;</span> <span style='color:#848484;font-size:1.8499999999999996em;white-space: normal;font-family:verdana;display: inline-block;line-height:30px'>rainfall&nbsp;</span> <span style='color:#3498DB;font-size:1.25em;white-space: normal;font-family:verdana;display: inline-block;line-height:30px'>flooding&nbsp;</span> <span style='color:#FF0080;font-size:1.1em;white-space: normal;font-family:verdana;display: inline-block;line-height:30px'>projected&nbsp;</span> <span style='color:#3498DB;font-size:1.25em;white-space: normal;font-family:verdana;display: inline-block;line-height:30px'>expected&nbsp;</span> <span style='color:#3498DB;font-size:1.25em;white-space: normal;font-family:verdana;display: inline-block;line-height:30px'>life&nbsp;</span> <span style='color:#FF0080;font-size:1.1em;white-space: normal;font-family:verdana;display: inline-block;line-height:30px'>produce&nbsp;</span> <span style='color:#FF0080;font-size:1.1em;white-space: normal;font-family:verdana;display: inline-block;line-height:30px'>western&nbsp;</span> <span style='color:#FF0080;font-size:1.1em;white-space: normal;font-family:verdana;display: inline-block;line-height:30px'>north&nbsp;</span> <span style='color:#FF0080;font-size:1.1em;white-space: normal;font-family:verdana;display: inline-block;line-height:30px'>portions&nbsp;</span> <span style='color:#3498DB;font-size:1.25em;white-space: normal;font-family:verdana;display: inline-block;line-height:30px'>amounts&nbsp;</span> <span style='color:#FF0080;font-size:1.1em;white-space: normal;font-family:verdana;display: 
inline-block;line-height:30px'>near&nbsp;</span> <span style='color:#FF0080;font-size:1.1em;white-space: normal;font-family:verdana;display: inline-block;line-height:30px'>map&nbsp;</span></div></div>"
 84 |       ],
 85 |       "text/plain": [
 86 |        "<IPython.core.display.HTML object>"
 87 |       ]
 88 |      },
 89 |      "execution_count": 19,
 90 |      "metadata": {},
 91 |      "output_type": "execute_result"
 92 |     }
 93 |    ],
 94 |    "source": [
 95 |     "#only one news article here\n",
 96 |     "texts=['MEXICO CITY — Newly formed Hurricane Willa rapidly intensified off Mexico\\'s Pacific coast','MEXICO CITY — Newly formed Hurricane Willa rapidly intensified off Mexico\\'s Pacific coast Sunday and early Monday and became a major Category 5 storm, the U.S. National Hurricane Center said. As of 11 a.m. ET., Willa had maximum sustained winds of 160 mph -- just 3 mph over the threshold for a Category 5.    Willa was \"potentially catastrophic,\" forecasters warned. The hurricane center said it could make landfall along Mexico\\'s southwestern coast Tuesday afternoon or evening and bring with it a life-threatening storm surge -- especially near and to the south of where the center of Willa makes landfall.    Near the coast, the surge will be accompanied by large and destructive waves. Willa is also forecast to bring high winds and heavy rainfall.    \"Slight weakening is forecast to begin on Tuesday, but Willa is expected to be an extremely dangerous major hurricane when it reaches the coast of Mexico,\" the center said.    A map made by the U.S. National Hurricane Center shows the projected path for Hurricane Willa as of 11 a.m. ET on Oct. 22, 2018.   A map made by the U.S. National Hurricane Center shows the projected path for Hurricane Willa as of 11 a.m. ET on Oct. 22, 2018. NATIONAL HURRICANE CENTER  The center said Willa was about 175 miles south-southwest of Las Islas Marias, Mexico, and some 135 miles southwest of Cabo Corrientes, Mexico, and was moving north at about 7 mph.    Hurricane-force winds extended outward up to 30 miles from the center and tropical-storm-force winds extended outward up to 105 miles.    A hurricane warning was posted for a stretch of shore between San Blas and Mazatlan. A tropical storm warning was in effect for Playa Perula to San Blas and north of Mazatlan to Bahia Tempehuaya.    
Forecasters said Willa is expected to produce storm total rainfall accumulations of 6 to 12 inches, with local amounts up to 18 inches, across portions of western Jalisco, western Nayarit, and southern Sinaloa in Mexico. The rainfall could cause life-threatening flash flooding and landslides.    Farther inland, Willa is expected to produce rainfall amounts of 2 to 4 inches across portions of Zacateca, Durango, southeast Chihuahua, and Coahuila in Mexico, with local amounts up to 6 inches possible. That could cause life-threatening flash flooding.    After Willa makes its way across Mexico, it could drop between 1 and 3 inches of rain on central and southern Texas during the middle of the week, CBS News contributing meteorologist Jeff Berardelli reports. The additional rainfall could cause additional flooding in already saturated areas.','early Monday and became a major Category 5 storm, the U.S. National Hurricane Center said. As of 11 a.m. ET., Willa had maximum sustained winds of 160 mph -- just 3 mph over the threshold for a Category 5.    Willa was \"potentially catastrophic,\" forecasters warned. The hurricane center said it could make landfall along Mexico\\'s southwestern coast Tuesday afternoon or evening and bring with it a life-threatening storm surge -- especially near and to the south of where the center of Willa makes landfall.    Near the coast, the surge will be accompanied by large and destructive waves. Willa is also forecast to bring high winds and heavy rainfall.    \"Slight weakening is forecast to begin on Tuesday, but Willa is expected to be an extremely dangerous major hurricane when it reaches the coast of Mexico,\" the center said.    A map made by the U.S. National Hurricane Center shows the projected path for Hurricane Willa as of 11 a.m. ET on Oct. 22, 2018.   A map made by the U.S. National Hurricane Center shows the projected path for Hurricane Willa as of 11 a.m. ET on Oct. 22, 2018. 
NATIONAL HURRICANE CENTER  The center said Willa was about 175 miles south-southwest of Las Islas Marias, Mexico, and some 135 miles southwest of Cabo Corrientes, Mexico, and was moving north at about 7 mph.    Hurricane-force winds extended outward up to 30 miles from the center and tropical-storm-force winds extended outward up to 105 miles.    A hurricane warning was posted for a stretch of shore between San Blas and Mazatlan. A tropical storm warning was in effect for Playa Perula to San Blas and north of Mazatlan to Bahia Tempehuaya.    Forecasters said Willa is expected to produce storm total rainfall accumulations of 6 to 12 inches, with local amounts up to 18 inches, across portions of western Jalisco, western Nayarit, and southern Sinaloa in Mexico. The rainfall could cause life-threatening flash flooding and landslides.    Farther inland, Willa is expected to produce rainfall amounts of 2 to 4 inches across portions of Zacateca, Durango, southeast Chihuahua, and Coahuila in Mexico, with local amounts up to 6 inches possible. That could cause life-threatening flash flooding.    After Willa makes its way across Mexico, it could drop between 1 and 3 inches of rain on central and southern Texas during the middle of the week, CBS News contributing meteorologist Jeff Berardelli reports. The additional rainfall could cause additional flooding in already saturated areas.']\n",
 97 |     "\n",
 98 |     "\n",
 99 |     "wc=WordCloud(use_tfidf=False,stopwords=ENGLISH_STOP_WORDS)\n",
100 |     "\n",
101 |     "#randomize color, show only top 40\n",
102 |     "embed_code=wc.get_embed_code(text=texts,random_color=True,topn=40)\n",
103 |     "HTML(embed_code)"
104 |    ]
105 |   },
106 |   {
107 |    "cell_type": "markdown",
108 |    "metadata": {},
109 |    "source": [
110 |     "### Randomized color\n",
111 |     "This is the same example as above, with the colors randomized"
112 |    ]
113 |   },
114 |   {
115 |    "cell_type": "code",
116 |    "execution_count": 12,
117 |    "metadata": {},
118 |    "outputs": [
119 |     {
120 |      "data": {
121 |       "text/html": [
122 |        "<div align='center' style='width:100%'><div align='center' style='text-align:justify; border-radius: 25px;background: #fff7f7;overflow: auto; width:500px !important; padding:20px; '; text-align: center; word-wrap: break-word;> <span style='color:#2ECC71;font-size:1.8499999999999996em;white-space: normal;font-family:verdana;display: inline-block;line-height:30px'>storm&nbsp;</span> <span style='color:#0489B1;font-size:1.1em;white-space: normal;font-family:verdana;display: inline-block;line-height:30px'>projected&nbsp;</span> <span style='color:#725394;font-size:1.25em;white-space: normal;font-family:verdana;display: inline-block;line-height:30px'>flooding&nbsp;</span> <span style='color:#0489B1;font-size:1.1em;white-space: normal;font-family:verdana;display: inline-block;line-height:30px'>oct&nbsp;</span> <span style='color:#2ECC71;font-size:1.8499999999999996em;white-space: normal;font-family:verdana;display: inline-block;line-height:30px'>rainfall&nbsp;</span> <span style='color:#725394;font-size:1.25em;white-space: normal;font-family:verdana;display: inline-block;line-height:30px'>et&nbsp;</span> <span style='color:#0489B1;font-size:1.1em;white-space: normal;font-family:verdana;display: inline-block;line-height:30px'>shows&nbsp;</span> <span style='color:#0489B1;font-size:1.1em;white-space: normal;font-family:verdana;display: inline-block;line-height:30px'>forecast&nbsp;</span> <span style='color:#0489B1;font-size:1.1em;white-space: normal;font-family:verdana;display: inline-block;line-height:30px'>near&nbsp;</span> <span style='color:#725394;font-size:1.25em;white-space: normal;font-family:verdana;display: inline-block;line-height:30px'>mph&nbsp;</span> <span style='color:#0489B1;font-size:1.1em;white-space: normal;font-family:verdana;display: inline-block;line-height:30px'>western&nbsp;</span> <span style='color:#0489B1;font-size:1.1em;white-space: normal;font-family:verdana;display: inline-block;line-height:30px'>south&nbsp;</span> <span 
style='color:#725394;font-size:1.25em;white-space: normal;font-family:verdana;display: inline-block;line-height:30px'>life&nbsp;</span> <span style='color:#0489B1;font-size:1.1em;white-space: normal;font-family:verdana;display: inline-block;line-height:30px'>warning&nbsp;</span> <span style='color:#0489B1;font-size:1.1em;white-space: normal;font-family:verdana;display: inline-block;line-height:30px'>major&nbsp;</span> <span style='color:#0489B1;font-size:1.1em;white-space: normal;font-family:verdana;display: inline-block;line-height:30px'>north&nbsp;</span> <span style='color:#0489B1;font-size:1.1em;white-space: normal;font-family:verdana;display: inline-block;line-height:30px'>san&nbsp;</span> <span style='color:#725394;font-size:1.25em;white-space: normal;font-family:verdana;display: inline-block;line-height:30px'>threatening&nbsp;</span> <span style='color:#B18904;font-size:1.5499999999999998em;white-space: normal;font-family:verdana;display: inline-block;line-height:30px'>winds&nbsp;</span> <span style='color:#0489B1;font-size:1.1em;white-space: normal;font-family:verdana;display: inline-block;line-height:30px'>mazatlan&nbsp;</span> <span style='color:#0489B1;font-size:1.1em;white-space: normal;font-family:verdana;display: inline-block;line-height:30px'>tuesday&nbsp;</span> <span style='color:#0489B1;font-size:1.1em;white-space: normal;font-family:verdana;display: inline-block;line-height:30px'>makes&nbsp;</span> <span style='color:#0489B1;font-size:1.1em;white-space: normal;font-family:verdana;display: inline-block;line-height:30px'>category&nbsp;</span> <span style='color:#0489B1;font-size:1.1em;white-space: normal;font-family:verdana;display: inline-block;line-height:30px'>forecasters&nbsp;</span> <span style='color:#725394;font-size:1.25em;white-space: normal;font-family:verdana;display: inline-block;line-height:30px'>11&nbsp;</span> <span style='color:#B18904;font-size:1.5499999999999998em;white-space: normal;font-family:verdana;display: 
inline-block;line-height:30px'>miles&nbsp;</span> <span style='color:#0489B1;font-size:1.1em;white-space: normal;font-family:verdana;display: inline-block;line-height:30px'>portions&nbsp;</span> <span style='color:#003366;font-size:3.4999999999999987em;white-space: normal;font-family:verdana;display: inline-block;line-height:30px'>willa&nbsp;</span> <span style='color:#0489B1;font-size:1.1em;white-space: normal;font-family:verdana;display: inline-block;line-height:30px'>surge&nbsp;</span> <span style='color:#0489B1;font-size:1.1em;white-space: normal;font-family:verdana;display: inline-block;line-height:30px'>extended&nbsp;</span> <span style='color:#B18904;font-size:1.5499999999999998em;white-space: normal;font-family:verdana;display: inline-block;line-height:30px'>national&nbsp;</span> <span style='color:#0489B1;font-size:1.1em;white-space: normal;font-family:verdana;display: inline-block;line-height:30px'>tropical&nbsp;</span> <span style='color:#0489B1;font-size:1.1em;white-space: normal;font-family:verdana;display: inline-block;line-height:30px'>landfall&nbsp;</span> <span style='color:#0489B1;font-size:1.1em;white-space: normal;font-family:verdana;display: inline-block;line-height:30px'>local&nbsp;</span> <span style='color:#725394;font-size:1.25em;white-space: normal;font-family:verdana;display: inline-block;line-height:30px'>cause&nbsp;</span> <span style='color:#B18904;font-size:1.5499999999999998em;white-space: normal;font-family:verdana;display: inline-block;line-height:30px'>coast&nbsp;</span> <span style='color:#0489B1;font-size:1.1em;white-space: normal;font-family:verdana;display: inline-block;line-height:30px'>path&nbsp;</span> <span style='color:#2ECC71;font-size:1.8499999999999996em;white-space: normal;font-family:verdana;display: inline-block;line-height:30px'>inches&nbsp;</span> <span style='color:#0489B1;font-size:1.1em;white-space: normal;font-family:verdana;display: inline-block;line-height:30px'>force&nbsp;</span> <span 
style='color:#0489B1;font-size:1.1em;white-space: normal;font-family:verdana;display: inline-block;line-height:30px'>southern&nbsp;</span> <span style='color:#0489B1;font-size:1.1em;white-space: normal;font-family:verdana;display: inline-block;line-height:30px'>outward&nbsp;</span> <span style='color:#0489B1;font-size:1.1em;white-space: normal;font-family:verdana;display: inline-block;line-height:30px'>southwest&nbsp;</span> <span style='color:#725394;font-size:1.25em;white-space: normal;font-family:verdana;display: inline-block;line-height:30px'>expected&nbsp;</span> <span style='color:#5882FA;font-size:2.749999999999999em;white-space: normal;font-family:verdana;display: inline-block;line-height:30px'>mexico&nbsp;</span> <span style='color:#0489B1;font-size:1.1em;white-space: normal;font-family:verdana;display: inline-block;line-height:30px'>flash&nbsp;</span> <span style='color:#725394;font-size:1.25em;white-space: normal;font-family:verdana;display: inline-block;line-height:30px'>amounts&nbsp;</span> <span style='color:#5882FA;font-size:2.749999999999999em;white-space: normal;font-family:verdana;display: inline-block;line-height:30px'>center&nbsp;</span> <span style='color:#0489B1;font-size:1.1em;white-space: normal;font-family:verdana;display: inline-block;line-height:30px'>produce&nbsp;</span> <span style='color:#DBA901;font-size:3.3499999999999988em;white-space: normal;font-family:verdana;display: inline-block;line-height:30px'>hurricane&nbsp;</span> <span style='color:#0489B1;font-size:1.1em;white-space: normal;font-family:verdana;display: inline-block;line-height:30px'>map&nbsp;</span></div></div>"
123 |       ],
124 |       "text/plain": [
125 |        "<IPython.core.display.HTML object>"
126 |       ]
127 |      },
128 |      "execution_count": 12,
129 |      "metadata": {},
130 |      "output_type": "execute_result"
131 |     }
132 |    ],
133 |    "source": [
134 |     "#randomize colors, show only top 50\n",
135 |     "embed_code=wc.get_embed_code(text=texts,random_color=True,topn=50)\n",
136 |     "HTML(embed_code)"
137 |    ]
138 |   },
139 |   {
140 |    "cell_type": "markdown",
141 |    "metadata": {},
142 |    "source": [
143 |     "## Generate word clouds from multiple documents "
144 |    ]
145 |   },
146 |   {
147 |    "cell_type": "code",
148 |    "execution_count": 5,
149 |    "metadata": {},
150 |    "outputs": [
151 |     {
152 |      "name": "stdout",
153 |      "output_type": "stream",
154 |      "text": [
155 |       "[nltk_data] Downloading package reuters to /Users/kavgan/nltk_data...\n",
156 |       "[nltk_data]   Package reuters is already up-to-date!\n"
157 |      ]
158 |     },
159 |     {
160 |      "data": {
161 |       "text/html": [
162 |        "<div align='center' style='width:100%'><div align='center' style='text-align:justify; border-radius: 25px;background: #fff7f7;overflow: auto; width:500px !important; padding:20px; '; text-align: center; word-wrap: break-word;> <span style='color:#6C22E6;font-size:0.9500000000000001em;white-space: normal;font-family:verdana;display: inline-block;line-height:30px'>board&nbsp;</span> <span style='color:#6C22E6;font-size:0.9500000000000001em;white-space: normal;font-family:verdana;display: inline-block;line-height:30px'>told&nbsp;</span> <span style='color:#6C22E6;font-size:0.9500000000000001em;white-space: normal;font-family:verdana;display: inline-block;line-height:30px'>ual&nbsp;</span> <span style='color:#FA5858;font-size:1.25em;white-space: normal;font-family:verdana;display: inline-block;line-height:30px'>group&nbsp;</span> <span style='color:#6C22E6;font-size:0.9500000000000001em;white-space: normal;font-family:verdana;display: inline-block;line-height:30px'>offer&nbsp;</span> <span style='color:#6C22E6;font-size:0.9500000000000001em;white-space: normal;font-family:verdana;display: inline-block;line-height:30px'>purchase&nbsp;</span> <span style='color:#6C22E6;font-size:0.9500000000000001em;white-space: normal;font-family:verdana;display: inline-block;line-height:30px'>takeover&nbsp;</span> <span style='color:#FA5858;font-size:1.25em;white-space: normal;font-family:verdana;display: inline-block;line-height:30px'>year&nbsp;</span> <span style='color:#a55571;font-size:1.1em;white-space: normal;font-family:verdana;display: inline-block;line-height:30px'>securities&nbsp;</span> <span style='color:#FA5858;font-size:1.25em;white-space: normal;font-family:verdana;display: inline-block;line-height:30px'>business&nbsp;</span> <span style='color:#a55571;font-size:1.1em;white-space: normal;font-family:verdana;display: inline-block;line-height:30px'>unit&nbsp;</span> <span style='color:#6C22E6;font-size:0.9500000000000001em;white-space: 
normal;font-family:verdana;display: inline-block;line-height:30px'>chairman&nbsp;</span> <span style='color:#6C22E6;font-size:0.9500000000000001em;white-space: normal;font-family:verdana;display: inline-block;line-height:30px'>terms&nbsp;</span> <span style='color:#a55571;font-size:1.1em;white-space: normal;font-family:verdana;display: inline-block;line-height:30px'>national&nbsp;</span> <span style='color:#6C22E6;font-size:0.9500000000000001em;white-space: normal;font-family:verdana;display: inline-block;line-height:30px'>billion&nbsp;</span> <span style='color:#6C22E6;font-size:0.9500000000000001em;white-space: normal;font-family:verdana;display: inline-block;line-height:30px'>bank&nbsp;</span> <span style='color:#6C22E6;font-size:0.9500000000000001em;white-space: normal;font-family:verdana;display: inline-block;line-height:30px'>international&nbsp;</span> <span style='color:#6C22E6;font-size:0.9500000000000001em;white-space: normal;font-family:verdana;display: inline-block;line-height:30px'>bid&nbsp;</span> <span style='color:#848484;font-size:2.4499999999999993em;white-space: normal;font-family:verdana;display: inline-block;line-height:30px'>company&nbsp;</span> <span style='color:#FA5858;font-size:1.25em;white-space: normal;font-family:verdana;display: inline-block;line-height:30px'>share&nbsp;</span> <span style='color:#6C22E6;font-size:0.9500000000000001em;white-space: normal;font-family:verdana;display: inline-block;line-height:30px'>new&nbsp;</span> <span style='color:#FA5858;font-size:1.25em;white-space: normal;font-family:verdana;display: inline-block;line-height:30px'>industries&nbsp;</span> <span style='color:#848484;font-size:2.4499999999999993em;white-space: normal;font-family:verdana;display: inline-block;line-height:30px'>mln&nbsp;</span> <span style='color:#FA5858;font-size:1.25em;white-space: normal;font-family:verdana;display: inline-block;line-height:30px'>agreement&nbsp;</span> <span 
style='color:#6C22E6;font-size:0.9500000000000001em;white-space: normal;font-family:verdana;display: inline-block;line-height:30px'>april&nbsp;</span> <span style='color:#6C22E6;font-size:0.9500000000000001em;white-space: normal;font-family:verdana;display: inline-block;line-height:30px'>statement&nbsp;</span> <span style='color:#6C22E6;font-size:0.9500000000000001em;white-space: normal;font-family:verdana;display: inline-block;line-height:30px'>buy&nbsp;</span> <span style='color:#6C22E6;font-size:0.9500000000000001em;white-space: normal;font-family:verdana;display: inline-block;line-height:30px'>price&nbsp;</span> <span style='color:#003366;font-size:1.5499999999999998em;white-space: normal;font-family:verdana;display: inline-block;line-height:30px'>acquisition&nbsp;</span> <span style='color:#6C22E6;font-size:0.9500000000000001em;white-space: normal;font-family:verdana;display: inline-block;line-height:30px'>american&nbsp;</span> <span style='color:#6C22E6;font-size:0.9500000000000001em;white-space: normal;font-family:verdana;display: inline-block;line-height:30px'>stake&nbsp;</span> <span style='color:#003366;font-size:1.5499999999999998em;white-space: normal;font-family:verdana;display: inline-block;line-height:30px'>stock&nbsp;</span> <span style='color:#6C22E6;font-size:0.9500000000000001em;white-space: normal;font-family:verdana;display: inline-block;line-height:30px'>1986&nbsp;</span> <span style='color:#6C22E6;font-size:0.9500000000000001em;white-space: normal;font-family:verdana;display: inline-block;line-height:30px'>sale&nbsp;</span> <span style='color:#a55571;font-size:1.1em;white-space: normal;font-family:verdana;display: inline-block;line-height:30px'>acquired&nbsp;</span> <span style='color:#6C22E6;font-size:0.9500000000000001em;white-space: normal;font-family:verdana;display: inline-block;line-height:30px'>companies&nbsp;</span> <span style='color:#6C22E6;font-size:0.9500000000000001em;white-space: normal;font-family:verdana;display: 
inline-block;line-height:30px'>report&nbsp;</span> <span style='color:#848484;font-size:2.4499999999999993em;white-space: normal;font-family:verdana;display: inline-block;line-height:30px'>dlrs&nbsp;</span> <span style='color:#6C22E6;font-size:0.9500000000000001em;white-space: normal;font-family:verdana;display: inline-block;line-height:30px'>acquire&nbsp;</span> <span style='color:#6C22E6;font-size:0.9500000000000001em;white-space: normal;font-family:verdana;display: inline-block;line-height:30px'>merger&nbsp;</span> <span style='color:#6C22E6;font-size:0.9500000000000001em;white-space: normal;font-family:verdana;display: inline-block;line-height:30px'>plan&nbsp;</span> <span style='color:#725394;font-size:2.2999999999999994em;white-space: normal;font-family:verdana;display: inline-block;line-height:30px'>shares&nbsp;</span> <span style='color:#6C22E6;font-size:0.9500000000000001em;white-space: normal;font-family:verdana;display: inline-block;line-height:30px'>agreed&nbsp;</span> <span style='color:#DBA901;font-size:3.4999999999999987em;white-space: normal;font-family:verdana;display: inline-block;line-height:30px'>lt&nbsp;</span> <span style='color:#6C22E6;font-size:0.9500000000000001em;white-space: normal;font-family:verdana;display: inline-block;line-height:30px'>000&nbsp;</span> <span style='color:#bc72d0;font-size:1.8499999999999996em;white-space: normal;font-family:verdana;display: inline-block;line-height:30px'>corp&nbsp;</span> <span style='color:#6C22E6;font-size:0.9500000000000001em;white-space: normal;font-family:verdana;display: inline-block;line-height:30px'>analysts&nbsp;</span> <span style='color:#bc72d0;font-size:1.8499999999999996em;white-space: normal;font-family:verdana;display: inline-block;line-height:30px'>pct&nbsp;</span> <span style='color:#a55571;font-size:1.1em;white-space: normal;font-family:verdana;display: inline-block;line-height:30px'>chemical&nbsp;</span> <span style='color:#a55571;font-size:1.1em;white-space: 
normal;font-family:verdana;display: inline-block;line-height:30px'>sell&nbsp;</span></div></div>"
163 |       ],
164 |       "text/plain": [
165 |        "<IPython.core.display.HTML object>"
166 |       ]
167 |      },
168 |      "execution_count": 5,
169 |      "metadata": {},
170 |      "output_type": "execute_result"
171 |     }
172 |    ],
173 |    "source": [
174 |     "wc=WordCloud(use_tfidf=False,stopwords=ENGLISH_STOP_WORDS)\n",
175 |     "\n",
176 |     "nltk.download('reuters')\n",
177 |     "\n",
178 |     "#get all articles related to acquisitions\n",
179 |     "category_docs = reuters.fileids(\"acq\");\n",
180 |     "\n",
181 |     "\n",
182 |     "list_of_documents=[]\n",
183 |     "\n",
184 |     "#use raw content from 100 documents\n",
185 |     "for i in range (100):\n",
186 |     "    document_id = category_docs[i]\n",
187 |     "    list_of_documents.append(reuters.raw(document_id)) \n",
188 |     "    \n",
189 |     "\n",
190 |     "#randomize colors, show only top 50\n",
191 |     "embed_code=wc.get_embed_code(text=list_of_documents,random_color=True,topn=50)\n",
192 |     "HTML(embed_code)"
193 |    ]
194 |   },
195 |   {
196 |    "cell_type": "markdown",
197 |    "metadata": {},
198 |    "source": [
199 |     "## Generate word clouds from existing weights\n",
200 |     "If you already have a set of words with corresponding weights and just want to visualize them, this library supports that too. Just make sure the weights are normalized to the range [0, 1]."
201 |    ]
202 |   },
203 |   {
204 |    "cell_type": "code",
205 |    "execution_count": 6,
206 |    "metadata": {},
207 |    "outputs": [
208 |     {
209 |      "data": {
210 |       "text/html": [
211 |        "<div align='center' style='width:100%'><div align='center' style='text-align:justify; border-radius: 25px;background: #fff7f7;overflow: auto; width:500px !important; padding:20px; '; text-align: center; word-wrap: break-word;> <span style='color:#008080;font-size:1.8499999999999996em;white-space: normal;font-family:verdana;display: inline-block;line-height:30px'>ambitious&nbsp;</span> <span style='color:#FF5733;font-size:0.8em;white-space: normal;font-family:verdana;display: inline-block;line-height:30px'>cool-place&nbsp;</span> <span style='color:#00b4ff;font-size:1.1em;white-space: normal;font-family:verdana;display: inline-block;line-height:30px'>nice-work&nbsp;</span> <span style='color:#5882FA;font-size:1.4em;white-space: normal;font-family:verdana;display: inline-block;line-height:30px'>smart&nbsp;</span> <span style='color:#008080;font-size:1.8499999999999996em;white-space: normal;font-family:verdana;display: inline-block;line-height:30px'>awesome&nbsp;</span> <span style='color:#FA5858;font-size:2.599999999999999em;white-space: normal;font-family:verdana;display: inline-block;line-height:30px'>great-job&nbsp;</span> <span style='color:#a55571;font-size:2.1499999999999995em;white-space: normal;font-family:verdana;display: inline-block;line-height:30px'>nice-colors&nbsp;</span> <span style='color:#008080;font-size:1.8499999999999996em;white-space: normal;font-family:verdana;display: inline-block;line-height:30px'>intelligent&nbsp;</span> <span style='color:#008080;font-size:1.8499999999999996em;white-space: normal;font-family:verdana;display: inline-block;line-height:30px'>small-font&nbsp;</span> <span style='color:#0489B1;font-size:2.2999999999999994em;white-space: normal;font-family:verdana;display: inline-block;line-height:30px'>fun-place&nbsp;</span> <span style='color:#008080;font-size:1.8499999999999996em;white-space: normal;font-family:verdana;display: inline-block;line-height:30px'>medium-font&nbsp;</span> <span 
style='color:#8000FF;font-size:2.4499999999999993em;white-space: normal;font-family:verdana;display: inline-block;line-height:30px'>word-cloud&nbsp;</span> <span style='color:#00b4ff;font-size:1.1em;white-space: normal;font-family:verdana;display: inline-block;line-height:30px'>crazy&nbsp;</span> <span style='color:#0489B1;font-size:2.2999999999999994em;white-space: normal;font-family:verdana;display: inline-block;line-height:30px'>cool-cloud&nbsp;</span> <span style='color:#223AE6;font-size:1.5499999999999998em;white-space: normal;font-family:verdana;display: inline-block;line-height:30px'>phrase-cloud&nbsp;</span></div></div>"
212 |       ],
213 |       "text/plain": [
214 |        "<IPython.core.display.HTML object>"
215 |       ]
216 |      },
217 |      "execution_count": 6,
218 |      "metadata": {},
219 |      "output_type": "execute_result"
220 |     }
221 |    ],
222 |    "source": [
223 |     "wc=WordCloud(use_tfidf=False,stopwords=ENGLISH_STOP_WORDS)\n",
224 |     "\n",
225 |     "#words with corresponding weights\n",
226 |     "list_of_scores=[['nice-work',0.2],['great-job',0.7],['cool-place',0.1],['cool-cloud',0.6],['phrase-cloud',0.34],['word-cloud',0.625],['nice-colors',0.525],['small-font',0.4],['fun-place',0.6],['awesome',0.4],['intelligent',0.4],['medium-font',0.4],['crazy',0.2],['smart',0.3],['ambitious',0.4]]\n",
227 |     "\n",
228 |     "#randomize colors, show only top 50\n",
229 |     "embed_code=wc.get_embed_code(text_scores=pd.DataFrame(list_of_scores),random_color=True,topn=50)\n",
230 |     "HTML(embed_code)"
231 |    ]
232 |   },
233 |   {
234 |    "cell_type": "code",
235 |    "execution_count": null,
236 |    "metadata": {},
237 |    "outputs": [],
238 |    "source": []
239 |   },
240 |   {
241 |    "cell_type": "code",
242 |    "execution_count": null,
243 |    "metadata": {},
244 |    "outputs": [],
245 |    "source": []
246 |   }
247 |  ],
248 |  "metadata": {
249 |   "kernelspec": {
250 |    "display_name": "Python 3",
251 |    "language": "python",
252 |    "name": "python3"
253 |   },
254 |   "language_info": {
255 |    "codemirror_mode": {
256 |     "name": "ipython",
257 |     "version": 3
258 |    },
259 |    "file_extension": ".py",
260 |    "mimetype": "text/x-python",
261 |    "name": "python",
262 |    "nbconvert_exporter": "python",
263 |    "pygments_lexer": "ipython3",
264 |    "version": "3.6.5"
265 |   }
266 |  },
267 |  "nbformat": 4,
268 |  "nbformat_minor": 2
269 | }
270 | 


--------------------------------------------------------------------------------