├── .gitignore
├── .markdownlint.json
├── .travis.yml
├── ACKNOWLEDGEMENTS.md
├── CONTRIBUTING.md
├── DEBUGGING.md
├── LICENSE
├── MAINTAINERS.md
├── README.md
├── dashboards
│   └── dashboard.json
├── data
│   └── example_facebook_data.csv
├── doc
│   └── source
│       └── images
│           ├── add_credentials.png
│           ├── add_file.png
│           ├── add_notebook.png
│           ├── architecture.png
│           ├── emotion.png
│           ├── emotional_engagement.png
│           ├── entities.png
│           ├── insert_file_credentials.png
│           ├── insert_to_code.png
│           ├── inserted_pandas.png
│           ├── keywords.png
│           ├── new_notebook.png
│           ├── sentiment.png
│           ├── sentimental_engagement.png
│           └── studio_project_overview.png
├── examples
│   └── enriched_example_facebook_data.csv
├── manifest.yml
└── notebooks
    └── pixiedust_facebook_analysis.ipynb
/.gitignore:
--------------------------------------------------------------------------------
1 | # Byte-compiled / optimized / DLL files
2 | __pycache__/
3 | *.py[cod]
4 | *$py.class
5 |
6 | # C extensions
7 | *.so
8 |
9 | # Distribution / packaging
10 | .Python
11 | env/
12 | build/
13 | develop-eggs/
14 | dist/
15 | downloads/
16 | eggs/
17 | .eggs/
18 | lib/
19 | lib64/
20 | parts/
21 | sdist/
22 | var/
23 | *.egg-info/
24 | .installed.cfg
25 | *.egg
26 |
27 | # PyInstaller
28 | # Usually these files are written by a python script from a template
29 | # before PyInstaller builds the exe, so as to inject date/other infos into it.
30 | *.manifest
31 | *.spec
32 |
33 | # Installer logs
34 | pip-log.txt
35 | pip-delete-this-directory.txt
36 |
37 | # Unit test / coverage reports
38 | htmlcov/
39 | .tox/
40 | .coverage
41 | .coverage.*
42 | .cache
43 | nosetests.xml
44 | coverage.xml
45 | *,cover
46 | .hypothesis/
47 |
48 | # Translations
49 | *.mo
50 | *.pot
51 |
52 | # Django stuff:
53 | *.log
54 | local_settings.py
55 |
56 | # Flask stuff:
57 | instance/
58 | .webassets-cache
59 |
60 | # Scrapy stuff:
61 | .scrapy
62 |
63 | # Sphinx documentation
64 | docs/_build/
65 |
66 | # PyBuilder
67 | target/
68 |
69 | # IPython Notebook
70 | .ipynb_checkpoints
71 |
72 | # pyenv
73 | .python-version
74 |
75 | # celery beat schedule file
76 | celerybeat-schedule
77 |
78 | # dotenv
79 | .env
80 |
81 | # virtualenv
82 | venv/
83 | ENV/
84 |
85 | # Spyder project settings
86 | .spyderproject
87 |
88 | # Rope project settings
89 | .ropeproject
90 |
91 | # VS Code project files (and local history plugin files)
92 | .vscode
93 | .history
94 |
95 | # IDEA IDE projects
96 | .idea
97 |
98 |
--------------------------------------------------------------------------------
/.markdownlint.json:
--------------------------------------------------------------------------------
1 | {
2 | "_docs_for_this_file": "https://github.com/DavidAnson/markdownlint/blob/master/doc/Rules.md",
3 | "line-length": false,
4 | "first-line-h1": false,
5 | "header-style": {
6 | "style": "atx"
7 | },
8 | "ul-style": {
9 | "style": "asterisk"
10 | },
11 | "required-headers": {
12 | "headers": [
13 | "*",
14 | "## Flow",
15 | "## Included components",
16 | "## Steps",
17 | "*",
18 | "## License"
19 | ]
20 | },
21 | "no-blanks-blockquote": false
22 | }
23 |
--------------------------------------------------------------------------------
/.travis.yml:
--------------------------------------------------------------------------------
1 | language: node_js
2 | cache:
3 | directories:
4 | - "node_modules"
5 | node_js:
6 | - "lts/*"
7 | before_script:
8 | - npm install -g markdownlint-cli
9 | script:
10 | - markdownlint --config $TRAVIS_BUILD_DIR/.markdownlint.json README.md
11 | - if grep -n -T --before-context 2 --after-context 1 TODO README.md; then exit 1; fi
12 | - if grep -n -T --before-context 2 --after-context 1 FIXME README.md; then exit 1; fi
13 |
--------------------------------------------------------------------------------
/ACKNOWLEDGEMENTS.md:
--------------------------------------------------------------------------------
1 | # Acknowledgements
2 |
3 | * Credit goes to [Anna Quincy](https://www.linkedin.com/in/anna-quincy-25042957) and [Tyler Andersen](https://www.linkedin.com/in/tyler-andersen-2bb82336) for providing the initial notebook.
4 |
--------------------------------------------------------------------------------
/CONTRIBUTING.md:
--------------------------------------------------------------------------------
1 | # Contributing
2 |
3 | This is an open source project, and we appreciate your help!
4 |
5 | We use the GitHub issue tracker to discuss new features and non-trivial bugs.
6 |
7 | In addition to the issue tracker, [#journeys on
8 | Slack](https://dwopen.slack.com) is the best way to get in touch with the
9 | project's maintainers.
10 |
11 | To contribute code, documentation, or tests, please submit a pull request to
12 | the GitHub repository. Generally, we expect two maintainers to review your pull
13 | request before it is approved for merging. For more details, see the
14 | [MAINTAINERS](MAINTAINERS.md) page.
15 |
16 | Contributions are subject to the [Developer Certificate of Origin, Version 1.1](https://developercertificate.org/) and the [Apache License, Version 2](https://www.apache.org/licenses/LICENSE-2.0.txt).
17 |
--------------------------------------------------------------------------------
/DEBUGGING.md:
--------------------------------------------------------------------------------
1 | Troubleshooting
2 | ===============
3 |
4 | Jupyter Notebooks
5 | -----------------
6 |
7 | * Make sure the pip install ran correctly. You might need to restart the
8 | kernel and run the cells from the top after the pip install runs the first
9 |   time (see the install sketch below).
10 | * Many of the cells rely on variables that are set in earlier cells. Some of
11 | these are cleared in later cells. Start over at the top when troubleshooting.
12 |
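For reference, here is a minimal sketch of such an install cell (the package list is illustrative, so check the notebook's first cell for the exact one):

```python
# Run once in the first notebook cell, then restart the kernel
# (Kernel > Restart) and re-run every cell from the top.
!pip install --upgrade ibm-watson beautifulsoup4 pandas
```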
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | Apache License
2 | Version 2.0, January 2004
3 | http://www.apache.org/licenses/
4 |
5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
6 |
7 | 1. Definitions.
8 |
9 | "License" shall mean the terms and conditions for use, reproduction,
10 | and distribution as defined by Sections 1 through 9 of this document.
11 |
12 | "Licensor" shall mean the copyright owner or entity authorized by
13 | the copyright owner that is granting the License.
14 |
15 | "Legal Entity" shall mean the union of the acting entity and all
16 | other entities that control, are controlled by, or are under common
17 | control with that entity. For the purposes of this definition,
18 | "control" means (i) the power, direct or indirect, to cause the
19 | direction or management of such entity, whether by contract or
20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the
21 | outstanding shares, or (iii) beneficial ownership of such entity.
22 |
23 | "You" (or "Your") shall mean an individual or Legal Entity
24 | exercising permissions granted by this License.
25 |
26 | "Source" form shall mean the preferred form for making modifications,
27 | including but not limited to software source code, documentation
28 | source, and configuration files.
29 |
30 | "Object" form shall mean any form resulting from mechanical
31 | transformation or translation of a Source form, including but
32 | not limited to compiled object code, generated documentation,
33 | and conversions to other media types.
34 |
35 | "Work" shall mean the work of authorship, whether in Source or
36 | Object form, made available under the License, as indicated by a
37 | copyright notice that is included in or attached to the work
38 | (an example is provided in the Appendix below).
39 |
40 | "Derivative Works" shall mean any work, whether in Source or Object
41 | form, that is based on (or derived from) the Work and for which the
42 | editorial revisions, annotations, elaborations, or other modifications
43 | represent, as a whole, an original work of authorship. For the purposes
44 | of this License, Derivative Works shall not include works that remain
45 | separable from, or merely link (or bind by name) to the interfaces of,
46 | the Work and Derivative Works thereof.
47 |
48 | "Contribution" shall mean any work of authorship, including
49 | the original version of the Work and any modifications or additions
50 | to that Work or Derivative Works thereof, that is intentionally
51 | submitted to Licensor for inclusion in the Work by the copyright owner
52 | or by an individual or Legal Entity authorized to submit on behalf of
53 | the copyright owner. For the purposes of this definition, "submitted"
54 | means any form of electronic, verbal, or written communication sent
55 | to the Licensor or its representatives, including but not limited to
56 | communication on electronic mailing lists, source code control systems,
57 | and issue tracking systems that are managed by, or on behalf of, the
58 | Licensor for the purpose of discussing and improving the Work, but
59 | excluding communication that is conspicuously marked or otherwise
60 | designated in writing by the copyright owner as "Not a Contribution."
61 |
62 | "Contributor" shall mean Licensor and any individual or Legal Entity
63 | on behalf of whom a Contribution has been received by Licensor and
64 | subsequently incorporated within the Work.
65 |
66 | 2. Grant of Copyright License. Subject to the terms and conditions of
67 | this License, each Contributor hereby grants to You a perpetual,
68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable
69 | copyright license to reproduce, prepare Derivative Works of,
70 | publicly display, publicly perform, sublicense, and distribute the
71 | Work and such Derivative Works in Source or Object form.
72 |
73 | 3. Grant of Patent License. Subject to the terms and conditions of
74 | this License, each Contributor hereby grants to You a perpetual,
75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable
76 | (except as stated in this section) patent license to make, have made,
77 | use, offer to sell, sell, import, and otherwise transfer the Work,
78 | where such license applies only to those patent claims licensable
79 | by such Contributor that are necessarily infringed by their
80 | Contribution(s) alone or by combination of their Contribution(s)
81 | with the Work to which such Contribution(s) was submitted. If You
82 | institute patent litigation against any entity (including a
83 | cross-claim or counterclaim in a lawsuit) alleging that the Work
84 | or a Contribution incorporated within the Work constitutes direct
85 | or contributory patent infringement, then any patent licenses
86 | granted to You under this License for that Work shall terminate
87 | as of the date such litigation is filed.
88 |
89 | 4. Redistribution. You may reproduce and distribute copies of the
90 | Work or Derivative Works thereof in any medium, with or without
91 | modifications, and in Source or Object form, provided that You
92 | meet the following conditions:
93 |
94 | (a) You must give any other recipients of the Work or
95 | Derivative Works a copy of this License; and
96 |
97 | (b) You must cause any modified files to carry prominent notices
98 | stating that You changed the files; and
99 |
100 | (c) You must retain, in the Source form of any Derivative Works
101 | that You distribute, all copyright, patent, trademark, and
102 | attribution notices from the Source form of the Work,
103 | excluding those notices that do not pertain to any part of
104 | the Derivative Works; and
105 |
106 | (d) If the Work includes a "NOTICE" text file as part of its
107 | distribution, then any Derivative Works that You distribute must
108 | include a readable copy of the attribution notices contained
109 | within such NOTICE file, excluding those notices that do not
110 | pertain to any part of the Derivative Works, in at least one
111 | of the following places: within a NOTICE text file distributed
112 | as part of the Derivative Works; within the Source form or
113 | documentation, if provided along with the Derivative Works; or,
114 | within a display generated by the Derivative Works, if and
115 | wherever such third-party notices normally appear. The contents
116 | of the NOTICE file are for informational purposes only and
117 | do not modify the License. You may add Your own attribution
118 | notices within Derivative Works that You distribute, alongside
119 | or as an addendum to the NOTICE text from the Work, provided
120 | that such additional attribution notices cannot be construed
121 | as modifying the License.
122 |
123 | You may add Your own copyright statement to Your modifications and
124 | may provide additional or different license terms and conditions
125 | for use, reproduction, or distribution of Your modifications, or
126 | for any such Derivative Works as a whole, provided Your use,
127 | reproduction, and distribution of the Work otherwise complies with
128 | the conditions stated in this License.
129 |
130 | 5. Submission of Contributions. Unless You explicitly state otherwise,
131 | any Contribution intentionally submitted for inclusion in the Work
132 | by You to the Licensor shall be under the terms and conditions of
133 | this License, without any additional terms or conditions.
134 | Notwithstanding the above, nothing herein shall supersede or modify
135 | the terms of any separate license agreement you may have executed
136 | with Licensor regarding such Contributions.
137 |
138 | 6. Trademarks. This License does not grant permission to use the trade
139 | names, trademarks, service marks, or product names of the Licensor,
140 | except as required for reasonable and customary use in describing the
141 | origin of the Work and reproducing the content of the NOTICE file.
142 |
143 | 7. Disclaimer of Warranty. Unless required by applicable law or
144 | agreed to in writing, Licensor provides the Work (and each
145 | Contributor provides its Contributions) on an "AS IS" BASIS,
146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
147 | implied, including, without limitation, any warranties or conditions
148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
149 | PARTICULAR PURPOSE. You are solely responsible for determining the
150 | appropriateness of using or redistributing the Work and assume any
151 | risks associated with Your exercise of permissions under this License.
152 |
153 | 8. Limitation of Liability. In no event and under no legal theory,
154 | whether in tort (including negligence), contract, or otherwise,
155 | unless required by applicable law (such as deliberate and grossly
156 | negligent acts) or agreed to in writing, shall any Contributor be
157 | liable to You for damages, including any direct, indirect, special,
158 | incidental, or consequential damages of any character arising as a
159 | result of this License or out of the use or inability to use the
160 | Work (including but not limited to damages for loss of goodwill,
161 | work stoppage, computer failure or malfunction, or any and all
162 | other commercial damages or losses), even if such Contributor
163 | has been advised of the possibility of such damages.
164 |
165 | 9. Accepting Warranty or Additional Liability. While redistributing
166 | the Work or Derivative Works thereof, You may choose to offer,
167 | and charge a fee for, acceptance of support, warranty, indemnity,
168 | or other liability obligations and/or rights consistent with this
169 | License. However, in accepting such obligations, You may act only
170 | on Your own behalf and on Your sole responsibility, not on behalf
171 | of any other Contributor, and only if You agree to indemnify,
172 | defend, and hold each Contributor harmless for any liability
173 | incurred by, or claims asserted against, such Contributor by reason
174 | of your accepting any such warranty or additional liability.
175 |
176 | END OF TERMS AND CONDITIONS
177 |
178 | APPENDIX: How to apply the Apache License to your work.
179 |
180 | To apply the Apache License to your work, attach the following
181 | boilerplate notice, with the fields enclosed by brackets "[]"
182 | replaced with your own identifying information. (Don't include
183 | the brackets!) The text should be enclosed in the appropriate
184 | comment syntax for the file format. We also recommend that a
185 | file or class name and description of purpose be included on the
186 | same "printed page" as the copyright notice for easier
187 | identification within third-party archives.
188 |
189 | Copyright [yyyy] [name of copyright owner]
190 |
191 | Licensed under the Apache License, Version 2.0 (the "License");
192 | you may not use this file except in compliance with the License.
193 | You may obtain a copy of the License at
194 |
195 | http://www.apache.org/licenses/LICENSE-2.0
196 |
197 | Unless required by applicable law or agreed to in writing, software
198 | distributed under the License is distributed on an "AS IS" BASIS,
199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
200 | See the License for the specific language governing permissions and
201 | limitations under the License.
202 |
--------------------------------------------------------------------------------
/MAINTAINERS.md:
--------------------------------------------------------------------------------
1 | # Maintainers Guide
2 |
3 | This guide is intended for maintainers - anybody with commit access to one or
4 | more Code Pattern repositories.
5 |
6 | ## Methodology
7 |
8 | This repository does not have a traditional release management cycle, but
9 | should instead be maintained as a useful, working, and polished reference at
10 | all times. While all work can therefore be focused on the master branch, the
11 | quality of this branch should never be compromised.
12 |
13 | The remainder of this document details how to merge pull requests to the
14 | repositories.
15 |
16 | ## Merge approval
17 |
18 | The project maintainers use LGTM (Looks Good To Me) in comments on the pull
19 | request to indicate acceptance prior to merging. A change requires LGTMs from
20 | two project maintainers. If the code is written by a maintainer, the change
21 | only requires one additional LGTM.
22 |
23 | ## Reviewing Pull Requests
24 |
25 | We recommend reviewing pull requests directly within GitHub. This allows a
26 | public commentary on changes, providing transparency for all users. When
27 | providing feedback be civil, courteous, and kind. Disagreement is fine, so long
28 | as the discourse is carried out politely. If we see a record of uncivil or
29 | abusive comments, we will revoke your commit privileges and invite you to leave
30 | the project.
31 |
32 | During your review, consider the following points:
33 |
34 | ### Does the change have positive impact?
35 |
36 | Some proposed changes may not represent a positive impact to the project. Ask
37 | whether or not the change will make understanding the code easier, or if it
38 | could simply be a personal preference on the part of the author (see
39 | [bikeshedding](https://en.wiktionary.org/wiki/bikeshedding)).
40 |
41 | Pull requests that do not have a clear positive impact should be closed without
42 | merging.
43 |
44 | ### Do the changes make sense?
45 |
46 | If you do not understand what the changes are or what they accomplish, ask the
47 | author for clarification. Ask the author to add comments and/or clarify test
48 | case names to make the intentions clear.
49 |
50 | At times, such clarification will reveal that the author may not be using the
51 | code correctly, or is unaware of features that accommodate their needs. If you
52 | feel this is the case, work up a code sample that would address the pull
53 | request for them, and feel free to close the pull request once they confirm.
54 |
55 | ### Does the change introduce a new feature?
56 |
57 | For any given pull request, ask yourself "is this a new feature?" If so, does
58 | the pull request (or associated issue) contain narrative indicating the need
59 | for the feature? If not, ask them to provide that information.
60 |
61 | Are new unit tests in place that test all new behaviors introduced? If not, do
62 | not merge the feature until they are! Is documentation in place for the new
63 | feature? (See the documentation guidelines.) If not, do not merge the feature
64 | until it is! Is the feature necessary for general use cases? Try to keep the
65 | scope of any given component narrow. If a proposed feature does not fit that
66 | scope, recommend to the user that they maintain the feature on their own, and
67 | close the request. You may also recommend that they see if the feature gains
68 | traction among other users, and suggest they re-submit when they can show such
69 | support.
70 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | [![Build Status](https://travis-ci.org/IBM/pixiedust-facebook-analysis.svg?branch=master)](https://travis-ci.org/IBM/pixiedust-facebook-analysis)
2 |
3 | # Uncover insights from Facebook data with Watson services
4 |
5 | ## WARNING: This repository is no longer maintained.
6 |
7 | This repository will not be updated. The repository will be kept available in read-only mode.
8 |
9 | In this code pattern, we will use a Jupyter notebook with Watson Studio to glean insights from a vast body of unstructured data. We'll start with data exported from Facebook Analytics. We'll use Watson’s Natural Language Understanding and Visual Recognition to enrich the data.
10 |
11 | We'll use the enriched data to answer questions like:
12 |
13 | > What emotion is most prevalent in the posts with the highest engagement?
14 |
15 | > Which sentiment has the higher engagement score on average?
16 |
17 | > What are the top keywords, entities, or images measured by total reach?
18 |
19 | These types of insights are especially beneficial for marketing analysts who are interested in understanding and improving brand perception, product performance, customer satisfaction, and ways to engage their audiences.
20 |
21 | It is important to note that this code pattern is meant to be used as a guided experiment, rather than an application with one set output. The standard Facebook Analytics export features text from posts, articles, and thumbnails, along with standard Facebook performance metrics such as likes, shares, and impressions. This unstructured content is then enriched with Watson APIs to extract keywords, entities, sentiment, and emotion.
22 |
23 | After the data is enriched with Watson APIs, we'll use the Cognos Dashboard Embedded service to add a dashboard to the project. Using the dashboard you can explore our results and build your own sophisticated visualizations to communicate the insights you've discovered.
24 |
25 | This code pattern provides mock Facebook data, a notebook, and several pre-built visualizations to jump-start you in uncovering hidden insights.
26 |
27 | When the reader has completed this code pattern, they will understand how to:
28 |
29 | * Read external data into a Jupyter notebook via Object Storage and pandas DataFrames.
30 | * Use a Jupyter notebook and Watson APIs to enrich unstructured data.
31 | * Write data from a pandas DataFrame in a Jupyter notebook out to a file in Object Storage.
32 | * Visualize and explore the enriched data.
33 |
34 | ## Flow
35 |
36 | ![architecture](doc/source/images/architecture.png)
37 |
38 | 1. A CSV file exported from Facebook Analytics is added to Object Storage.
39 | 1. Generated code makes the file accessible as a pandas DataFrame.
40 | 1. The data is enriched with Watson Natural Language Understanding.
41 | 1. The data is enriched with Watson Visual Recognition.
42 | 1. Use a dashboard to visualize the enriched data and uncover hidden insights.
43 |
44 | ## Included components
45 |
46 | * [IBM Watson Studio](https://dataplatform.cloud.ibm.com): Analyze data using RStudio, Jupyter, and Python in a configured, collaborative environment that includes IBM value-adds, such as managed Spark.
47 | * [IBM Watson Natural Language Understanding](https://www.ibm.com/watson/services/natural-language-understanding/): Natural language processing for advanced text analysis.
48 | * [IBM Watson Visual Recognition](https://www.ibm.com/watson/services/visual-recognition/): Understand image content.
49 | * [IBM Cognos Dashboard Embedded](https://cloud.ibm.com/catalog/services/ibm-cognos-dashboard-embedded): The IBM Cognos Dashboard Embedded lets you, the developer, painlessly add end-to-end data visualization capabilities to your application.
50 | * [IBM Cloud Object Storage](https://cloud.ibm.com/catalog/services/cloud-object-storage): An IBM Cloud service that provides an unstructured cloud data store to build and deliver cost effective apps and services with high reliability and fast speed to market.
51 | * [Jupyter Notebooks](https://jupyter.org/): An open-source web application that allows you to create and share documents that contain live code, equations, visualizations and explanatory text.
52 | * [pandas](https://pandas.pydata.org/): A Python library providing high-performance, easy-to-use data structures.
53 | * [Beautiful Soup](https://www.crummy.com/software/BeautifulSoup/bs4/doc/): Beautiful Soup is a Python library for pulling data out of HTML and XML files (a quick taste follows this list).
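
Since the notebook uses Beautiful Soup to pull text out of HTML, here is a quick, hedged taste (the sample HTML fragment is illustrative only, not data from this pattern):

```python
from bs4 import BeautifulSoup

# Illustrative snippet: extract the visible text from an HTML fragment,
# similar in spirit to how the notebook cleans up linked content.
html = '<html><body><h1>Hello</h1><p>Facebook analytics demo.</p></body></html>'
text = BeautifulSoup(html, 'html.parser').get_text(separator=' ', strip=True)
print(text)  # -> "Hello Facebook analytics demo."
```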
54 |
55 | ## Steps
56 |
57 | Follow these steps to set up and run this code pattern. The steps are
58 | described in detail below.
59 |
60 | 1. [Clone the repo](#1-clone-the-repo)
61 | 1. [Create a new Watson Studio project](#2-create-a-new-watson-studio-project)
62 | 1. [Add services to the project](#3-add-services-to-the-project)
63 | 1. [Create the notebook in Watson Studio](#4-create-the-notebook-in-watson-studio)
64 | 1. [Add credentials](#5-add-credentials)
65 | 1. [Add the CSV file](#6-add-the-csv-file)
66 | 1. [Run the notebook](#7-run-the-notebook)
67 | 1. [Add a dashboard to the project](#8-add-a-dashboard-to-the-project)
68 | 1. [Analyze the results](#9-analyze-the-results)
69 |
70 | ### 1. Clone the repo
71 |
72 | Clone the `pixiedust-facebook-analysis` repo locally. In a terminal, run the following command:
73 |
74 | ```bash
75 | git clone https://github.com/IBM/pixiedust-facebook-analysis.git
76 | ```
77 |
78 | ### 2. Create a new Watson Studio project
79 |
80 | * Log into IBM's [Watson Studio](https://dataplatform.cloud.ibm.com). Once in, you'll land on the dashboard.
81 |
82 | * Create a new project by clicking `New project +` and then click on `Create an empty project`.
83 |
84 | * Enter a project name.
85 |
86 | * Choose an existing Object Storage instance or create a new one.
87 |
88 | * Click `Create`.
89 |
90 | * Upon successful project creation, you are taken to the project `Overview` tab. Take note of the `Assets` and `Settings` tabs; we'll be using them to associate our project with external assets (datasets and notebooks) and IBM Cloud services.
91 |
92 | ![studio_project_overview](doc/source/images/studio_project_overview.png)
93 |
94 | ### 3. Add services to the project
95 |
96 | * Associate the project with Watson services. To create an instance of each service, go to the `Settings` tab in the new project and scroll down to `Associated Services`. Click `Add service` and select `Watson` from the drop-down menu. Add the service using the free `Lite` plan. Repeat for each of the services used in this pattern:
97 |
98 | * Natural Language Understanding
99 | * Visual Recognition (optional)
100 |
101 | * Once your services are created, copy the credentials and save them for later. You will use them in your Jupyter notebook.
102 |
103 | * Use the upper-left `☰` menu, and select `Services > My Services`.
104 | * Use the 3-dot actions menu to select `Manage in IBM Cloud` for each service.
105 | * Copy each `API key` and `URL` to use in the notebook.
106 |
107 | ### 4. Create the notebook in Watson Studio
108 |
109 | * Go back to your Watson Studio project by using your browser's back button, or use the upper-left `☰` menu and select `Projects`, then open your project.
110 |
111 | * Select the `Overview` tab, click `Add to project +` on the top right and choose the `Notebook` asset type.
112 |
113 | ![add_notebook](doc/source/images/add_notebook.png)
114 |
115 | * Fill in the following information:
116 |
117 | * Select the `From URL` tab. [1]
118 | * Enter a `Name` for the notebook and optionally a description. [2]
119 | * For `Select runtime` select the `Default Python 3.6 Free` option. [3]
120 | * Under `Notebook URL` provide the following url [4]:
121 |
122 | ```url
123 | https://raw.githubusercontent.com/IBM/pixiedust-facebook-analysis/master/notebooks/pixiedust_facebook_analysis.ipynb
124 | ```
125 |
126 | ![new_notebook](doc/source/images/new_notebook.png)
127 |
128 | * Click the `Create notebook` button.
129 |
130 | > **TIP:** Your notebook will appear in the `Notebooks` section of the `Assets` tab.
131 |
132 | ### 5. Add credentials
133 |
134 | Find the notebook cell after `1.5. Add Service Credentials From IBM Cloud for Watson Services`.
135 |
136 | Set the API key and URL for each service, as sketched below.
137 |
138 | ![add_credentials](doc/source/images/add_credentials.png)
139 |
140 | > **Note**: This cell is marked as a `hidden_cell` because it will contain sensitive credentials.
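
As a rough illustration, the hidden cell holds plain string assignments along these lines (the variable names here are placeholders, not necessarily the ones the notebook's credentials cell defines):

```python
# Hypothetical credentials cell -- paste the API key and URL you copied
# from `Manage in IBM Cloud` for each service.
nlu_apikey = '<your-natural-language-understanding-api-key>'
nlu_url = 'https://api.us-south.natural-language-understanding.watson.cloud.ibm.com'

# Visual Recognition is optional in this pattern.
vr_apikey = '<your-visual-recognition-api-key>'
```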
141 |
142 | ### 6. Add the CSV file
143 |
144 | #### Add the CSV file to the notebook
145 |
146 | Use `Find and Add Data` (look for the `01/00` icon) and its `Files` tab. From there you can click `browse` and add a `.csv` file from your computer.
147 |
148 | ![add_file](doc/source/images/add_file.png)
149 |
150 | > **Note**: If you don't have your own data, you can use our example by cloning this git repo. Look in the `data` directory.
151 |
152 | #### Insert to code
153 |
154 | Find the notebook cell after `2.1 Load data from Object Storage`. Place your cursor after `# **Insert to code > Insert pandas DataFrame**`. Make sure this cell is selected before inserting code.
155 |
156 | Using the file that you added above (under the `01/00` Files tab), open the `Insert to code` drop-down menu and select `pandas DataFrame`.
157 |
158 | ![insert_to_code](doc/source/images/insert_to_code.png)
159 |
160 | > **Note**: This cell is marked as a `hidden_cell` because it contains
161 | sensitive credentials.
162 |
163 | ![inserted_pandas](doc/source/images/inserted_pandas.png)
164 |
165 | #### Fix-up df variable name
166 |
167 | The inserted code includes a generated method with credentials and then calls the generated method to set a variable with a name like `df_data_1`. If you do additional inserts, the method can be re-used and the variable will change (e.g. `df_data_2`).
168 |
169 | Later in the notebook, we set `df = df_data_1`. So you might need to fix the variable name `df_data_1` to match your inserted code or vice versa.
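
If you are running without the generated insert (for example, locally with the sample data), a minimal stand-in that produces the same `df` might look like this (the local path is an assumption; the generated cell instead authenticates to Object Storage and streams the same file):

```python
import pandas as pd

# Stand-in for the generated insert-to-code cell.
df_data_1 = pd.read_csv('data/example_facebook_data.csv')

# Later cells expect the name `df`; point it at whichever variable
# the insert actually created (df_data_1, df_data_2, ...).
df = df_data_1
```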
170 |
171 | #### Add file credentials
172 |
173 | We want to write the enriched file to the same container that we used above. So now we'll use the same file drop-down to insert credentials. We'll use them later when we write out the enriched CSV file.
174 |
175 | After the `df` setup, there is a cell to enter the file credentials. Place your cursor after the `# insert credentials for file - Change to credentials_1` line. Make sure this cell is selected before inserting credentials.
176 |
177 | Use the CSV file's drop-down menu again. This time select `Insert Credentials`.
178 |
179 | ![insert_file_credentials](doc/source/images/insert_file_credentials.png)
180 |
181 | > **Note**: This cell is marked as a `hidden_cell` because it contains sensitive credentials.
182 |
183 | #### Fix-up credentials variable name
184 |
185 | The inserted code includes a dictionary with credentials assigned to a variable with a name like `credentials_1`. It may have a different name (e.g. `credentials_2`). Rename it or reassign it if needed. The notebook code assumes it will be `credentials_1`.
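
For example, if your insert produced `credentials_2`, a one-line reassignment keeps the rest of the notebook working. The sketch below also shows roughly how such credentials can be used later to write the enriched CSV back to Object Storage with the `ibm-cos-sdk` (`ibm_boto3`); the dictionary keys are typical of the generated code but may differ in your insert, and `df` is the enriched DataFrame from the earlier cells:

```python
import ibm_boto3
from ibm_botocore.client import Config

# Hypothetical shape of the inserted credentials (values redacted).
credentials_2 = {
    'IAM_SERVICE_ID': '...',
    'IBM_API_KEY_ID': '...',
    'ENDPOINT': 'https://s3.us.cloud-object-storage.appdomain.cloud',
    'BUCKET': '...',
    'FILE': 'example_facebook_data.csv',
}
credentials_1 = credentials_2  # the notebook expects credentials_1

# Later, the enriched DataFrame can be written back to the same bucket.
cos = ibm_boto3.client('s3',
                       ibm_api_key_id=credentials_1['IBM_API_KEY_ID'],
                       ibm_service_instance_id=credentials_1['IAM_SERVICE_ID'],
                       ibm_auth_endpoint='https://iam.cloud.ibm.com/identity/token',
                       config=Config(signature_version='oauth'),
                       endpoint_url=credentials_1['ENDPOINT'])
cos.put_object(Bucket=credentials_1['BUCKET'],
               Key='enriched_example_facebook_data.csv',
               Body=df.to_csv(index=False))
```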
186 |
187 | ### 7. Run the notebook
188 |
189 | When a notebook is executed, each code cell in the notebook is executed, in order, from top to bottom.
190 |
191 | Each code cell is selectable and is preceded by a tag in the left margin. The tag format is `In [x]:`. Depending on the state of the notebook, the `x` can be:
192 |
193 | * A blank, indicating that the cell has never been executed.
194 | * A number, representing the relative order in which this code step was executed.
195 | * A `*`, indicating that the cell is currently executing.
196 |
197 | There are several ways to execute the code cells in your notebook:
198 |
199 | * One cell at a time.
200 | * Select the cell, and then press the `Play` button in the toolbar.
201 | * Batch mode, in sequential order.
202 |     * From the `Cell` menu bar, there are several options available. For example, you can `Run All` cells in your notebook, or you can `Run All Below`, which will start executing from the first cell under the currently selected cell and then continue executing all cells that follow.
203 | * At a scheduled time.
204 | * Press the `Schedule` button located in the top right section of your notebook panel. Here you can schedule your notebook to be executed once at some future time, or repeatedly at your specified interval.
205 |
206 | ### 8. Add a dashboard to the project
207 |
208 | ### Add the enriched data as a project data asset
209 |
210 | * Go to the `Assets` tab in your Watson Studio project and click on the `01/00` (Find and add data) icon.
211 | * Select the `enriched_example_facebook_data.csv` file and use the 3-dot pull-down to select `Add as data asset`.
212 |
213 | ### Associate the project with a Dashboard service
214 |
215 | * Go to the `Settings` tab in the new project and scroll down to `Associated Services`.
216 | * Click `Add service` and select `Dashboard` from the drop-down menu.
217 | * Create the service using the free `Lite` plan.
218 |
219 | ### Load the provided dashboard.json file
220 |
221 | * Click the `Add to project +` button and select `Dashboard`.
222 | * Select the `From file` tab and use the `Select file` button to open the file `dashboards/dashboard.json` from your local repo.
223 | * Select your Cognos Dashboard Embedded service from the list.
224 | * Click `Create`.
225 | * If you are asked to re-link the data set, select your `enriched_example_facebook_data.csv` asset.
226 |
227 | ### 9. Analyze the results
228 |
229 | If you walk through the cells, you will see that we demonstrated how to do the following:
230 |
231 | * Install external libraries from PyPI
232 | * Create clients to connect to Watson cognitive services
233 | * Load data from a local CSV file to a pandas DataFrame (via Object Storage)
234 | * Do some data manipulation with pandas
235 | * Use BeautifulSoup
236 | * Use Natural Language Understanding (see the sketch after this list)
237 | * Use Visual Recognition
238 | * Save the enriched data in a CSV file in Object Storage
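
As a concrete taste of the enrichment step, here is a hedged sketch of creating an NLU client and analyzing one post with the `ibm-watson` SDK (the version string, feature choices, placeholder credentials, and sample text are illustrative, not the notebook's exact code):

```python
from ibm_watson import NaturalLanguageUnderstandingV1
from ibm_cloud_sdk_core.authenticators import IAMAuthenticator
from ibm_watson.natural_language_understanding_v1 import (
    Features, EmotionOptions, EntitiesOptions, KeywordsOptions, SentimentOptions)

# Use the API key and URL you saved in step 5.
nlu = NaturalLanguageUnderstandingV1(
    version='2019-07-12',
    authenticator=IAMAuthenticator('<nlu-apikey>'))
nlu.set_service_url('<nlu-url>')

# Enrich one post; the notebook applies this across the DataFrame's
# text columns and stores the results in new columns.
result = nlu.analyze(
    text='We love the new analytics dashboard!',
    features=Features(emotion=EmotionOptions(),
                      sentiment=SentimentOptions(),
                      keywords=KeywordsOptions(),
                      entities=EntitiesOptions()),
    language='en').get_result()

print(result['sentiment']['document']['label'])
print([kw['text'] for kw in result['keywords']])
```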
239 |
240 | When you try the dashboard, you will see:
241 |
242 | * How to add a dashboard to a Watson Studio project
243 | * How to import a dashboard JSON file
244 | * How to link a dashboard to data saved in Cloud Object Storage
245 | * An example with tabs and a variety of charts
246 | * A dashboard tool that you can use to explore your data and create new visualizations to share
247 |
248 | ## Sample output
249 |
250 | The provided dashboard uses four tabs to show four simple charts:
251 |
252 | * Emotion
253 | * Sentiment
254 | * Entities
255 | * Keywords
256 |
257 | The enriched data contains emotions, sentiment, entities, and keywords that were added using Natural Language Understanding to process the posts, links, and thumbnails. Combining the enrichment with the metrics from Facebook gives us a huge number of options for what we could show on the dashboard. The dashboard editor also gives you great flexibility in how you arrange your dashboard and visualize your data. The example demonstrates the following:
258 |
259 | * A word-cloud showing the keywords sized by total impressions and using color to show the sentiment
260 |
261 | ![keywords](doc/source/images/keywords.png)
262 |
263 | * A pie chart showing total reach by emotion
264 |
265 | ![emotion](doc/source/images/emotion.png)
266 |
267 | * A stacked bar chart showing likes, shares, and comments by post sentiment
268 |
269 | ![sentimental_engagement](doc/source/images/sentimental_engagement.png)
270 |
271 | * A bar chart with a line overlay, showing total impressions and paid impressions by mentioned entity
272 |
273 | ![entities](doc/source/images/entities.png)
274 |
275 | ## License
276 |
277 | This code pattern is licensed under the Apache License, Version 2. Separate third-party code objects invoked within this code pattern are licensed by their respective providers pursuant to their own separate licenses. Contributions are subject to the [Developer Certificate of Origin, Version 1.1](https://developercertificate.org/) and the [Apache License, Version 2](https://www.apache.org/licenses/LICENSE-2.0.txt).
278 |
279 | [Apache License FAQ](https://www.apache.org/foundation/license-faq.html#WhatDoesItMEAN)
280 |
--------------------------------------------------------------------------------
/dashboards/dashboard.json:
--------------------------------------------------------------------------------
1 | {"name":"New dashboard","layout":{"id":"page0","items":[{"id":"page1","items":[{"id":"page2","css":"templateBox aspectRatio_default","items":[{"id":"page3","style":{"top":"0%","left":"0%","right":"75%","bottom":"75%"},"type":"templateDropZone","templateName":"dz1","relatedLayouts":""},{"id":"page4","css":"noBorderLeft","style":{"top":"0%","left":"25%","right":"50%","bottom":"75%"},"type":"templateDropZone","templateName":"dz3"},{"id":"page5","css":"noBorderLeft","style":{"top":"0%","left":"50%","right":"25%","bottom":"75%"},"type":"templateDropZone","templateName":"dz4"},{"id":"page6","css":"noBorderLeft","style":{"top":"0%","left":"75%","right":"00%","bottom":"75%"},"type":"templateDropZone","templateName":"dz5"},{"id":"page7","css":"noBorderTop","style":{"top":"25%","left":"0%","right":"0%","bottom":"0%"},"type":"templateDropZone","templateName":"dz2","relatedLayouts":""},{"id":"model000001737d90789a_00000000","style":{"top":"10.51930758988016%","left":"0.1466275659824047%","height":"49.80026631158455%","width":"46.18768328445748%"},"type":"widget","relatedLayouts":""},{"id":"model000001737d9504c7_00000000","style":{"width":"49.853372434017594%","height":"10.252996005326231%","top":"0.2663115845539281%","left":"0.1466275659824047%","transform":"matrix(0.999999, 0.00123007, -0.00123007, 0.999999, 0, 0)","-webkit-transform":"matrix(0.999999, 0.00123007, -0.00123007, 0.999999, 0, 0)"},"type":"widget","relatedLayouts":""}],"type":"scalingAbsolute"}],"type":"container","title":{"translationTable":{"Default":"Emotion"}},"templateName":"Template4"},{"id":"model000001737d8fcb55_00000000","items":[{"id":"model000001737d8fcb56_00000000","css":"templateBox aspectRatio_default","items":[{"id":"model000001737d8fcb56_00000001","style":{"top":"0%","left":"0%","right":"75%","bottom":"75%"},"type":"templateDropZone","templateName":"dz1","relatedLayouts":""},{"id":"model000001737d8fcb57_00000000","css":"noBorderLeft","style":{"top":"0%","left":"25%","right":"50%","bottom":"75%"},"type":"templateDropZone","templateName":"dz3"},{"id":"model000001737d8fcb57_00000001","css":"noBorderLeft","style":{"top":"0%","left":"50%","right":"25%","bottom":"75%"},"type":"templateDropZone","templateName":"dz4"},{"id":"model000001737d8fcb57_00000002","css":"noBorderLeft","style":{"top":"0%","left":"75%","right":"00%","bottom":"75%"},"type":"templateDropZone","templateName":"dz5"},{"id":"model000001737d8fcb57_00000003","css":"noBorderTop","style":{"top":"25%","left":"0%","right":"0%","bottom":"0%"},"type":"templateDropZone","templateName":"dz2"},{"id":"model000001737d96e769_00000000","style":{"width":"66.49560117302053%","height":"9.986684420772304%","top":"0.13315579227696406%","left":"0.07331378299120235%","transform":"matrix(0.999993, 0.00368788, -0.00368788, 0.999993, 0, 0)","-webkit-transform":"matrix(0.999993, 0.00368788, -0.00368788, 0.999993, 0, 0)"},"type":"widget","relatedLayouts":""},{"id":"model000001737d9792c5_00000000","style":{"left":"0.07331378299120235%","top":"9.85352862849534%","height":"38.348868175765645%","width":"86.43695014662757%"},"type":"widget","relatedLayouts":""}],"type":"scalingAbsolute"}],"type":"container","title":{"translationTable":{"Default":"Sentiment"}},"templateName":"Template4"},{"id":"model000001737df944ca_00000000","items":[{"id":"model000001737df944cb_00000000","css":"templateBox 
aspectRatio_default","items":[{"id":"model000001737df944cb_00000001","style":{"top":"0%","left":"0%","right":"0%","bottom":"75%"},"type":"templateDropZone","templateName":"dz1"},{"id":"model000001737df944cb_00000002","css":"noBorderTop","style":{"top":"25%","left":"0%","right":"0%","bottom":"0%"},"type":"templateDropZone","templateName":"dz2","relatedLayouts":""},{"id":"model000001737df97403_00000000","style":{"width":"47.800586510263926%","height":"9.454061251664447%","left":"0.07331378299120235%","top":"0.13315579227696406%"},"type":"widget","relatedLayouts":""},{"id":"model000001737df9f2c0_00000000","style":{"left":"0.07331378299120235%","top":"9.587217043941411%","height":"43.00932090545939%","width":"99.9266862170088%"},"type":"widget","relatedLayouts":""}],"type":"scalingAbsolute"}],"type":"container","title":{"translationTable":{"Default":"Entities"}},"templateName":"Template2"},{"id":"model000001737e05eb20_00000000","items":[{"id":"model000001737e05eb21_00000000","css":"templateBox aspectRatio_default","items":[{"id":"model000001737e05eb21_00000001","style":{"top":"0%","left":"0%","right":"0%","bottom":"75%"},"type":"templateDropZone","templateName":"dz1","relatedLayouts":""},{"id":"model000001737e05eb21_00000002","css":"noBorderTop","style":{"top":"25%","left":"0%","right":"0%","bottom":"0%"},"type":"templateDropZone","templateName":"dz2","relatedLayouts":""},{"id":"model000001737e063c9c_00000000","style":{"left":"0.07331378299120235%","top":"16.37816245006658%","height":"70.97203728362184%","width":"68.841642228739%"},"type":"widget","relatedLayouts":""},{"id":"model000001737e07fcd2_00000000","style":{"width":"70.01466275659824%","height":"10.652463382157125%","left":"0.07331378299120235%","top":"1.8641810918774966%"},"type":"widget","relatedLayouts":""}],"type":"scalingAbsolute"}],"type":"container","title":{"translationTable":{"Default":"Keywords"}},"templateName":"Template2"}],"style":{"height":"100%"},"type":"tab"},"theme":"defaultTheme","version":1009,"eventGroups":[{"id":"page1:1","widgetIds":["model000001737d90789a_00000000"]},{"id":"model000001737d8fcb55_00000000:1","widgetIds":["model000001737d9792c5_00000000"]},{"id":"model000001737df944ca_00000000:1","widgetIds":["model000001737df9f2c0_00000000"]},{"id":"model000001737e05eb20_00000000:1","widgetIds":["model000001737e063c9c_00000000"]}],"properties":{"defaultLocale":"Default"},"dataSources":{"version":"1.0","sources":[{"id":"model000001737d8ed155_00000001","assetId":"assetId000001737d8ed154_00000000","clientId":"41cdf9f5-0401-49b3-8f31-31e986ac3625:747b0a56-b605-4bff-8cdb-3b30822c9295:747b0a56-b605-4bff-8cdb-3b30822c9295:data_asset:enriched_example_facebook_data.csv","module":{"xsd":"https://ibm.com/daas/module/1.0/module.xsd","source":{"id":"41cdf9f5-0401-49b3-8f31-31e986ac3625:747b0a56-b605-4bff-8cdb-3b30822c9295:747b0a56-b605-4bff-8cdb-3b30822c9295:data_asset:enriched_example_facebook_data.csv","srcUrl":{"sourceUrl":"{enc}fH+x5OzLtjJ0G9vSAwJ48i7ydshJYh0CBG4b4CUkDXQpmSlw1v56uYA7dwoeHokqnmhDRvIDiK7eAPHWvgCvHD65ss8nNoGVIHwqwSsxIiE+5zc4AySEUWqPubddjgKGU4qP8IsF32ZvC338eNZpMvke9EyfArgbDrMizJHPR4Fob0v6uyeMj7iCUJlcs7WqnVeWM0nM89Nk2CrZ/rsZOoTKAulIsBfHgMKNa0QqVDlBhCls9ds32hEjwJDr1dnhRdF+UfataJk+0B8i9db4yYRcvEJERzNiXPZ3jTdEXdj1oZDcR48BKcHmeX4zii3A/CEQkXCoV3fhfVqgvn4qlg==","mimeType":"text/csv","property":[{"name":"headers","value":[{"name":"Authorization","value":"{enc}RYdkdiC0Nt73AYv8fl/cNuogEMfUjfwqqkzaFOvAl5RrI0pRUuSKxFqogZ4nMTTmemX78DVrom45zSRWxNInJ5xEnh6qNbNRFzKzbwbKudlQjZE2xlGOXDfCgkDDo0dE92eeqaAp7QRa3ZM7orGurMGMeWUPdjsnJR
AuGZoTGZWCN9ub9BDGPKAnrNP6xSZ7rPQrdo6b91Xrv5UDr3Hutb0Jrb6/+F4+ugzvp1iVWBWjw23hQKMDDEUlYw5m1T+6HW6R6LJyHG68s5qmCGbPWV4ovsVm3MJEBE5eIuX4mchHm/sMEu0H0fKV0rZ46N6pi892tIwNU8hRcWjR+1zmVw=="}]}]}},"table":{"column":[{"datatype":"NVARCHAR(512)","name":"Link","label":"Link","description":"Link","usage":"attribute","regularAggregate":"countDistinct","taxonomyFamily":"cNone","nullable":true},{"datatype":"NVARCHAR(512)","name":"Post_ID","label":"Post ID","description":"Post ID","usage":"attribute","regularAggregate":"countDistinct","taxonomyFamily":"cNone","nullable":true},{"datatype":"NVARCHAR(512)","name":"Permalink","label":"Permalink","description":"Permalink","usage":"attribute","regularAggregate":"countDistinct","taxonomyFamily":"cNone","nullable":true},{"datatype":"NVARCHAR(512)","name":"Text","label":"Text","description":"Text","usage":"attribute","regularAggregate":"countDistinct","taxonomyFamily":"cNone","nullable":true},{"datatype":"NVARCHAR(512)","name":"Type","label":"Type","description":"Type","usage":"attribute","regularAggregate":"countDistinct","taxonomyFamily":"cNone","nullable":true},{"datatype":"NVARCHAR(512)","name":"Countries","label":"Countries","description":"Countries","usage":"attribute","regularAggregate":"countDistinct","taxonomyFamily":"cNone","nullable":true},{"datatype":"NVARCHAR(512)","name":"Languages","label":"Languages","description":"Languages","usage":"attribute","regularAggregate":"countDistinct","taxonomyFamily":"cNone","nullable":true},{"datatype":"NVARCHAR(512)","name":"Posted","label":"Posted","description":"Posted","usage":"attribute","regularAggregate":"countDistinct","taxonomyFamily":"cNone","nullable":true},{"datatype":"NVARCHAR(512)","name":"Audience_Targeting","label":"Audience Targeting","description":"Audience Targeting","usage":"attribute","regularAggregate":"countDistinct","taxonomyFamily":"cNone","nullable":true},{"datatype":"BIGINT","name":"Lifetime_Post_Total_Reach","label":"Lifetime Post Total Reach","description":"Lifetime Post Total Reach","usage":"fact","regularAggregate":"total","taxonomyFamily":"cNone","nullable":true},{"datatype":"BIGINT","name":"Lifetime_Post_organic_reach","label":"Lifetime Post organic reach","description":"Lifetime Post organic reach","usage":"fact","regularAggregate":"total","taxonomyFamily":"cNone","nullable":true},{"datatype":"NVARCHAR(512)","name":"Lifetime_Post_Paid_Reach","label":"Lifetime Post Paid Reach","description":"Lifetime Post Paid Reach","usage":"attribute","regularAggregate":"countDistinct","taxonomyFamily":"cNone","nullable":true},{"datatype":"BIGINT","name":"Lifetime_Post_Total_Impressions","label":"Lifetime Post Total Impressions","description":"Lifetime Post Total Impressions","usage":"fact","regularAggregate":"total","taxonomyFamily":"cNone","nullable":true},{"datatype":"BIGINT","name":"Lifetime_Post_Organic_Impressions","label":"Lifetime Post Organic Impressions","description":"Lifetime Post Organic Impressions","usage":"fact","regularAggregate":"total","taxonomyFamily":"cNone","nullable":true},{"datatype":"NVARCHAR(512)","name":"Lifetime_Post_Paid_Impressions","label":"Lifetime Post Paid Impressions","description":"Lifetime Post Paid Impressions","usage":"attribute","regularAggregate":"countDistinct","taxonomyFamily":"cNone","nullable":true},{"datatype":"NVARCHAR(512)","name":"Lifetime_Engaged_Users","label":"Lifetime Engaged Users","description":"Lifetime Engaged 
Users","usage":"attribute","regularAggregate":"countDistinct","taxonomyFamily":"cNone","nullable":true},{"datatype":"BIGINT","name":"Lifetime_Matched_Audience_Targeting_Consumers_on_Post","label":"Lifetime Matched Audience Targeting Consumers on Post","description":"Lifetime Matched Audience Targeting Consumers on Post","usage":"fact","regularAggregate":"total","taxonomyFamily":"cNone","nullable":true},{"datatype":"BIGINT","name":"Lifetime_Matched_Audience_Targeting_Consumptions_on_Post","label":"Lifetime Matched Audience Targeting Consumptions on Post","description":"Lifetime Matched Audience Targeting Consumptions on Post","usage":"fact","regularAggregate":"total","taxonomyFamily":"cNone","nullable":true},{"datatype":"BIGINT","name":"Lifetime_Negative_Feedback_from_Users","label":"Lifetime Negative Feedback from Users","description":"Lifetime Negative Feedback from Users","usage":"fact","regularAggregate":"total","taxonomyFamily":"cNone","nullable":true},{"datatype":"BIGINT","name":"Lifetime_Negative_Feedback","label":"Lifetime Negative Feedback","description":"Lifetime Negative Feedback","usage":"fact","regularAggregate":"total","taxonomyFamily":"cNone","nullable":true},{"datatype":"NVARCHAR(512)","name":"Lifetime_Post_Impressions_by_people_who_have_liked_your_Page","label":"Lifetime Post Impressions by people who have liked your Page","description":"Lifetime Post Impressions by people who have liked your Page","usage":"attribute","regularAggregate":"countDistinct","taxonomyFamily":"cNone","nullable":true},{"datatype":"NVARCHAR(512)","name":"Lifetime_Post_reach_by_people_who_like_your_Page","label":"Lifetime Post reach by people who like your Page","description":"Lifetime Post reach by people who like your Page","usage":"attribute","regularAggregate":"countDistinct","taxonomyFamily":"cNone","nullable":true},{"datatype":"NVARCHAR(512)","name":"Lifetime_Post_Paid_Impressions_by_people_who_have_liked_your_Page","label":"Lifetime Post Paid Impressions by people who have liked your Page","description":"Lifetime Post Paid Impressions by people who have liked your Page","usage":"attribute","regularAggregate":"countDistinct","taxonomyFamily":"cNone","nullable":true},{"datatype":"NVARCHAR(512)","name":"Lifetime_Paid_reach_of_a_post_by_people_who_like_your_Page","label":"Lifetime Paid reach of a post by people who like your Page","description":"Lifetime Paid reach of a post by people who like your Page","usage":"attribute","regularAggregate":"countDistinct","taxonomyFamily":"cNone","nullable":true},{"datatype":"NVARCHAR(512)","name":"Lifetime_People_who_have_liked_your_Page_and_engaged_with_your_post","label":"Lifetime People who have liked your Page and engaged with your post","description":"Lifetime People who have liked your Page and engaged with your post","usage":"attribute","regularAggregate":"countDistinct","taxonomyFamily":"cNone","nullable":true},{"datatype":"BIGINT","name":"Lifetime_Organic_views_to_95_","label":"Lifetime Organic views to 95%","description":"Lifetime Organic views to 95%","usage":"fact","regularAggregate":"total","taxonomyFamily":"cNone","nullable":true},{"datatype":"BIGINT","name":"Lifetime_Organic_views_to_95_1","label":"Lifetime Organic views to 95%.1","description":"Lifetime Organic views to 95%.1","usage":"fact","regularAggregate":"total","taxonomyFamily":"cNone","nullable":true},{"datatype":"NVARCHAR(512)","name":"Lifetime_Paid_views_to_95_","label":"Lifetime Paid views to 95%","description":"Lifetime Paid views to 
95%","usage":"attribute","regularAggregate":"countDistinct","taxonomyFamily":"cNone","nullable":true},{"datatype":"NVARCHAR(512)","name":"Lifetime_Paid_views_to_95_1","label":"Lifetime Paid views to 95%.1","description":"Lifetime Paid views to 95%.1","usage":"attribute","regularAggregate":"countDistinct","taxonomyFamily":"cNone","nullable":true},{"datatype":"NVARCHAR(512)","name":"Lifetime_Organic_Video_Views","label":"Lifetime Organic Video Views","description":"Lifetime Organic Video Views","usage":"attribute","regularAggregate":"countDistinct","taxonomyFamily":"cNone","nullable":true},{"datatype":"NVARCHAR(512)","name":"Lifetime_Organic_Video_Views_1","label":"Lifetime Organic Video Views.1","description":"Lifetime Organic Video Views.1","usage":"attribute","regularAggregate":"countDistinct","taxonomyFamily":"cNone","nullable":true},{"datatype":"NVARCHAR(512)","name":"Lifetime_Paid_Video_Views","label":"Lifetime Paid Video Views","description":"Lifetime Paid Video Views","usage":"attribute","regularAggregate":"countDistinct","taxonomyFamily":"cNone","nullable":true},{"datatype":"NVARCHAR(512)","name":"Lifetime_Paid_Video_Views_1","label":"Lifetime Paid Video Views.1","description":"Lifetime Paid Video Views.1","usage":"attribute","regularAggregate":"countDistinct","taxonomyFamily":"cNone","nullable":true},{"datatype":"NVARCHAR(512)","name":"Lifetime_Average_time_video_viewed","label":"Lifetime Average time video viewed","description":"Lifetime Average time video viewed","usage":"attribute","regularAggregate":"countDistinct","taxonomyFamily":"cNone","nullable":true},{"datatype":"NVARCHAR(512)","name":"Lifetime_Video_length","label":"Lifetime Video length","description":"Lifetime Video length","usage":"attribute","regularAggregate":"countDistinct","taxonomyFamily":"cNone","nullable":true},{"datatype":"BIGINT","name":"Lifetime_Talking_About_This_Post_by_action_type_share","label":"Lifetime Talking About This (Post) by action type - share","description":"Lifetime Talking About This (Post) by action type - share","usage":"fact","regularAggregate":"total","taxonomyFamily":"cNone","nullable":true},{"datatype":"DOUBLE","name":"Lifetime_Talking_About_This_Post_by_action_type_like","label":"Lifetime Talking About This (Post) by action type - like","description":"Lifetime Talking About This (Post) by action type - like","usage":"fact","regularAggregate":"total","taxonomyFamily":"cNone","nullable":true},{"datatype":"DOUBLE","name":"Lifetime_Talking_About_This_Post_by_action_type_comment","label":"Lifetime Talking About This (Post) by action type - comment","description":"Lifetime Talking About This (Post) by action type - comment","usage":"fact","regularAggregate":"total","taxonomyFamily":"cNone","nullable":true},{"datatype":"BIGINT","name":"Lifetime_Post_Stories_by_action_type_share","label":"Lifetime Post Stories by action type - share","description":"Lifetime Post Stories by action type - share","usage":"fact","regularAggregate":"total","taxonomyFamily":"cNone","nullable":true},{"datatype":"DOUBLE","name":"Lifetime_Post_Stories_by_action_type_like","label":"Lifetime Post Stories by action type - like","description":"Lifetime Post Stories by action type - like","usage":"fact","regularAggregate":"total","taxonomyFamily":"cNone","nullable":true},{"datatype":"DOUBLE","name":"Lifetime_Post_Stories_by_action_type_comment","label":"Lifetime Post Stories by action type - comment","description":"Lifetime Post Stories by action type - 
comment","usage":"fact","regularAggregate":"total","taxonomyFamily":"cNone","nullable":true},{"datatype":"NVARCHAR(512)","name":"Lifetime_Post_Audience_Targeting_Unique_Consumptions_by_Type_other_clicks","label":"Lifetime Post Audience Targeting Unique Consumptions by Type - other clicks","description":"Lifetime Post Audience Targeting Unique Consumptions by Type - other clicks","usage":"attribute","regularAggregate":"countDistinct","taxonomyFamily":"cNone","nullable":true},{"datatype":"NVARCHAR(512)","name":"Lifetime_Post_Audience_Targeting_Unique_Consumptions_by_Type_link_clicks","label":"Lifetime Post Audience Targeting Unique Consumptions by Type - link clicks","description":"Lifetime Post Audience Targeting Unique Consumptions by Type - link clicks","usage":"attribute","regularAggregate":"countDistinct","taxonomyFamily":"cNone","nullable":true},{"datatype":"NVARCHAR(512)","name":"Lifetime_Post_Audience_Targeting_Unique_Consumptions_by_Type_photo_view","label":"Lifetime Post Audience Targeting Unique Consumptions by Type - photo view","description":"Lifetime Post Audience Targeting Unique Consumptions by Type - photo view","usage":"attribute","regularAggregate":"countDistinct","taxonomyFamily":"cNone","nullable":true},{"datatype":"NVARCHAR(512)","name":"Lifetime_Post_Audience_Targeting_Unique_Consumptions_by_Type_video_play","label":"Lifetime Post Audience Targeting Unique Consumptions by Type - video play","description":"Lifetime Post Audience Targeting Unique Consumptions by Type - video play","usage":"attribute","regularAggregate":"countDistinct","taxonomyFamily":"cNone","nullable":true},{"datatype":"BIGINT","name":"Lifetime_Matched_Audience_Targeting_Consumptions_by_Type_other_clicks","label":"Lifetime Matched Audience Targeting Consumptions by Type - other clicks","description":"Lifetime Matched Audience Targeting Consumptions by Type - other clicks","usage":"fact","regularAggregate":"total","taxonomyFamily":"cNone","nullable":true},{"datatype":"DOUBLE","name":"Lifetime_Matched_Audience_Targeting_Consumptions_by_Type_link_clicks","label":"Lifetime Matched Audience Targeting Consumptions by Type - link clicks","description":"Lifetime Matched Audience Targeting Consumptions by Type - link clicks","usage":"fact","regularAggregate":"total","taxonomyFamily":"cNone","nullable":true},{"datatype":"NVARCHAR(512)","name":"Lifetime_Matched_Audience_Targeting_Consumptions_by_Type_photo_view","label":"Lifetime Matched Audience Targeting Consumptions by Type - photo view","description":"Lifetime Matched Audience Targeting Consumptions by Type - photo view","usage":"attribute","regularAggregate":"countDistinct","taxonomyFamily":"cNone","nullable":true},{"datatype":"NVARCHAR(512)","name":"Lifetime_Matched_Audience_Targeting_Consumptions_by_Type_video_play","label":"Lifetime Matched Audience Targeting Consumptions by Type - video play","description":"Lifetime Matched Audience Targeting Consumptions by Type - video play","usage":"attribute","regularAggregate":"countDistinct","taxonomyFamily":"cNone","nullable":true},{"datatype":"NVARCHAR(512)","name":"Lifetime_Negative_Feedback_from_Users_by_Type_hide_all_clicks","label":"Lifetime Negative Feedback from Users by Type - hide_all_clicks","description":"Lifetime Negative Feedback from Users by Type - hide_all_clicks","usage":"attribute","regularAggregate":"countDistinct","taxonomyFamily":"cNone","nullable":true},{"datatype":"NVARCHAR(512)","name":"Lifetime_Negative_Feedback_from_Users_by_Type_hide_clicks","label":"Lifetime Negative Feedback from Users 
by Type - hide_clicks","description":"Lifetime Negative Feedback from Users by Type - hide_clicks","usage":"attribute","regularAggregate":"countDistinct","taxonomyFamily":"cNone","nullable":true},{"datatype":"NVARCHAR(512)","name":"Lifetime_Negative_Feedback_from_Users_by_Type_report_spam_clicks","label":"Lifetime Negative Feedback from Users by Type - report_spam_clicks","description":"Lifetime Negative Feedback from Users by Type - report_spam_clicks","usage":"attribute","regularAggregate":"countDistinct","taxonomyFamily":"cNone","nullable":true},{"datatype":"NVARCHAR(512)","name":"Lifetime_Negative_Feedback_by_Type_hide_all_clicks","label":"Lifetime Negative Feedback by Type - hide_all_clicks","description":"Lifetime Negative Feedback by Type - hide_all_clicks","usage":"attribute","regularAggregate":"countDistinct","taxonomyFamily":"cNone","nullable":true},{"datatype":"NVARCHAR(512)","name":"Lifetime_Negative_Feedback_by_Type_hide_clicks","label":"Lifetime Negative Feedback by Type - hide_clicks","description":"Lifetime Negative Feedback by Type - hide_clicks","usage":"attribute","regularAggregate":"countDistinct","taxonomyFamily":"cNone","nullable":true},{"datatype":"NVARCHAR(512)","name":"Lifetime_Negative_Feedback_by_Type_report_spam_clicks","label":"Lifetime Negative Feedback by Type - report_spam_clicks","description":"Lifetime Negative Feedback by Type - report_spam_clicks","usage":"attribute","regularAggregate":"countDistinct","taxonomyFamily":"cNone","nullable":true},{"datatype":"NVARCHAR(512)","name":"Thumbnails","label":"Thumbnails","description":"Thumbnails","usage":"attribute","regularAggregate":"countDistinct","taxonomyFamily":"cNone","nullable":true},{"datatype":"NVARCHAR(512)","name":"Image","label":"Image","description":"Image","usage":"attribute","regularAggregate":"countDistinct","taxonomyFamily":"cNone","nullable":true},{"datatype":"NVARCHAR(512)","name":"Extended_Links","label":"Extended Links","description":"Extended 
Links","usage":"attribute","regularAggregate":"countDistinct","taxonomyFamily":"cNone","nullable":true},{"datatype":"NVARCHAR(512)","name":"TextHighestEmotion","label":"TextHighestEmotion","description":"TextHighestEmotion","usage":"attribute","regularAggregate":"countDistinct","taxonomyFamily":"cNone","nullable":true},{"datatype":"DOUBLE","name":"TextHighestEmotionScore","label":"TextHighestEmotionScore","description":"TextHighestEmotionScore","usage":"fact","regularAggregate":"total","taxonomyFamily":"cNone","nullable":true},{"datatype":"NVARCHAR(512)","name":"TextOverallSentimentType","label":"TextOverallSentimentType","description":"TextOverallSentimentType","usage":"attribute","regularAggregate":"countDistinct","taxonomyFamily":"cNone","nullable":true},{"datatype":"VARCHAR(64)","name":"TextOverallSentimentScore","label":"TextOverallSentimentScore","description":"TextOverallSentimentScore","usage":"fact","regularAggregate":"total","taxonomyFamily":"cNone","nullable":true},{"datatype":"NVARCHAR(512)","name":"TextKeywords","label":"TextKeywords","description":"TextKeywords","usage":"attribute","regularAggregate":"countDistinct","taxonomyFamily":"cNone","nullable":true},{"datatype":"NVARCHAR(512)","name":"TextEntities","label":"TextEntities","description":"TextEntities","usage":"attribute","regularAggregate":"countDistinct","taxonomyFamily":"cNone","nullable":true},{"datatype":"NVARCHAR(512)","name":"MaxTextKeywords","label":"MaxTextKeywords","description":"MaxTextKeywords","usage":"attribute","regularAggregate":"countDistinct","taxonomyFamily":"cNone","nullable":true},{"datatype":"NVARCHAR(512)","name":"MaxTextEntity","label":"MaxTextEntity","description":"MaxTextEntity","usage":"attribute","regularAggregate":"countDistinct","taxonomyFamily":"cNone","nullable":true},{"datatype":"NVARCHAR(512)","name":"ThumbnailHighestEmotion","label":"ThumbnailHighestEmotion","description":"ThumbnailHighestEmotion","usage":"attribute","regularAggregate":"countDistinct","taxonomyFamily":"cNone","nullable":true},{"datatype":"DOUBLE","name":"ThumbnailHighestEmotionScore","label":"ThumbnailHighestEmotionScore","description":"ThumbnailHighestEmotionScore","usage":"fact","regularAggregate":"total","taxonomyFamily":"cNone","nullable":true},{"datatype":"NVARCHAR(512)","name":"ThumbnailOverallSentimentType","label":"ThumbnailOverallSentimentType","description":"ThumbnailOverallSentimentType","usage":"attribute","regularAggregate":"countDistinct","taxonomyFamily":"cNone","nullable":true},{"datatype":"VARCHAR(64)","name":"ThumbnailOverallSentimentScore","label":"ThumbnailOverallSentimentScore","description":"ThumbnailOverallSentimentScore","usage":"fact","regularAggregate":"total","taxonomyFamily":"cNone","nullable":true},{"datatype":"NVARCHAR(512)","name":"ThumbnailKeywords","label":"ThumbnailKeywords","description":"ThumbnailKeywords","usage":"attribute","regularAggregate":"countDistinct","taxonomyFamily":"cNone","nullable":true},{"datatype":"NVARCHAR(512)","name":"ThumbnailEntities","label":"ThumbnailEntities","description":"ThumbnailEntities","usage":"attribute","regularAggregate":"countDistinct","taxonomyFamily":"cNone","nullable":true},{"datatype":"NVARCHAR(512)","name":"MaxThumbnailKeywords","label":"MaxThumbnailKeywords","description":"MaxThumbnailKeywords","usage":"attribute","regularAggregate":"countDistinct","taxonomyFamily":"cNone","nullable":true},{"datatype":"NVARCHAR(512)","name":"MaxThumbnailEntity","label":"MaxThumbnailEntity","description":"MaxThumbnailEntity","usage":"attribute","regularAggregate"
:"countDistinct","taxonomyFamily":"cNone","nullable":true},{"datatype":"NVARCHAR(512)","name":"LinkHighestEmotion","label":"LinkHighestEmotion","description":"LinkHighestEmotion","usage":"attribute","regularAggregate":"countDistinct","taxonomyFamily":"cNone","nullable":true},{"datatype":"DOUBLE","name":"LinkHighestEmotionScore","label":"LinkHighestEmotionScore","description":"LinkHighestEmotionScore","usage":"fact","regularAggregate":"total","taxonomyFamily":"cNone","nullable":true},{"datatype":"NVARCHAR(512)","name":"LinkOverallSentimentType","label":"LinkOverallSentimentType","description":"LinkOverallSentimentType","usage":"attribute","regularAggregate":"countDistinct","taxonomyFamily":"cNone","nullable":true},{"datatype":"DOUBLE","name":"LinkOverallSentimentScore","label":"LinkOverallSentimentScore","description":"LinkOverallSentimentScore","usage":"fact","regularAggregate":"total","taxonomyFamily":"cNone","nullable":true},{"datatype":"NVARCHAR(512)","name":"LinkKeywords","label":"LinkKeywords","description":"LinkKeywords","usage":"attribute","regularAggregate":"countDistinct","taxonomyFamily":"cNone","nullable":true},{"datatype":"NVARCHAR(512)","name":"LinkEntities","label":"LinkEntities","description":"LinkEntities","usage":"attribute","regularAggregate":"countDistinct","taxonomyFamily":"cNone","nullable":true},{"datatype":"NVARCHAR(512)","name":"Article_Text","label":"Article Text","description":"Article Text","usage":"attribute","regularAggregate":"countDistinct","taxonomyFamily":"cNone","nullable":true},{"datatype":"NVARCHAR(512)","name":"MaxLinkKeywords","label":"MaxLinkKeywords","description":"MaxLinkKeywords","usage":"attribute","regularAggregate":"countDistinct","taxonomyFamily":"cNone","nullable":true},{"datatype":"NVARCHAR(512)","name":"MaxLinkEntity","label":"MaxLinkEntity","description":"MaxLinkEntity","usage":"attribute","regularAggregate":"countDistinct","taxonomyFamily":"cNone","nullable":true}],"name":"enriched_example_facebook_data_csv","label":"enriched_example_facebook_data.csv"},"id":"41cdf9f5-0401-49b3-8f31-31e986ac3625:747b0a56-b605-4bff-8cdb-3b30822c9295:747b0a56-b605-4bff-8cdb-3b30822c9295:data_asset:enriched_example_facebook_data.csv","label":"enriched_example_facebook_data.csv","identifier":"enriched_example_facebook_data_csv"},"name":"enriched_example_facebook_data.csv","shaping":{"embeddedModuleUpToDate":true}}]},"pageContext":[],"drillThrough":[],"widgets":{"model000001737d90789a_00000000":{"id":"model000001737d90789a_00000000","data":{"dataViews":[{"modelRef":"model000001737d8ed155_00000001","dataItems":[{"id":"model000001737d91e983_00000000","itemId":"enriched_example_facebook_data_csv.Lifetime_Post_Total_Reach","itemLabel":"Lifetime Post Total Reach"},{"id":"model000001737d938ceb_00000000","itemId":"enriched_example_facebook_data_csv.TextHighestEmotion","itemLabel":"TextHighestEmotion"}],"id":"model000001737d91e982_00000000"}]},"visTypeLocked":true,"slotmapping":{"slots":[{"name":"categories","dataItems":["model000001737d938ceb_00000000"],"dataItemSettings":[],"caption":"Segments","id":"categories","layerId":"data"},{"name":"values","dataItems":["model000001737d91e983_00000000"],"caption":"Size","id":"values","layerId":"data"}]},"type":"live","visId":"com.ibm.vis.rave2bundlepie","name":{"translationTable":{}}},"model000001737d9504c7_00000000":{"id":"model000001737d9504c7_00000000","type":"text","content":{"translationTable":{"Default":"
"}},"isResponsive":true,"visTypeLocked":true,"name":""},"model000001737d96e769_00000000":{"id":"model000001737d96e769_00000000","type":"text","content":{"translationTable":{"Default":"Likes, Shares, and Comments by Sentiment
"}},"isResponsive":true,"visTypeLocked":true,"name":""},"model000001737d9792c5_00000000":{"id":"model000001737d9792c5_00000000","data":{"dataViews":[{"modelRef":"model000001737d8ed155_00000001","dataItems":[{"id":"model000001737d9c7fcb_00000000","itemId":"enriched_example_facebook_data_csv.TextOverallSentimentType","itemLabel":"TextOverallSentimentType"},{"id":"_multiMeasuresSeries","itemId":"_multiMeasuresSeries","itemLabel":"Measures group (3)"},{"id":"model000001737df516f1_00000000","itemId":"enriched_example_facebook_data_csv.Lifetime_Talking_About_This_Post_by_action_type_share","itemLabel":"Lifetime Talking About This (Post) by action type - share"},{"id":"model000001737df5829e_00000000","itemId":"enriched_example_facebook_data_csv.Lifetime_Talking_About_This_Post_by_action_type_like","itemLabel":"Lifetime Talking About This (Post) by action type - like"},{"id":"model000001737df58e90_00000000","itemId":"enriched_example_facebook_data_csv.Lifetime_Talking_About_This_Post_by_action_type_comment","itemLabel":"Lifetime Talking About This (Post) by action type - comment"}],"id":"model000001737d988928_00000000"}]},"visTypeLocked":true,"slotmapping":{"slots":[{"name":"categories","dataItems":["model000001737d9c7fcb_00000000"],"dataItemSettings":[],"caption":"Bars","id":"categories","layerId":"data"},{"name":"values","dataItems":["model000001737df516f1_00000000","model000001737df5829e_00000000","model000001737df58e90_00000000"],"caption":"Length","id":"values","layerId":"data"},{"name":"color","dataItems":["_multiMeasuresSeries"],"caption":"Color","id":"color"}]},"type":"live","visId":"com.ibm.vis.rave2bundlestackedbar","name":{"translationTable":{}},"localFilters":[]},"model000001737df97403_00000000":{"id":"model000001737df97403_00000000","type":"text","content":{"translationTable":{"Default":""}},"isResponsive":true,"visTypeLocked":true,"name":""},"model000001737df9f2c0_00000000":{"id":"model000001737df9f2c0_00000000","data":{"dataViews":[{"modelRef":"model000001737d8ed155_00000001","dataItems":[{"id":"model000001737e01e0ee_00000000","itemId":"enriched_example_facebook_data_csv.MaxTextEntity","itemLabel":"MaxTextEntity"},{"id":"model000001737e03f567_00000000","itemId":"enriched_example_facebook_data_csv.Lifetime_Post_Total_Impressions","itemLabel":"Lifetime Post Total Impressions"},{"id":"model000001737e046601_00000000","itemId":"enriched_example_facebook_data_csv.Lifetime_Post_Paid_Impressions","itemLabel":"Lifetime Post Paid Impressions"}],"id":"model000001737dfa4df9_00000000"}]},"visTypeLocked":true,"slotmapping":{"slots":[{"name":"categories","dataItems":["model000001737e01e0ee_00000000"],"dataItemSettings":[],"caption":"x-axis","id":"categories","layerId":"data"},{"name":"columnValue","dataItems":["model000001737e03f567_00000000"],"caption":"Length","id":"columnValue","layerId":"data"},{"name":"lineValue","dataItems":["model000001737e046601_00000000"],"caption":"Line position","id":"lineValue"}]},"type":"live","visId":"com.ibm.vis.rave2bundlecomposite","name":{"translationTable":{}},"localFilters":[]},"model000001737e063c9c_00000000":{"id":"model000001737e063c9c_00000000","data":{"dataViews":[{"modelRef":"model000001737d8ed155_00000001","dataItems":[{"id":"model000001737e065fc7_00000000","itemId":"enriched_example_facebook_data_csv.MaxLinkKeywords","itemLabel":"MaxLinkKeywords"},{"id":"model000001737e06cdee_00000000","itemId":"enriched_example_facebook_data_csv.Lifetime_Post_Total_Impressions","itemLabel":"Lifetime Post Total 
Impressions"},{"id":"model000001737e07229a_00000000","itemId":"enriched_example_facebook_data_csv.LinkOverallSentimentType","itemLabel":"LinkOverallSentimentType"}],"id":"model000001737e065fc6_00000000"}]},"visTypeLocked":true,"slotmapping":{"slots":[{"name":"categories","dataItems":["model000001737e065fc7_00000000"],"dataItemSettings":[],"caption":"Words","id":"categories"},{"name":"size","dataItems":["model000001737e06cdee_00000000"],"caption":"Size","id":"size"},{"name":"color","dataItems":["model000001737e07229a_00000000"],"caption":"Color","id":"color"}]},"type":"live","visId":"com.ibm.vis.rave2bundlewordcloud","name":{"translationTable":{}}},"model000001737e07fcd2_00000000":{"id":"model000001737e07fcd2_00000000","type":"text","content":{"translationTable":{"Default":"Keyword-cloud showing impressions and sentiment
"}},"isResponsive":true,"visTypeLocked":true,"name":""}}}
--------------------------------------------------------------------------------
/doc/source/images/add_credentials.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/IBM/pixiedust-facebook-analysis/752eb287732a20cfe669871e418dbc18b7127257/doc/source/images/add_credentials.png
--------------------------------------------------------------------------------
/doc/source/images/add_file.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/IBM/pixiedust-facebook-analysis/752eb287732a20cfe669871e418dbc18b7127257/doc/source/images/add_file.png
--------------------------------------------------------------------------------
/doc/source/images/add_notebook.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/IBM/pixiedust-facebook-analysis/752eb287732a20cfe669871e418dbc18b7127257/doc/source/images/add_notebook.png
--------------------------------------------------------------------------------
/doc/source/images/architecture.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/IBM/pixiedust-facebook-analysis/752eb287732a20cfe669871e418dbc18b7127257/doc/source/images/architecture.png
--------------------------------------------------------------------------------
/doc/source/images/emotion.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/IBM/pixiedust-facebook-analysis/752eb287732a20cfe669871e418dbc18b7127257/doc/source/images/emotion.png
--------------------------------------------------------------------------------
/doc/source/images/emotional_engagement.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/IBM/pixiedust-facebook-analysis/752eb287732a20cfe669871e418dbc18b7127257/doc/source/images/emotional_engagement.png
--------------------------------------------------------------------------------
/doc/source/images/entities.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/IBM/pixiedust-facebook-analysis/752eb287732a20cfe669871e418dbc18b7127257/doc/source/images/entities.png
--------------------------------------------------------------------------------
/doc/source/images/insert_file_credentials.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/IBM/pixiedust-facebook-analysis/752eb287732a20cfe669871e418dbc18b7127257/doc/source/images/insert_file_credentials.png
--------------------------------------------------------------------------------
/doc/source/images/insert_to_code.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/IBM/pixiedust-facebook-analysis/752eb287732a20cfe669871e418dbc18b7127257/doc/source/images/insert_to_code.png
--------------------------------------------------------------------------------
/doc/source/images/inserted_pandas.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/IBM/pixiedust-facebook-analysis/752eb287732a20cfe669871e418dbc18b7127257/doc/source/images/inserted_pandas.png
--------------------------------------------------------------------------------
/doc/source/images/keywords.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/IBM/pixiedust-facebook-analysis/752eb287732a20cfe669871e418dbc18b7127257/doc/source/images/keywords.png
--------------------------------------------------------------------------------
/doc/source/images/new_notebook.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/IBM/pixiedust-facebook-analysis/752eb287732a20cfe669871e418dbc18b7127257/doc/source/images/new_notebook.png
--------------------------------------------------------------------------------
/doc/source/images/sentiment.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/IBM/pixiedust-facebook-analysis/752eb287732a20cfe669871e418dbc18b7127257/doc/source/images/sentiment.png
--------------------------------------------------------------------------------
/doc/source/images/sentimental_engagement.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/IBM/pixiedust-facebook-analysis/752eb287732a20cfe669871e418dbc18b7127257/doc/source/images/sentimental_engagement.png
--------------------------------------------------------------------------------
/doc/source/images/studio_project_overview.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/IBM/pixiedust-facebook-analysis/752eb287732a20cfe669871e418dbc18b7127257/doc/source/images/studio_project_overview.png
--------------------------------------------------------------------------------
/manifest.yml:
--------------------------------------------------------------------------------
1 | declared-services:
2 | pfa-visual-recognition:
3 | label: watson_vision_combined
4 | plan: lite
5 | pfa-natural-language-understanding:
6 | label: natural-language-understanding
7 | plan: free
8 | pfa-tone-analyzer:
9 | label: tone_analyzer
10 | plan: lite
11 | applications:
12 | - services:
13 | - pfa-visual-recognition
14 | - pfa-natural-language-understanding
15 | - pfa-tone-analyzer
16 | memory: 128M
17 | no-route: true
18 | name: pixiedust-facebook-analysis
19 | health-check-type: none
20 | buildpack: noop-buildpack
21 |
--------------------------------------------------------------------------------
/notebooks/pixiedust_facebook_analysis.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "metadata": {
6 | "collapsed": true
7 | },
8 | "source": [
9 | "# Analyze Facebook Data Using IBM Watson and IBM Watson Studio\n",
10 | "\n",
11 | "This is a three-part notebook meant to show how anyone can enrich and analyze a combined dataset of unstructured and structured information with IBM Watson and IBM Watson Studio. For this example we are using a standard Facebook Analytics export which features texts from posts, articles and thumbnails, along with standard performance metrics such as likes, shares, and impressions. \n",
12 | "\n",
13 | "**Part I** will use the Natural Language Understanding and (optionally) Visual Recognition services from IBM Watson to enrich the Facebook posts, thumbnails, and articles by pulling out `Sentiment`, `Emotion`, `Entities`, `Keywords`, and `Images`. The end result of Part I will be additional features and metrics we can visualize in Part III. \n",
14 | "\n",
15 | "**Part II** will set up multiple pandas DataFrames that will contain the values, and metrics needed to find insights in the Part III tests and experiments.\n",
16 | "\n",
17 | "**Part III** will use charts to visualize the features that we discovered during enrichment and show how they correlate with customer impressions.\n",
18 | "\n",
19 | "\n",
20 | "#### You should only need to change data in the Setup portion of this notebook. All places where you see User Input is where you should be adding inputs. \n",
21 | "\n",
22 | "### Table of Contents\n",
23 | "\n",
24 | "### [**Part I - Enrich**](#part1)
\n",
25 | "1. [Setup](#setup)
\n",
26 | " 1.1 [Install Watson Developer Cloud and BeautifulSoup Packages](#setup1)
\n",
27 | " 1.2 [Install PixieDust](#pixie)
\n",
28 | " 1.3 [Restart Kernel](#restart)
\n",
29 | " 1.4 [Import Packages and Libraries](#setup2)
\n",
30 | " 1.5 [Add Service Credentials From IBM Cloud for Watson Services](#setup3)
\n",
31 | "2. [Load Data](#load)
\n",
32 | " 2.1 [Load Data From Cloud Object Storage as a pandas DataFrame](#load1)
\n",
33 | " 2.2 [Set Variables](#load2)
\n",
34 | "3. [Prepare Data](#prepare)
\n",
35 | " 3.1 [Data Cleansing with Python](#prepare1)
\n",
36 | " 3.2 [Beautiful Soup to Extract Thumbnails and Extented Links](#prepare2)
\n",
37 | "4. [Enrich Data](#enrich)
\n",
38 | " 4.1 [NLU for Post Text](#nlupost)
\n",
39 | " 4.2 [NLU for Thumbnail Text](#nlutn)
\n",
40 | " 4.3 [NLU for Article Text](#nlulink)
\n",
41 | " 4.4 [Visual Recognition](#visual)
\n",
42 | "5. [Write Data](#write)
\n",
43 | " 5.1 [Convert DataFrame to new CSV](#write1)
\n",
44 | " 5.2 [Write Data to Cloud Object Storage](#write2)
\n",
45 | " \n",
46 | "### [**Part II - Data Preparation**](#part2)
\n",
47 | "1. [Prepare Data](#prepare)
\n",
48 | " 1.1 [Create Multiple DataFrames for Visualizations](#visualizations)
\n",
49 | " 1.2 [Create a Consolidated Sentiment and Emotion DataFrame](#tone)
\n",
50 | " 1.3 [Create a Consolidated Keyword DataFrame](#keyword)
\n",
51 | " 1.4 [Create a Consolidated Entity DataFrame](#entity)
\n",
52 | " \n",
53 | "### [**Part III - Analyze**](#part3)
\n",
54 | "\n",
55 | "1. [Setup](#2setup)
\n",
56 | " 1.1 [Assign Variables](#2setup2)
\n",
57 | "2. [Visualize Data](#2visual)
\n",
58 | " 2.1 [Run PixieDust Visualization Library with Display() API](#2visual1)\n"
59 | ]
60 | },
61 | {
62 | "cell_type": "markdown",
63 | "metadata": {},
64 | "source": [
65 | "# Part I - Enrich\n",
66 | "\n",
67 | "## 1. Setup\n",
68 | "To prepare your environment, you need to install some packages and enter credentials for the Watson services.\n",
69 | "\n",
70 | "### 1.1 Install Latest Watson Developer Cloud and Beautiful Soup Packages\n",
71 | "You need to install these packages:\n",
72 | " - [Watson APIs Python SDK](https://github.com/watson-developer-cloud/python-sdk): a client library for Watson services.\n",
73 | " - Beautiful Soup: a library to parse data from HTML for enriching the Facebook data.\n",
74 | " - PixieDust: a library to visualize the data. \n",
75 | "\n",
76 | "Install the Watson Python SDK package:"
77 | ]
78 | },
79 | {
80 | "cell_type": "code",
81 | "execution_count": null,
82 | "metadata": {},
83 | "outputs": [],
84 | "source": [
85 | "!pip -q install --user --no-warn-script-location ibm-watson==4.3.0"
86 | ]
87 | },
88 | {
89 | "cell_type": "markdown",
90 | "metadata": {},
91 | "source": [
92 | "Install the Beautiful Soup package:"
93 | ]
94 | },
95 | {
96 | "cell_type": "code",
97 | "execution_count": null,
98 | "metadata": {},
99 | "outputs": [],
100 | "source": [
101 | "!pip -q install --user beautifulsoup4==4.8.2"
102 | ]
103 | },
104 | {
105 | "cell_type": "markdown",
106 | "metadata": {},
107 | "source": [
108 | "\n",
109 | "### 1.2 Install PixieDust Library"
110 | ]
111 | },
112 | {
113 | "cell_type": "code",
114 | "execution_count": null,
115 | "metadata": {},
116 | "outputs": [],
117 | "source": [
118 | "!pip -q install --user --no-warn-script-location --upgrade pixiedust==1.1.14"
119 | ]
120 | },
121 | {
122 | "cell_type": "markdown",
123 | "metadata": {},
124 | "source": [
125 | "\n",
126 | "### 1.3 Restart Kernel\n",
127 | "> Required after installs/upgrades only.\n",
128 | "\n",
129 | "If any libraries were just installed or upgraded, restart the kernel before continuing. After this has been done once, you might want to comment out the `!pip install` lines above for cleaner output and a faster \"Run All\"."
130 | ]
131 | },
132 | {
133 | "cell_type": "markdown",
134 | "metadata": {},
135 | "source": [
136 | "\n",
137 | "### 1.4 Import Packages and Libraries\n",
138 | "> Tip: To check if you have a package installed, open a new cell and write: `help()`."
139 | ]
140 | },
141 | {
142 | "cell_type": "code",
143 | "execution_count": null,
144 | "metadata": {},
145 | "outputs": [],
146 | "source": [
147 | "import json\n",
148 | "import sys\n",
149 | "\n",
150 | "from ibm_watson import NaturalLanguageUnderstandingV1\n",
151 | "from ibm_watson import VisualRecognitionV3\n",
152 | "from ibm_cloud_sdk_core.authenticators import IAMAuthenticator\n",
153 | "from ibm_watson.natural_language_understanding_v1 import Features, EntitiesOptions, KeywordsOptions, EmotionOptions, SentimentOptions\n",
154 | "\n",
155 | "import operator\n",
156 | "from functools import reduce\n",
157 | "from io import StringIO\n",
158 | "import numpy as np\n",
159 | "from bs4 import BeautifulSoup as bs\n",
160 | "from operator import itemgetter\n",
161 | "from os.path import join, dirname\n",
162 | "import pandas as pd\n",
163 | "import numpy as np\n",
164 | "import requests\n",
165 | "\n",
166 | "# Suppress some pandas warnings\n",
167 | "pd.options.mode.chained_assignment = None # default='warn'\n",
168 | "# Suppress SSL warnings\n",
169 | "requests.packages.urllib3.disable_warnings()"
170 | ]
171 | },
172 | {
173 | "cell_type": "markdown",
174 | "metadata": {},
175 | "source": [
176 | "\n",
177 | "### 1.5 Add Service Credentials From IBM Cloud for Watson Services\n",
178 | "Edit the following cell to provide your credentials for Watson and Natural Language Understanding and Visual Recognition.\n",
179 | "\n",
180 | "You must create a Watson Natural Language Understanding service and, optionally, a Watson Visual Recognition service on [IBM Cloud](https://cloud.ibm.com/).\n",
181 | "\n",
182 | "1. Create a service for [Natural Language Understanding (NLU)](https://cloud.ibm.com/catalog/services/natural-language-understanding). \n",
183 | "1. Create a service for [Visual Recognition](https://cloud.ibm.com/catalog/services/visual-recognition).\n",
184 | "1. Insert API keys and URLs in the following cell.\n",
185 | "1. Run the cell.\n",
186 | "\n",
187 | "### _User Input_ "
188 | ]
189 | },
190 | {
191 | "cell_type": "code",
192 | "execution_count": null,
193 | "metadata": {},
194 | "outputs": [],
195 | "source": [
196 | "# @hidden_cell\n",
197 | "\n",
198 | "# Watson Natural Language Understanding (NLU)\n",
199 | "NATURAL_LANGUAGE_UNDERSTANDING_API_KEY = ''\n",
200 | "NATURAL_LANGUAGE_UNDERSTANDING_URL = ''\n",
201 | "\n",
202 | "# Watson Visual Recognition (optional)\n",
203 | "VISUAL_RECOGNITION_API_KEY = ''\n",
204 | "VISUAL_RECOGNITION_URL = ''\n"
205 | ]
206 | },
207 | {
208 | "cell_type": "code",
209 | "execution_count": null,
210 | "metadata": {},
211 | "outputs": [],
212 | "source": [
213 | "# Create the Watson clients\n",
214 | "\n",
215 | "nlu_auth = IAMAuthenticator(NATURAL_LANGUAGE_UNDERSTANDING_API_KEY)\n",
216 | "nlu = NaturalLanguageUnderstandingV1(version='2020-03-09',\n",
217 | " authenticator=nlu_auth)\n",
218 | "nlu.set_service_url(NATURAL_LANGUAGE_UNDERSTANDING_URL)\n",
219 | "\n",
220 | "visual_recognition = False # Making visrec optional.\n",
221 | "if VISUAL_RECOGNITION_API_KEY and VISUAL_RECOGNITION_URL:\n",
222 | " vr_auth = IAMAuthenticator(VISUAL_RECOGNITION_API_KEY)\n",
223 | " visual_recognition = VisualRecognitionV3(version='2019-03-09',\n",
224 | " authenticator=vr_auth)\n",
225 | " visual_recognition.set_service_url(VISUAL_RECOGNITION_URL)\n",
226 | "else:\n",
227 | " print(\"Skipping Visual Recognition\")"
228 | ]
229 | },
230 | {
231 | "cell_type": "markdown",
232 | "metadata": {},
233 | "source": [
234 | "\n",
235 | "## 2. Load Data\n",
236 | "The data you'll analyzing is a sample of a standard export of the Facebook Insights Post information from the IBM Watson Facebook page. Engagement metrics such as clicks, impressions, and so on, are altered and do not reflect actual post performance data. The data is on the Watson Studio community page.\n",
237 | "\n",
238 | "### 2.1 Load the data as a pandas DataFrame\n",
239 | "\n",
240 | "To get the data and load it into a pandas DataFrame:\n",
241 | "\n",
242 | "1. Load the file by clicking the **Find and Add Data** icon and then dragging and dropping the file onto the pane or browsing for the file. The data is stored in the object storage container that is associated with your project.\n",
243 | "1. Click in the next cell and then choose **Insert to code > pandas DataFrame** from below the file name and then run the cell. Change the inserted variable name to `df_data_1`\n",
244 | "\n",
245 | "### _User Input_ "
246 | ]
247 | },
248 | {
249 | "cell_type": "code",
250 | "execution_count": null,
251 | "metadata": {},
252 | "outputs": [],
253 | "source": [
254 | "# **Insert to code > pandas DataFrame**\n"
255 | ]
256 | },
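257 | {
258 | "cell_type": "markdown",
259 | "metadata": {},
260 | "source": [
261 | "What gets inserted varies by Watson Studio release, so treat the following as a rough, illustrative sketch (placeholder names and values, not the exact generated code): the snippet builds a Cloud Object Storage client and reads the file into a pandas DataFrame.\n",
262 | "\n",
263 | "```python\n",
264 | "import ibm_boto3\n",
265 | "from ibm_botocore.client import Config\n",
266 | "import pandas as pd\n",
267 | "\n",
268 | "client = ibm_boto3.client(service_name='s3',\n",
269 | "    ibm_api_key_id='<your api key>',\n",
270 | "    ibm_auth_endpoint='https://iam.cloud.ibm.com/oauth/token',\n",
271 | "    config=Config(signature_version='oauth'),\n",
272 | "    endpoint_url='<your COS endpoint>')\n",
273 | "\n",
274 | "body = client.get_object(Bucket='<your bucket>', Key='example_facebook_data.csv')['Body']\n",
275 | "df_data_1 = pd.read_csv(body)\n",
276 | "```"
277 | ]
278 | },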
257 | {
258 | "cell_type": "markdown",
259 | "metadata": {},
260 | "source": [
261 | "### 2.2 Set variables\n",
262 | "You need to set these variables:\n",
263 | " - The name of the DataFrame\n",
264 | " - Your credentials for the source file\n",
265 | " - A file name for the enriched DataFrame\n",
266 | " \n",
267 | "Define a variable, `df`, for the DataFrame that you just created. If necessary, change the original DataFrame name to match the one you created."
268 | ]
269 | },
270 | {
271 | "cell_type": "code",
272 | "execution_count": null,
273 | "metadata": {},
274 | "outputs": [],
275 | "source": [
276 | "# Make sure this uses the variable above. The number will vary in the inserted code.\n",
277 | "try:\n",
278 | " df = df_data_1\n",
279 | "except NameError as e:\n",
280 | " print('Error: Setup is incorrect or incomplete.\\n')\n",
281 | " print('Follow the instructions to insert the pandas DataFrame above, and edit to')\n",
282 | " print('make the generated df_data_# variable match the variable used here.')\n",
283 | " raise"
284 | ]
285 | },
286 | {
287 | "cell_type": "markdown",
288 | "metadata": {},
289 | "source": [
290 | "**Select the cell below and place your cursor on an empty line below the comment.** \n",
291 | "Put in the credentials for the file you want to enrich by clicking on the 10/01 (upper right), then click `Insert to code` under the file you want to enrich, and choose `Insert Credentials`.\n",
292 | "\n",
293 | "**Change the inserted variable name to `credentials_1`**\n",
294 | "\n",
295 | "### _User Input_ "
296 | ]
297 | },
298 | {
299 | "cell_type": "code",
300 | "execution_count": null,
301 | "metadata": {},
302 | "outputs": [],
303 | "source": [
304 | "# insert credentials for file - Change to credentials_1\n"
305 | ]
306 | },
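307 | {
308 | "cell_type": "markdown",
309 | "metadata": {},
310 | "source": [
311 | "For reference, the inserted credentials cell defines a Python dict. The keys below are the ones this notebook reads again in section 5 to write the enriched file back to Cloud Object Storage; the values are placeholders for your own:\n",
312 | "\n",
313 | "```python\n",
314 | "credentials_1 = {\n",
315 | "    'IBM_API_KEY_ID': '<your api key>',\n",
316 | "    'IAM_SERVICE_ID': '<your service id>',\n",
317 | "    'ENDPOINT': '<your COS endpoint URL>',\n",
318 | "    'IBM_AUTH_ENDPOINT': 'https://iam.cloud.ibm.com/oauth/token',\n",
319 | "    'BUCKET': '<your project bucket>',\n",
320 | "    'FILE': 'example_facebook_data.csv'\n",
321 | "}\n",
322 | "```"
323 | ]
324 | },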
307 | {
308 | "cell_type": "code",
309 | "execution_count": null,
310 | "metadata": {},
311 | "outputs": [],
312 | "source": [
313 | "# Make sure this uses the variable above. The number will vary in the inserted code.\n",
314 | "try:\n",
315 | " credentials = credentials_1\n",
316 | "except NameError as e:\n",
317 | " print('Error: Setup is incorrect or incomplete.\\n')\n",
318 | " print('Follow the instructions to insert the file credentials above, and edit to')\n",
319 | " print('make the generated credentials_# variable match the variable used here.')\n",
320 | " raise"
321 | ]
322 | },
323 | {
324 | "cell_type": "markdown",
325 | "metadata": {},
326 | "source": [
327 | "\n",
328 | "## 3. Prepare Data\n",
329 | "You'll prepare the data by cleansing it and extracting the URLs. Many of the posts contain both text and a URL. The first task is to separate URLs from the text so that they can be analyzed separately. Then you need to get thumbnails for the photos and links, and convert any shortened URLs to full URLs."
330 | ]
331 | },
332 | {
333 | "cell_type": "markdown",
334 | "metadata": {},
335 | "source": [
336 | "\n",
337 | "### 3.1 Data Cleansing with Python\n",
338 | "Renaming columns, removing noticeable noise in the data, pulling out URLs and appending to a new column to run through NLU.\n",
339 | "\n",
340 | "To cleanse the data, you'll rename a column and add a column with the URLs that were embedded in the post. \n",
341 | "\n",
342 | "Change the name of the `Post Message` column to `Text`:"
343 | ]
344 | },
345 | {
346 | "cell_type": "code",
347 | "execution_count": null,
348 | "metadata": {},
349 | "outputs": [],
350 | "source": [
351 | "df.rename(columns={'Post Message': 'Text'}, inplace=True)"
352 | ]
353 | },
354 | {
355 | "cell_type": "code",
356 | "execution_count": null,
357 | "metadata": {},
358 | "outputs": [],
359 | "source": [
360 | "# Drop the rows that have no value for the text.\n",
361 | "df.dropna(subset=['Text'], inplace=True)"
362 | ]
363 | },
364 | {
365 | "cell_type": "markdown",
366 | "metadata": {},
367 | "source": [
368 | "Use the `str.partition` function to remove strings that contain \"http\" and \"www\" from the `Text` column and save them in new DataFrames, then add all web addresses to a new `Link` column in the original DataFrame. This process captures all web addresses: https, http, and www."
369 | ]
370 | },
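371 | {
372 | "cell_type": "markdown",
373 | "metadata": {},
374 | "source": [
375 | "To see what `str.partition` produces, here is a one-line sketch on a made-up post:\n",
376 | "\n",
377 | "```python\n",
378 | "'New Watson demo! http://ibm.co/xyz'.partition('http')\n",
379 | "# -> ('New Watson demo! ', 'http', '://ibm.co/xyz')\n",
380 | "```\n",
381 | "\n",
382 | "Column 1 of the partitioned result holds the delimiter and column 2 holds the rest of the URL, which is why the next cell concatenates columns 1 and 2 to rebuild each link."
383 | ]
384 | },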
371 | {
372 | "cell_type": "code",
373 | "execution_count": null,
374 | "metadata": {},
375 | "outputs": [],
376 | "source": [
377 | "df_http = df[\"Text\"].str.partition(\"http\")\n",
378 | "df_www = df[\"Text\"].str.partition(\"www\")\n",
379 | "\n",
380 | "# Combine delimiters with actual links\n",
381 | "df_http[\"Link\"] = df_http[1].map(str) + df_http[2]\n",
382 | "df_www[\"Link1\"] = df_www[1].map(str) + df_www[2]\n",
383 | "\n",
384 | "# Include only Link columns\n",
385 | "df_http.drop(df_http.columns[0:3], axis=1, inplace = True)\n",
386 | "df_www.drop(df_www.columns[0:3], axis=1, inplace = True)\n",
387 | "\n",
388 | "# Merge http and www DataFrames\n",
389 | "dfmerge = pd.concat([df_http, df_www], axis=1)\n",
390 | "\n",
391 | "# The following steps will allow you to merge data columns from the left to the right\n",
392 | "dfmerge = dfmerge.apply(lambda x: x.str.strip()).replace('', np.nan)\n",
393 | "\n",
394 | "# Use fillna to fill any blanks with the Link1 column\n",
395 | "dfmerge[\"Link\"].fillna(dfmerge[\"Link1\"], inplace = True)\n",
396 | "\n",
397 | "# Delete Link1 (www column)\n",
398 | "dfmerge.drop(\"Link1\", axis=1, inplace = True)\n",
399 | "\n",
400 | "# Combine Link data frame\n",
401 | "df = pd.concat([dfmerge,df], axis = 1)\n",
402 | "\n",
403 | "# Make sure text column is a string\n",
404 | "df[\"Text\"] = df[\"Text\"].astype(\"str\")\n",
405 | "\n",
406 | "# Strip links from Text column\n",
407 | "df['Text'] = df['Text'].apply(lambda x: x.split('http')[0])\n",
408 | "df['Text'] = df['Text'].apply(lambda x: x.split('www')[0])"
409 | ]
410 | },
411 | {
412 | "cell_type": "markdown",
413 | "metadata": {},
414 | "source": [
415 | "### 3.2 Extract thumbnails and extended links\n",
416 | "\n",
417 | "A standard Facebook export does not provide the thumbnail that usually summarizes the link or photo associated with each post. Use the Beautiful Soup library to go into the HTML of the post and extract the thumbnail text:"
418 | ]
419 | },
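420 | {
421 | "cell_type": "markdown",
422 | "metadata": {},
423 | "source": [
424 | "As a minimal sketch of the extraction (hand-written HTML with a made-up image URL and description), the `og:image` meta tag carries the thumbnail image and the `description` meta tag carries the thumbnail text:\n",
425 | "\n",
426 | "```python\n",
427 | "from bs4 import BeautifulSoup as bs\n",
428 | "\n",
429 | "html = '<head><meta property=\"og:image\" content=\"https://example.com/thumb.png\"><meta name=\"description\" content=\"A short summary\"></head>'\n",
430 | "soup = bs(html, 'lxml')\n",
431 | "print(soup.find('meta', property='og:image')['content'])    # https://example.com/thumb.png\n",
432 | "print(soup.find(attrs={'name': 'description'})['content'])  # A short summary\n",
433 | "```"
434 | ]
435 | },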
420 | {
421 | "cell_type": "code",
422 | "execution_count": null,
423 | "metadata": {},
424 | "outputs": [],
425 | "source": [
426 | "# Change links from objects to strings\n",
427 | "for link in df.Link:\n",
428 | " df.Link.to_string()\n",
429 | "\n",
430 | "piclinks = []\n",
431 | "description = []\n",
432 | "for url in df[\"Link\"]:\n",
433 | " if pd.isnull(url):\n",
434 | " piclinks.append(\"\")\n",
435 | " description.append(\"\")\n",
436 | " continue\n",
437 | " \n",
438 | " try:\n",
439 | " # Skip certificate check with verify=False.\n",
440 | " # Don't do this if your urls are not secure.\n",
441 | " page3 = requests.get(url, verify=False)\n",
442 | " if page3.status_code != requests.codes.ok:\n",
443 | " piclinks.append(\"\")\n",
444 | " description.append(\"\")\n",
445 | " continue\n",
446 | " except Exception as e:\n",
447 | " print(\"Skipping url %s: %s\" % (url, e))\n",
448 | " piclinks.append(\"\")\n",
449 | " description.append(\"\")\n",
450 | " continue\n",
451 | " \n",
452 | " soup3 = bs(page3.text,\"lxml\")\n",
453 | " \n",
454 | " pic = soup3.find('meta', property =\"og:image\")\n",
455 | " if pic:\n",
456 | " piclinks.append(pic[\"content\"])\n",
457 | " else: \n",
458 | " piclinks.append(\"\")\n",
459 | " \n",
460 | " content = None\n",
461 | " desc = soup3.find(attrs={'name':'Description'})\n",
462 | " if desc:\n",
463 | " content = desc['content']\n",
464 | " if not content or content == 'null':\n",
465 | " # Try again with lowercase description\n",
466 | " desc = soup3.find(attrs={'name':'description'})\n",
467 | " if desc:\n",
468 | " content = desc['content']\n",
469 | " if not content or content == 'null':\n",
470 | " description.append(\"\")\n",
471 | " else:\n",
472 | " description.append(content)\n",
473 | " \n",
474 | "# Save thumbnail descriptions to df in a column titled 'Thumbnails'\n",
475 | "df[\"Thumbnails\"] = description\n",
476 | "# Save image links to df in a column titled 'Image'\n",
477 | "df[\"Image\"] = piclinks"
478 | ]
479 | },
480 | {
481 | "cell_type": "markdown",
482 | "metadata": {},
483 | "source": [
484 | "Convert shortened links to full links.\n",
485 | "Use requests module to pull extended links. This is only necessary if the Facebook page uses different links than the articles themselves. For this example we are using IBM Watson's Facebook export which uses an IBM link. \n"
486 | ]
487 | },
488 | {
489 | "cell_type": "code",
490 | "execution_count": null,
491 | "metadata": {},
492 | "outputs": [],
493 | "source": [
494 | "shortlink = df[\"Link\"]\n",
495 | "extendedlink = []\n",
496 | "\n",
497 | "for link in shortlink:\n",
498 | " if isinstance(link, float): # Float is not a URL, probably NaN.\n",
499 | " extendedlink.append('')\n",
500 | " else:\n",
501 | " try:\n",
502 | " extended_link = requests.Session().head(link, allow_redirects=True).url\n",
503 | " extendedlink.append(extended_link)\n",
504 | " except Exception as e:\n",
505 | " print(\"Skipping link %s: %s\" % (link, e))\n",
506 | " extendedlink.append('')\n",
507 | "\n",
508 | "df[\"Extended Links\"] = extendedlink"
509 | ]
510 | },
511 | {
512 | "cell_type": "markdown",
513 | "metadata": {},
514 | "source": [
515 | " \n",
516 | "## 4. Enrichment Time!\n",
517 | "\n",
518 | "### 4.1 NLU for the Post Text\n",
519 | "The following script is an example of how to use Natural Language Understanding to iterate through each post and extract enrichment features for future analysis.\n",
520 | "\n",
521 | "For this example, we are looking at the `Text` column in our DataFrame, which contains the text of each post. NLU can also iterate through a column of URLs, or other freeform text. There's a list within a list for the Keywords and Entities features to allow gathering multiple entities and keywords from each piece of text.\n",
522 | "\n",
523 | "Each extracted feature is appended to the DataFrame in a new column that's defined at the end of the script. If you want to run this same script for the other columns, set the loop iterable to the column name, if you are using URLs, change the `text=response` parameter to `url=response`, and update the new column names as necessary. "
524 | ]
525 | },
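526 | {
527 | "cell_type": "markdown",
528 | "metadata": {},
529 | "source": [
530 | "To make the parsing below easier to follow, here is an abridged sketch of one `nlu.analyze(...).get_result()` response. The field names are the ones the code reads; the values are made up:\n",
531 | "\n",
532 | "```python\n",
533 | "enriched_json = {\n",
534 | "    'sentiment': {'document': {'score': 0.83, 'label': 'positive'}},\n",
535 | "    'emotion': {'document': {'emotion': {'joy': 0.71, 'sadness': 0.05, 'anger': 0.02, 'fear': 0.03, 'disgust': 0.01}}},\n",
536 | "    'keywords': [{'text': 'IBM Watson', 'relevance': 0.94}],\n",
537 | "    'entities': [{'type': 'Company', 'text': 'IBM', 'relevance': 0.91}]\n",
538 | "}\n",
539 | "```"
540 | ]
541 | },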
526 | {
527 | "cell_type": "code",
528 | "execution_count": null,
529 | "metadata": {},
530 | "outputs": [],
531 | "source": [
532 | "# Define the list of features to get enrichment values for entities, keywords, emotion and sentiment\n",
533 | "features = Features(entities=EntitiesOptions(), keywords=KeywordsOptions(), emotion=EmotionOptions(), sentiment=SentimentOptions())\n",
534 | "\n",
535 | "overallSentimentScore = []\n",
536 | "overallSentimentType = []\n",
537 | "highestEmotion = []\n",
538 | "highestEmotionScore = []\n",
539 | "kywords = []\n",
540 | "entities = []\n",
541 | "\n",
542 | "# Go through every response and enrich the text using NLU.\n",
543 | "for text in df['Text']:\n",
544 | " if not text:\n",
545 | " # print(\"Text is empty\")\n",
546 | " overallSentimentScore.append('0')\n",
547 | " overallSentimentType.append('0')\n",
548 | " highestEmotion.append(\"\")\n",
549 | " highestEmotionScore.append(\"\")\n",
550 | " kywords.append(\"\")\n",
551 | " entities.append(\"\")\n",
552 | " continue\n",
553 | " else:\n",
554 | " # We are assuming English to avoid errors when the language cannot be detected.\n",
555 | " enriched_json = nlu.analyze(text=text, features=features, language='en').get_result()\n",
556 | "\n",
557 | " # Get the SENTIMENT score and type\n",
558 | " if 'sentiment' in enriched_json:\n",
559 | " if('score' in enriched_json['sentiment'][\"document\"]):\n",
560 | " overallSentimentScore.append(enriched_json[\"sentiment\"][\"document\"][\"score\"])\n",
561 | " else:\n",
562 | " overallSentimentScore.append('0')\n",
563 | "\n",
564 | " if('label' in enriched_json['sentiment'][\"document\"]):\n",
565 | " overallSentimentType.append(enriched_json[\"sentiment\"][\"document\"][\"label\"])\n",
566 | " else:\n",
567 | " overallSentimentType.append('0')\n",
568 | " else:\n",
569 | " overallSentimentScore.append('0')\n",
570 | " overallSentimentType.append('0')\n",
571 | "\n",
572 | " # Read the EMOTIONS into a dict and get the key (emotion) with maximum value\n",
573 | " if 'emotion' in enriched_json:\n",
574 | " me = max(enriched_json[\"emotion\"][\"document\"][\"emotion\"].items(), key=operator.itemgetter(1))[0]\n",
575 | " highestEmotion.append(me)\n",
576 | " highestEmotionScore.append(enriched_json[\"emotion\"][\"document\"][\"emotion\"][me])\n",
577 | " else:\n",
578 | " highestEmotion.append(\"\")\n",
579 | " highestEmotionScore.append(\"\")\n",
580 | "\n",
581 | " # Iterate and get KEYWORDS with a confidence of over 70%\n",
582 | " if 'keywords' in enriched_json:\n",
583 | " tmpkw = []\n",
584 | " for kw in enriched_json['keywords']:\n",
585 | " if(float(kw[\"relevance\"]) >= 0.7):\n",
586 | " tmpkw.append(kw[\"text\"])\n",
587 | " # Convert multiple keywords in a list to a string and append the string\n",
588 | " kywords.append(', '.join(tmpkw))\n",
589 | " else:\n",
590 | " kywords.append(\"\")\n",
591 | " \n",
592 | " # Iterate and get Entities with a confidence of over 30%\n",
593 | " if 'entities' in enriched_json:\n",
594 | " tmpent = []\n",
595 | " for ent in enriched_json['entities']: \n",
596 | " if(float(ent[\"relevance\"]) >= 0.3):\n",
597 | " tmpent.append(ent[\"type\"])\n",
598 | " \n",
599 | " # Convert multiple entities in a list to a string and append the string\n",
600 | " entities.append(', '.join(tmpent))\n",
601 | " else:\n",
602 | " entities.append(\"\")\n",
603 | " \n",
604 | "# Create columns from the list and append to the DataFrame\n",
605 | "if highestEmotion:\n",
606 | " df['TextHighestEmotion'] = highestEmotion\n",
607 | "if highestEmotionScore:\n",
608 | " df['TextHighestEmotionScore'] = highestEmotionScore\n",
609 | "\n",
610 | "if overallSentimentType:\n",
611 | " df['TextOverallSentimentType'] = overallSentimentType\n",
612 | "if overallSentimentScore:\n",
613 | " df['TextOverallSentimentScore'] = overallSentimentScore\n",
614 | "\n",
615 | "df['TextKeywords'] = kywords\n",
616 | "df['TextEntities'] = entities"
617 | ]
618 | },
619 | {
620 | "cell_type": "markdown",
621 | "metadata": {},
622 | "source": [
623 | "After we extract all of the Keywords and Entities from each Post, we have columns with multiple Keywords and Entities separated by commas. For our Analysis in Part II, we also wanted the top Keyword and Entity for each Post. Because of this, we added two new columns to capture the `MaxTextKeyword` and `MaxTextEntity`."
624 | ]
625 | },
626 | {
627 | "cell_type": "code",
628 | "execution_count": null,
629 | "metadata": {},
630 | "outputs": [],
631 | "source": [
632 | "# Choose first of Keywords and Entities\n",
633 | "df[\"MaxTextKeywords\"] = df[\"TextKeywords\"].apply(lambda x: x.split(',')[0])\n",
634 | "df[\"MaxTextEntity\"] = df[\"TextEntities\"].apply(lambda x: x.split(',')[0])"
635 | ]
636 | },
637 | {
638 | "cell_type": "markdown",
639 | "metadata": {},
640 | "source": [
641 | "\n",
642 | "### 4.2 NLU for Thumbnail Text\n",
643 | "\n",
644 | "We will repeat the same process for Thumbnails and Article Text."
645 | ]
646 | },
647 | {
648 | "cell_type": "code",
649 | "execution_count": null,
650 | "metadata": {},
651 | "outputs": [],
652 | "source": [
653 | "# Define the list of features to get enrichment values for entities, keywords, emotion and sentiment\n",
654 | "features = Features(entities=EntitiesOptions(), keywords=KeywordsOptions(), emotion=EmotionOptions(), sentiment=SentimentOptions())\n",
655 | "\n",
656 | "overallSentimentScore = []\n",
657 | "overallSentimentType = []\n",
658 | "highestEmotion = []\n",
659 | "highestEmotionScore = []\n",
660 | "kywords = []\n",
661 | "entities = []\n",
662 | "\n",
663 | "# Go through every response and enrich the text using NLU.\n",
664 | "for text in df['Thumbnails']:\n",
665 | " if not text:\n",
666 | " overallSentimentScore.append(' ')\n",
667 | " overallSentimentType.append(' ')\n",
668 | " highestEmotion.append(' ')\n",
669 | " highestEmotionScore.append(' ')\n",
670 | " kywords.append(' ')\n",
671 | " entities.append(' ')\n",
672 | " continue\n",
673 | "\n",
674 | " enriched_json = nlu.analyze(text=text, features=features, language='en').get_result()\n",
675 | "\n",
676 | " # Get the SENTIMENT score and type\n",
677 | " if 'sentiment' in enriched_json:\n",
678 | " if('score' in enriched_json['sentiment'][\"document\"]):\n",
679 | " overallSentimentScore.append(enriched_json[\"sentiment\"][\"document\"][\"score\"])\n",
680 | " else:\n",
681 | " overallSentimentScore.append(\"\")\n",
682 | "\n",
683 | " if('label' in enriched_json['sentiment'][\"document\"]):\n",
684 | " overallSentimentType.append(enriched_json[\"sentiment\"][\"document\"][\"label\"])\n",
685 | " else:\n",
686 | " overallSentimentType.append(\"\")\n",
687 | "\n",
688 | " # Read the EMOTIONS into a dict and get the key (emotion) with maximum value\n",
689 | " if 'emotion' in enriched_json:\n",
690 | " me = max(enriched_json[\"emotion\"][\"document\"][\"emotion\"].items(), key=operator.itemgetter(1))[0]\n",
691 | " highestEmotion.append(me)\n",
692 | " highestEmotionScore.append(enriched_json[\"emotion\"][\"document\"][\"emotion\"][me])\n",
693 | "\n",
694 | " else:\n",
695 | " highestEmotion.append(\"\")\n",
696 | " highestEmotionScore.append(\"\")\n",
697 | "\n",
698 | " # Iterate and get KEYWORDS with a confidence of over 70%\n",
699 | " if 'keywords' in enriched_json:\n",
700 | " tmpkw = []\n",
701 | " for kw in enriched_json['keywords']:\n",
702 | " if(float(kw[\"relevance\"]) >= 0.7):\n",
703 | " tmpkw.append(kw[\"text\"])\n",
704 | " # Convert multiple keywords in a list to a string and append the string\n",
705 | " kywords.append(', '.join(tmpkw))\n",
706 | " \n",
707 | " # Iterate and get Entities with a confidence of over 30%\n",
708 | " if 'entities' in enriched_json:\n",
709 | " tmpent = []\n",
710 | " for ent in enriched_json['entities']: \n",
711 | " if(float(ent[\"relevance\"]) >= 0.3):\n",
712 | " tmpent.append(ent[\"type\"])\n",
713 | " # Convert multiple entities in a list to a string and append the string\n",
714 | " entities.append(', '.join(tmpent))\n",
715 | " else:\n",
716 | " entities.append(\"\") \n",
717 | " \n",
718 | "# Create columns from the list and append to the DataFrame\n",
719 | "if highestEmotion:\n",
720 | " df['ThumbnailHighestEmotion'] = highestEmotion\n",
721 | "if highestEmotionScore:\n",
722 | " df['ThumbnailHighestEmotionScore'] = highestEmotionScore\n",
723 | "\n",
724 | "if overallSentimentType:\n",
725 | " df['ThumbnailOverallSentimentType'] = overallSentimentType\n",
726 | "if overallSentimentScore:\n",
727 | " df['ThumbnailOverallSentimentScore'] = overallSentimentScore\n",
728 | "\n",
729 | "df['ThumbnailKeywords'] = kywords\n",
730 | "df['ThumbnailEntities'] = entities"
731 | ]
732 | },
733 | {
734 | "cell_type": "markdown",
735 | "metadata": {},
736 | "source": [
737 | " Add two new columns to capture the `MaxThumbnailKeyword` and `MaxThumbnailEntity`:"
738 | ]
739 | },
740 | {
741 | "cell_type": "code",
742 | "execution_count": null,
743 | "metadata": {},
744 | "outputs": [],
745 | "source": [
746 | "# Set 'Max' to first one from keywords and entities lists\n",
747 | "df[\"MaxThumbnailKeywords\"] = df[\"ThumbnailKeywords\"].apply(lambda x: x.split(',')[0])\n",
748 | "df[\"MaxThumbnailEntity\"] = df[\"ThumbnailEntities\"].apply(lambda x: x.split(',')[0])"
749 | ]
750 | },
751 | {
752 | "cell_type": "markdown",
753 | "metadata": {},
754 | "source": [
755 | " \n",
756 | "### 4.3 NLU for Article Text"
757 | ]
758 | },
759 | {
760 | "cell_type": "code",
761 | "execution_count": null,
762 | "metadata": {},
763 | "outputs": [],
764 | "source": [
765 | "# Define the list of features to get enrichment values for entities, keywords, emotion and sentiment\n",
766 | "features = Features(entities=EntitiesOptions(), keywords=KeywordsOptions(), emotion=EmotionOptions(), sentiment=SentimentOptions())\n",
767 | "\n",
768 | "overallSentimentScore = []\n",
769 | "overallSentimentType = []\n",
770 | "highestEmotion = []\n",
771 | "highestEmotionScore = []\n",
772 | "kywords = []\n",
773 | "entities = []\n",
774 | "article_text = []\n",
775 | " \n",
776 | "# Go through every response and enrich the article using NLU\n",
777 | "for url in df['Extended Links']:\n",
778 | " if not url:\n",
779 | " overallSentimentScore.append(' ')\n",
780 | " overallSentimentType.append(' ')\n",
781 | " highestEmotion.append(' ')\n",
782 | " highestEmotionScore.append(' ')\n",
783 | " kywords.append(' ')\n",
784 | " entities.append(' ')\n",
785 | " article_text.append(' ')\n",
786 | " continue\n",
787 | "\n",
788 | " # Run links through NLU to get entities, keywords, emotion and sentiment.\n",
789 | " # Use return_analyzed_text to extract text for Tone Analyzer to use.\n",
790 | " \n",
791 | " try:\n",
792 | " enriched_json = nlu.analyze(url=url,\n",
793 | " features=features,\n",
794 | " language='en',\n",
795 | " return_analyzed_text=True).get_result()\n",
796 | " article_text.append(enriched_json[\"analyzed_text\"])\n",
797 | " except Exception as e:\n",
798 | " print(\"Skipping url %s: %s\" % (url, e))\n",
799 | " overallSentimentScore.append(' ')\n",
800 | " overallSentimentType.append(' ')\n",
801 | " highestEmotion.append(' ')\n",
802 | " highestEmotionScore.append(' ')\n",
803 | " kywords.append(' ')\n",
804 | " entities.append(' ')\n",
805 | " article_text.append(' ')\n",
806 | " continue\n",
807 | " \n",
808 | " \n",
809 | " \n",
810 | "\n",
811 | " # Get the SENTIMENT score and type\n",
812 | " if 'sentiment' in enriched_json:\n",
813 | " if('score' in enriched_json['sentiment'][\"document\"]):\n",
814 | " overallSentimentScore.append(enriched_json[\"sentiment\"][\"document\"][\"score\"])\n",
815 | " else:\n",
816 | " overallSentimentScore.append('None')\n",
817 | "\n",
818 | " if('label' in enriched_json['sentiment'][\"document\"]):\n",
819 | " overallSentimentType.append(enriched_json[\"sentiment\"][\"document\"][\"label\"])\n",
820 | " else:\n",
821 | " overallSentimentType.append('')\n",
822 | "\n",
823 | " # Read the EMOTIONS into a dict and get the key (emotion) with maximum value\n",
824 | " if 'emotion' in enriched_json:\n",
825 | " me = max(enriched_json[\"emotion\"][\"document\"][\"emotion\"].items(), key=operator.itemgetter(1))[0]\n",
826 | " highestEmotion.append(me)\n",
827 | " highestEmotionScore.append(enriched_json[\"emotion\"][\"document\"][\"emotion\"][me])\n",
828 | "\n",
829 | " else:\n",
830 | " highestEmotion.append('')\n",
831 | " highestEmotionScore.append('')\n",
832 | "\n",
833 | " # Iterate and get KEYWORDS with a confidence of over 70%\n",
834 | " if 'keywords' in enriched_json:\n",
835 | " tmpkw = []\n",
836 | " for kw in enriched_json['keywords']:\n",
837 | " if(float(kw[\"relevance\"]) >= 0.7):\n",
838 | " tmpkw.append(kw[\"text\"])\n",
839 | " # Convert multiple keywords in a list to a string and append the string\n",
840 | " kywords.append(', '.join(tmpkw))\n",
841 | " else: \n",
842 | " kywords.append(\"\")\n",
843 | " \n",
844 | " # Iterate and get Entities with a confidence of over 30%\n",
845 | " if 'entities' in enriched_json:\n",
846 | " tmpent = []\n",
847 | " for ent in enriched_json['entities']: \n",
848 | " if(float(ent[\"relevance\"]) >= 0.3):\n",
849 | " tmpent.append(ent[\"type\"])\n",
850 | " # Convert multiple entities in a list to a string and append the string\n",
851 | " entities.append(', '.join(tmpent))\n",
852 | " else:\n",
853 | " entities.append(\"\")\n",
854 | " \n",
855 | "# Create columns from the list and append to the DataFrame\n",
856 | "if highestEmotion:\n",
857 | " df['LinkHighestEmotion'] = highestEmotion\n",
858 | "if highestEmotionScore:\n",
859 | " df['LinkHighestEmotionScore'] = highestEmotionScore\n",
860 | "\n",
861 | "if overallSentimentType:\n",
862 | " df['LinkOverallSentimentType'] = overallSentimentType\n",
863 | "if overallSentimentScore:\n",
864 | " df['LinkOverallSentimentScore'] = overallSentimentScore\n",
865 | "\n",
866 | "df['LinkKeywords'] = kywords\n",
867 | "df['LinkEntities'] = entities\n",
868 | "df['Article Text'] = article_text"
869 | ]
870 | },
871 | {
872 | "cell_type": "markdown",
873 | "metadata": {},
874 | "source": [
875 | "Add two new columns to capture the `MaxLinkKeyword` and `MaxLinkEntity`:"
876 | ]
877 | },
878 | {
879 | "cell_type": "code",
880 | "execution_count": null,
881 | "metadata": {},
882 | "outputs": [],
883 | "source": [
884 | "# Set 'Max' to first one from keywords and entities lists\n",
885 | "df[\"MaxLinkKeywords\"] = df[\"LinkKeywords\"].apply(lambda x: x.split(',')[0])\n",
886 | "df[\"MaxLinkEntity\"] = df[\"LinkEntities\"].apply(lambda x: x.split(',')[0])"
887 | ]
888 | },
889 | {
890 | "cell_type": "markdown",
891 | "metadata": {},
892 | "source": [
893 | " \n",
894 | "### 4.4 Visual Recognition\n",
895 | "Below uses Visual Recognition to classify the thumbnail images.\n",
896 | "\n",
897 | "> NOTE: When using the **free tier** of Visual Recognition, _classify_ has a limit of 250 images per day."
898 | ]
899 | },
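900 | {
901 | "cell_type": "markdown",
902 | "metadata": {},
903 | "source": [
904 | "For orientation, here is an abridged sketch of one `visual_recognition.classify(...).get_result()` response with made-up values; the fields shown are the ones the parsing below relies on. Note that the leading '/' in `type_hierarchy` makes index 0 of the split empty, which is why the code reads indexes 1-3:\n",
905 | "\n",
906 | "```python\n",
907 | "enriched_json = {\n",
908 | "    'images': [{'classifiers': [{'classes': [\n",
909 | "        {'class': 'coffee mug', 'score': 0.91, 'type_hierarchy': '/container/cup/coffee mug'},\n",
910 | "        {'class': 'blue color', 'score': 0.68}\n",
911 | "    ]}]}]\n",
912 | "}\n",
913 | "```"
914 | ]
915 | },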
900 | {
901 | "cell_type": "code",
902 | "execution_count": null,
903 | "metadata": {},
904 | "outputs": [],
905 | "source": [
906 | "if visual_recognition:\n",
907 | " piclinks = df[\"Image\"]\n",
908 | "\n",
909 | " picclass = []\n",
910 | " piccolor = []\n",
911 | " pictype1 = []\n",
912 | " pictype2 = []\n",
913 | " pictype3 = []\n",
914 | "\n",
915 | " for pic in piclinks:\n",
916 | " if not pic or pic == 'default-img':\n",
917 | " picclass.append(' ')\n",
918 | " piccolor.append(' ')\n",
919 | " pictype1.append(' ')\n",
920 | " pictype2.append(' ')\n",
921 | " pictype3.append(' ')\n",
922 | " continue\n",
923 | "\n",
924 | " classes = []\n",
925 | " enriched_json = {}\n",
926 | " try:\n",
927 | " enriched_json = visual_recognition.classify(url=pic).get_result()\n",
928 | " except Exception as e:\n",
929 | " print(\"Skipping url %s: %s\" % (pic, e))\n",
930 | "\n",
931 | " if 'error' in enriched_json:\n",
932 | " print(enriched_json['error'])\n",
933 | " if 'images' in enriched_json and 'classifiers' in enriched_json['images'][0]:\n",
934 | " classes = enriched_json['images'][0][\"classifiers\"][0][\"classes\"]\n",
935 | "\n",
936 | " color1 = None\n",
937 | " class1 = None\n",
938 | " type_hierarchy1 = None\n",
939 | "\n",
940 | " for iclass in classes:\n",
941 | " # Grab the first color, first class, and first type hierarchy.\n",
942 | " # Note: Usually you'd filter by 'score' too.\n",
943 | " if not type_hierarchy1 and 'type_hierarchy' in iclass:\n",
944 | " type_hierarchy1 = iclass['type_hierarchy']\n",
945 | " if not class1:\n",
946 | " class1 = iclass['class']\n",
947 | " if not color1 and iclass['class'].endswith(' color'):\n",
948 | " color1 = iclass['class'][:-len(' color')]\n",
949 | " if type_hierarchy1 and class1 and color1:\n",
950 | " # We are only using 1 of each per image. When we have all 3, break.\n",
951 | " break\n",
952 | "\n",
953 | " picclass.append(class1 or ' ')\n",
954 | " piccolor.append(color1 or ' ')\n",
955 | " type_split = (type_hierarchy1 or '/ / / ').split('/')\n",
956 | " pictype1.append(type_split[1] if len(type_split) > 1 else '-')\n",
957 | " pictype2.append(type_split[2] if len(type_split) > 2 else '- ')\n",
958 | " pictype3.append(type_split[3] if len(type_split) > 3 else '-')\n",
959 | "\n",
960 | " df[\"Image Color\"] = piccolor\n",
961 | " df[\"Image Class\"] = picclass\n",
962 | " df[\"Image Type\"] = pictype1\n",
963 | " df[\"Image Subtype\"] = pictype2\n",
964 | " df[\"Image Subtype2\"] = pictype3"
965 | ]
966 | },
967 | {
968 | "cell_type": "markdown",
969 | "metadata": {},
970 | "source": [
971 | " \n",
972 | "## Enrichment is now COMPLETE!\n",
973 | " \n",
974 | "Save a copy of the enriched DataFrame as a file in Cloud Object Storage. To run the upload_file function we first need to create a variable that contains our credentials we created in section 2.2. No user input is required as we already have all of the information we need. To upload the file to COS simply run the next two cells."
975 | ]
976 | },
977 | {
978 | "cell_type": "code",
979 | "execution_count": null,
980 | "metadata": {},
981 | "outputs": [],
982 | "source": [
983 | "cos = ibm_boto3.client(service_name='s3',\n",
984 | " ibm_api_key_id=credentials['IBM_API_KEY_ID'],\n",
985 | " ibm_service_instance_id=credentials['IAM_SERVICE_ID'],\n",
986 | " ibm_auth_endpoint=credentials['IBM_AUTH_ENDPOINT'],\n",
987 | " config=Config(signature_version='oauth'),\n",
988 | " endpoint_url=credentials['ENDPOINT'])"
989 | ]
990 | },
991 | {
992 | "cell_type": "code",
993 | "execution_count": null,
994 | "metadata": {},
995 | "outputs": [],
996 | "source": [
997 | "# Build the enriched file name from the original filename.\n",
998 | "localfilename = 'enriched_' + credentials['FILE']\n",
999 | "\n",
1000 | "# Write a CSV file from the enriched pandas DataFrame.\n",
1001 | "df.to_csv(localfilename, index=False)\n",
1002 | "\n",
1003 | "# Use the above put_file method with credentials to put the file in Object Storage.\n",
1004 | "cos.upload_file(localfilename, Bucket=credentials['BUCKET'],Key=localfilename)"
1005 | ]
1006 | },
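{
"cell_type": "markdown",
"metadata": {},
"source": [
"_Optional:_ `ibm_boto3` exposes the familiar S3 client interface, so a quick sanity check is to list the bucket and confirm the enriched file is there (a minimal sketch, assuming the `cos` client and `credentials` from above):"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Optional sanity check: list the bucket and look for the uploaded file.\n",
"# Uncomment to run.\n",
"# for obj in cos.list_objects_v2(Bucket=credentials['BUCKET']).get('Contents', []):\n",
"#     print(obj['Key'])"
]
},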
1007 | {
1008 | "cell_type": "code",
1009 | "execution_count": null,
1010 | "metadata": {},
1011 | "outputs": [],
1012 | "source": [
1013 | "# If you want to use the enriched local file, you can read it back in.\n",
1014 | "# This might be handy if you already enriched and just want to re-run\n",
1015 | "# from this cell and below. Uncomment the following line.\n",
1016 | "\n",
1017 | "# df = pd.read_csv(localfilename)"
1018 | ]
1019 | },
1020 | {
1021 | "cell_type": "markdown",
1022 | "metadata": {},
1023 | "source": [
1024 | " \n",
1025 | "# Part II - Data Preparation\n",
1026 | "\n",
1027 | "## 1. Prepare Data\n",
1028 | " \n",
1029 | "### 1.1 Prepare Multiple DataFrames for Visualizations\n",
1030 | "Before we can create the separate tables for each Watson feature we need to organize and reformat the data. First, we need to determine which data points are tied to metrics. Second, we need to make sure make sure each metric is numeric. _(This is necessary for PixieDust in Part III)_"
1031 | ]
1032 | },
1033 | {
1034 | "cell_type": "code",
1035 | "execution_count": null,
1036 | "metadata": {},
1037 | "outputs": [],
1038 | "source": [
1039 | "# Put the lifetime metrics in a list\n",
1040 | "metrics = [metric for metric in df.columns.values.tolist() if 'Lifetime' in metric]"
1041 | ]
1042 | },
1043 | {
1044 | "cell_type": "markdown",
1045 | "metadata": {},
1046 | "source": [
1047 | " \n",
1048 | "### 1.2 Create a Consolidated Sentiment and Emotion DataFrame\n",
1049 | "You'll create a DataFrame for the sentiment and emotion of the post text and a DataFrame for the sentiment and emotion of the article text. Then you'll combine them into one DataFrame.\n"
1050 | ]
1051 | },
1052 | {
1053 | "cell_type": "markdown",
1054 | "metadata": {},
1055 | "source": [
1056 | "#### Post Sentiment and Emotion DataFrame"
1057 | ]
1058 | },
1059 | {
1060 | "cell_type": "code",
1061 | "execution_count": null,
1062 | "metadata": {},
1063 | "outputs": [],
1064 | "source": [
1065 | "# Create a list with only Post sentiment and emotion values\n",
1066 | "post_tones = [\"Text\",\"TextHighestEmotion\", \"TextHighestEmotionScore\", \"TextOverallSentimentType\", \"TextOverallSentimentScore\"]\n",
1067 | "\n",
1068 | "# Append DataFrame with these metrics\n",
1069 | "post_tones.extend(metrics)\n",
1070 | "\n",
1071 | "# Create a new DataFrame with metrics and sentiment and emotion\n",
1072 | "df_post_tones = df[post_tones]\n",
1073 | "\n",
1074 | "# Determine which tone values are suppose to be numeric and ensure they are numeric. \n",
1075 | "post_numeric_values = [\"TextHighestEmotionScore\", \"TextOverallSentimentScore\"]\n",
1076 | "for i in post_numeric_values:\n",
1077 | " df_post_tones[i] = pd.to_numeric(df_post_tones[i], errors='coerce')\n",
1078 | "\n",
1079 | "# Make all metrics numeric\n",
1080 | "for i in metrics:\n",
1081 | " df_post_tones[i] = pd.to_numeric(df_post_tones[i], errors='coerce')\n",
1082 | "\n",
1083 | "# Add in a column to distinguish what portion the enrichment was happening \n",
1084 | "df_post_tones[\"Type\"] = \"Post\""
1085 | ]
1086 | },
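{
"cell_type": "markdown",
"metadata": {},
"source": [
"The `pd.to_numeric(..., errors='coerce')` loop above reappears in every preparation cell below. As a minimal sketch, the pattern could be factored into a (hypothetical) helper like this; the notebook keeps the explicit loops so that each cell stands on its own:"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"def coerce_numeric(frame, columns):\n",
"    \"\"\"Coerce the given columns to numeric; unparseable values become NaN.\"\"\"\n",
"    for col in columns:\n",
"        frame[col] = pd.to_numeric(frame[col], errors='coerce')\n",
"    return frame\n",
"\n",
"# Hypothetical usage (the cells below inline this loop instead):\n",
"# df_post_tones = coerce_numeric(df_post_tones, post_numeric_values + metrics)"
]
},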
1087 | {
1088 | "cell_type": "markdown",
1089 | "metadata": {},
1090 | "source": [
1091 | "#### Article Sentiment and Emotion DataFrame"
1092 | ]
1093 | },
1094 | {
1095 | "cell_type": "code",
1096 | "execution_count": null,
1097 | "metadata": {},
1098 | "outputs": [],
1099 | "source": [
1100 | "# Create a list with only Article sentiment and emotion values\n",
1101 | "article_tones = [\"Text\", \"LinkHighestEmotion\", \"LinkHighestEmotionScore\", \"LinkOverallSentimentType\", \"LinkOverallSentimentScore\"]\n",
1102 | "\n",
1103 | "# Append DataFrame with these metrics\n",
1104 | "article_tones.extend(metrics)\n",
1105 | "\n",
1106 | "# Create a new DataFrame with metrics and sentiment and emotion\n",
1107 | "df_article_tones = df[article_tones]\n",
1108 | "\n",
1109 | "# Determine which values are suppose to be numeric and ensure they are numeric. \n",
1110 | "art_numeric_values = [\"LinkHighestEmotionScore\", \"LinkOverallSentimentScore\"]\n",
1111 | "for i in art_numeric_values:\n",
1112 | " df_article_tones[i] = pd.to_numeric(df_article_tones[i], errors='coerce')\n",
1113 | " \n",
1114 | "# Make all metrics numeric\n",
1115 | "for i in metrics:\n",
1116 | " df_article_tones[i] = pd.to_numeric(df_article_tones[i], errors='coerce')\n",
1117 | "\n",
1118 | "# Add in a column to distinguish what portion the enrichment was happening \n",
1119 | "df_article_tones[\"Type\"] = \"Article\""
1120 | ]
1121 | },
1122 | {
1123 | "cell_type": "markdown",
1124 | "metadata": {},
1125 | "source": [
1126 | "#### Combine Post and Article DataFrames to Make DataFrame with Sentiment and Emotion"
1127 | ]
1128 | },
1129 | {
1130 | "cell_type": "code",
1131 | "execution_count": null,
1132 | "metadata": {},
1133 | "outputs": [],
1134 | "source": [
1135 | "# First make the Column Headers the same\n",
1136 | "df_post_tones.rename(columns={\"TextHighestEmotion\":\"Emotion\",\n",
1137 | " \"TextHighestEmotionScore\":\"Emotion Score\",\n",
1138 | " \"TextOverallSentimentType\": \"Sentiment\",\n",
1139 | " \"TextOverallSentimentScore\": \"Sentiment Score\"\n",
1140 | " },\n",
1141 | " inplace=True)\n",
1142 | "\n",
1143 | "df_article_tones.rename(columns={\"LinkHighestEmotion\":\"Emotion\",\n",
1144 | " \"LinkHighestEmotionScore\":\"Emotion Score\",\n",
1145 | " \"LinkOverallSentimentType\": \"Sentiment\",\n",
1146 | " \"LinkOverallSentimentScore\": \"Sentiment Score\"\n",
1147 | " },\n",
1148 | " inplace=True)\n",
1149 | "\n",
1150 | "# Combine into one data frame\n",
1151 | "df_tones = pd.concat([df_post_tones, df_article_tones])"
1152 | ]
1153 | },
1154 | {
1155 | "cell_type": "code",
1156 | "execution_count": null,
1157 | "metadata": {},
1158 | "outputs": [],
1159 | "source": [
1160 | "# Only keep the positive, neutral, and negative sentiments. The others are empty or unusable.\n",
1161 | "df_tones = df_tones[df_tones.Sentiment.isin(['positive', 'neutral', 'negative'])]"
1162 | ]
1163 | },
1164 | {
1165 | "cell_type": "markdown",
1166 | "metadata": {},
1167 | "source": [
1168 | " \n",
1169 | "### 1.3 Create a Consolidated Keyword DataFrame\n",
1170 | "You'll create DataFrames for the keywords of the article text, the thumbnail text, and the post text. Then you'll combine them into one DataFrame."
1171 | ]
1172 | },
1173 | {
1174 | "cell_type": "markdown",
1175 | "metadata": {},
1176 | "source": [
1177 | " #### Article Keyword DataFrame "
1178 | ]
1179 | },
1180 | {
1181 | "cell_type": "code",
1182 | "execution_count": null,
1183 | "metadata": {},
1184 | "outputs": [],
1185 | "source": [
1186 | "# Create a list with only Article Keywords\n",
1187 | "article_keywords = [\"Text\", \"MaxLinkKeywords\"]\n",
1188 | "\n",
1189 | "# Append DataFrame with these metrics\n",
1190 | "article_keywords.extend(metrics)\n",
1191 | "\n",
1192 | "# Create a new DataFrame with keywords and metrics\n",
1193 | "df_article_keywords = df[article_keywords]\n",
1194 | "\n",
1195 | "# Make all metrics numeric\n",
1196 | "for i in metrics:\n",
1197 | " df_article_keywords[i] = pd.to_numeric(df_article_keywords[i], errors='coerce')\n",
1198 | "\n",
1199 | "# Drop NA Values in Keywords Column\n",
1200 | "df_article_keywords['MaxLinkKeywords'].replace(' ', np.nan, inplace=True)\n",
1201 | "df_article_keywords.dropna(subset=['MaxLinkKeywords'], inplace=True)\n",
1202 | "\n",
1203 | "# Add in a column to distinguish what portion the enrichment was happening \n",
1204 | "df_article_keywords[\"Type\"] = \"Article\""
1205 | ]
1206 | },
1207 | {
1208 | "cell_type": "markdown",
1209 | "metadata": {},
1210 | "source": [
1211 | "#### Thumbnail Keyword DataFrame "
1212 | ]
1213 | },
1214 | {
1215 | "cell_type": "code",
1216 | "execution_count": null,
1217 | "metadata": {},
1218 | "outputs": [],
1219 | "source": [
1220 | "# Create a list with only Thumbnail Keywords\n",
1221 | "thumbnail_keywords = [\"Text\", \"MaxThumbnailKeywords\"]\n",
1222 | "\n",
1223 | "# Append DataFrame with these metrics\n",
1224 | "thumbnail_keywords.extend(metrics)\n",
1225 | "\n",
1226 | "# Create a new DataFrame with keywords and metrics\n",
1227 | "df_thumbnail_keywords = df[thumbnail_keywords]\n",
1228 | "\n",
1229 | "# Make all metrics numeric\n",
1230 | "for i in metrics:\n",
1231 | " df_thumbnail_keywords[i] = pd.to_numeric(df_thumbnail_keywords[i], errors='coerce')\n",
1232 | " \n",
1233 | "# Drop NA Values in Keywords Column\n",
1234 | "df_thumbnail_keywords['MaxThumbnailKeywords'].replace(' ', np.nan, inplace=True)\n",
1235 | "df_thumbnail_keywords.dropna(subset=['MaxThumbnailKeywords'], inplace=True)\n",
1236 | "\n",
1237 | "# Add in a column to distinguish what portion the enrichment was happening \n",
1238 | "df_thumbnail_keywords[\"Type\"] = \"Thumbnails\""
1239 | ]
1240 | },
1241 | {
1242 | "cell_type": "markdown",
1243 | "metadata": {},
1244 | "source": [
1245 | "#### Post Keyword DataFrame "
1246 | ]
1247 | },
1248 | {
1249 | "cell_type": "code",
1250 | "execution_count": null,
1251 | "metadata": {},
1252 | "outputs": [],
1253 | "source": [
1254 | "# Create a list with only Thumbnail Keywords\n",
1255 | "post_keywords = [\"Text\", \"MaxTextKeywords\"]\n",
1256 | "\n",
1257 | "# Append DataFrame with these metrics\n",
1258 | "post_keywords.extend(metrics)\n",
1259 | "\n",
1260 | "# Create a new DataFrame with keywords and metrics\n",
1261 | "df_post_keywords = df[post_keywords]\n",
1262 | "\n",
1263 | "# Make all metrics numeric\n",
1264 | "for i in metrics:\n",
1265 | " df_post_keywords[i] = pd.to_numeric(df_post_keywords[i], errors='coerce')\n",
1266 | " \n",
1267 | "# Drop NA Values in Keywords Column\n",
1268 | "df_post_keywords['MaxTextKeywords'].replace(' ', np.nan, inplace=True)\n",
1269 | "df_post_keywords.dropna(subset=['MaxTextKeywords'], inplace=True)\n",
1270 | "\n",
1271 | "# Add in a column to distinguish what portion the enrichment was happening \n",
1272 | "df_post_keywords[\"Type\"] = \"Posts\""
1273 | ]
1274 | },
1275 | {
1276 | "cell_type": "markdown",
1277 | "metadata": {},
1278 | "source": [
1279 | "#### Combine Post, Thumbnail, and Article DataFrames to Make One Keywords DataFrame"
1280 | ]
1281 | },
1282 | {
1283 | "cell_type": "code",
1284 | "execution_count": null,
1285 | "metadata": {},
1286 | "outputs": [],
1287 | "source": [
1288 | "# First make the column headers the same\n",
1289 | "df_post_keywords.rename(columns={\"MaxTextKeywords\": \"Keywords\"}, inplace=True)\n",
1290 | "df_thumbnail_keywords.rename(columns={\"MaxThumbnailKeywords\":\"Keywords\"}, inplace=True)\n",
1291 | "df_article_keywords.rename(columns={\"MaxLinkKeywords\":\"Keywords\"}, inplace=True)\n",
1292 | "\n",
1293 | "# Combine into one data frame\n",
1294 | "df_keywords = pd.concat([df_post_keywords, df_thumbnail_keywords, df_article_keywords])\n",
1295 | "\n",
1296 | "# Discard posts with lower total reach to make charting easier\n",
1297 | "df_keywords = df_keywords[df_keywords[\"Lifetime Post Total Reach\"] > 20000]\n"
1298 | ]
1299 | },
1300 | {
1301 | "cell_type": "markdown",
1302 | "metadata": {},
1303 | "source": [
1304 | "\n",
1305 | "### 1.4 Create a Consolidated Entity DataFrame\n",
1306 | "You'll create DataFrames for the entities of the article text, the thumbnail text, and the post text. Then you'll combine them into one DataFrame."
1307 | ]
1308 | },
1309 | {
1310 | "cell_type": "markdown",
1311 | "metadata": {},
1312 | "source": [
1313 | "#### Article Entity DataFrame "
1314 | ]
1315 | },
1316 | {
1317 | "cell_type": "code",
1318 | "execution_count": null,
1319 | "metadata": {},
1320 | "outputs": [],
1321 | "source": [
1322 | "# Create a list with only Article Keywords\n",
1323 | "article_entities = [\"Text\", \"MaxLinkEntity\"]\n",
1324 | "\n",
1325 | "# Append DataFrame with these metrics\n",
1326 | "article_entities.extend(metrics)\n",
1327 | "\n",
1328 | "# Create a new DataFrame with keywords and metrics\n",
1329 | "df_article_entities = df[article_entities]\n",
1330 | " \n",
1331 | "# Make all metrics numeric\n",
1332 | "for i in metrics:\n",
1333 | " df_article_entities[i] = pd.to_numeric(df_article_entities[i], errors='coerce')\n",
1334 | " \n",
1335 | "# Drop NA Values in Keywords Column\n",
1336 | "df_article_entities['MaxLinkEntity'] = df[\"MaxLinkEntity\"].replace(r'\\s+', np.nan, regex=True)\n",
1337 | "df_article_entities.dropna(subset=['MaxLinkEntity'], inplace=True)\n",
1338 | "\n",
1339 | "# Add in a column to distinguish what portion the enrichment was happening \n",
1340 | "df_article_entities[\"Type\"] = \"Article\""
1341 | ]
1342 | },
1343 | {
1344 | "cell_type": "markdown",
1345 | "metadata": {},
1346 | "source": [
1347 | "#### Thumbnail Entity DataFrame"
1348 | ]
1349 | },
1350 | {
1351 | "cell_type": "code",
1352 | "execution_count": null,
1353 | "metadata": {},
1354 | "outputs": [],
1355 | "source": [
1356 | "# Create a list with only Thumbnail Keywords\n",
1357 | "thumbnail_entities = [\"Text\", \"MaxThumbnailEntity\"]\n",
1358 | "\n",
1359 | "# Append DataFrame with these metrics\n",
1360 | "thumbnail_entities.extend(metrics)\n",
1361 | "\n",
1362 | "# Create a new DataFrame with keywords and metrics\n",
1363 | "df_thumbnail_entities = df[thumbnail_entities]\n",
1364 | "\n",
1365 | "# Make all metrics numeric\n",
1366 | "for i in metrics:\n",
1367 | " df_thumbnail_entities[i] = pd.to_numeric(df_thumbnail_entities[i], errors='coerce')\n",
1368 | " \n",
1369 | "# Drop NA Values in Keywords Column\n",
1370 | "df_thumbnail_entities['MaxThumbnailEntity'] = df_thumbnail_entities['MaxThumbnailEntity'].replace(r'\\s+', np.nan, regex=True)\n",
1371 | "df_thumbnail_entities.dropna(subset=['MaxThumbnailEntity'], inplace=True)\n",
1372 | "\n",
1373 | "# Add in a column to distinguish what portion the enrichment was happening \n",
1374 | "df_thumbnail_entities[\"Type\"] = \"Thumbnails\""
1375 | ]
1376 | },
1377 | {
1378 | "cell_type": "markdown",
1379 | "metadata": {},
1380 | "source": [
1381 | "#### Post Entity DataFrame"
1382 | ]
1383 | },
1384 | {
1385 | "cell_type": "code",
1386 | "execution_count": null,
1387 | "metadata": {},
1388 | "outputs": [],
1389 | "source": [
1390 | "# Create a list with only Thumbnail Keywords\n",
1391 | "post_entities = [\"Text\", \"MaxTextEntity\"]\n",
1392 | "\n",
1393 | "# Append DataFrame with these metrics\n",
1394 | "post_entities.extend(metrics)\n",
1395 | "\n",
1396 | "# Create a new DataFrame with keywords and metrics\n",
1397 | "df_post_entities = df[post_entities]\n",
1398 | "\n",
1399 | "# Make all metrics numeric\n",
1400 | "for i in metrics:\n",
1401 | " df_post_entities[i] = pd.to_numeric(df_post_entities[i], errors='coerce')\n",
1402 | " \n",
1403 | "# Drop NA Values in Keywords Column\n",
1404 | "df_post_entities['MaxTextEntity'] = df_post_entities['MaxTextEntity'].replace(r'\\s+', np.nan, regex=True)\n",
1405 | "df_post_entities.dropna(subset=['MaxTextEntity'], inplace=True)\n",
1406 | "\n",
1407 | "# Add in a column to distinguish what portion the enrichment was happening \n",
1408 | "df_post_entities[\"Type\"] = \"Posts\""
1409 | ]
1410 | },
1411 | {
1412 | "cell_type": "markdown",
1413 | "metadata": {},
1414 | "source": [
1415 | "#### Combine Post, Thumbnail, and Article DataFrames to Make One Entity DataFrame"
1416 | ]
1417 | },
1418 | {
1419 | "cell_type": "code",
1420 | "execution_count": null,
1421 | "metadata": {},
1422 | "outputs": [],
1423 | "source": [
1424 | "# First make the column headers the same\n",
1425 | "df_post_entities.rename(columns={\"MaxTextEntity\": \"Entities\"}, inplace=True)\n",
1426 | "\n",
1427 | "df_thumbnail_entities.rename(columns={\"MaxThumbnailEntity\":\"Entities\"}, inplace=True)\n",
1428 | "\n",
1429 | "df_article_entities.rename(columns={\"MaxLinkEntity\":\"Entities\"}, inplace=True)\n",
1430 | "\n",
1431 | "# Combine into one data frame\n",
1432 | "df_entities = pd.concat([df_post_entities, df_thumbnail_entities, df_article_entities])\n",
1433 | "\n",
1434 | "df_entities[\"Entities\"] = df_entities[\"Entities\"].replace('', np.nan)\n",
1435 | "df_entities.dropna(subset=[\"Entities\"], inplace=True)"
1436 | ]
1437 | },
1438 | {
1439 | "cell_type": "markdown",
1440 | "metadata": {},
1441 | "source": [
1442 | "\n",
1443 | "### 1.5 Create a Consolidated Image DataFrame"
1444 | ]
1445 | },
1446 | {
1447 | "cell_type": "markdown",
1448 | "metadata": {},
1449 | "source": [
1450 | "#### Combine Metrics with Type Hierarchy, Class and Color to Make One Image DataFrame"
1451 | ]
1452 | },
1453 | {
1454 | "cell_type": "code",
1455 | "execution_count": null,
1456 | "metadata": {},
1457 | "outputs": [],
1458 | "source": [
1459 | "if visual_recognition:\n",
1460 | " # Create a list with only Visual Recognition columns\n",
1461 | " pic_keywords = ['Image Type', 'Image Subtype', 'Image Subtype2', 'Image Class', 'Image Color']\n",
1462 | "\n",
1463 | " # Append DataFrame with these metrics\n",
1464 | " pic_keywords.extend(metrics)\n",
1465 | "\n",
1466 | " # Create a new DataFrame with keywords and metrics\n",
1467 | " df_pic_keywords = df[pic_keywords]\n",
1468 | "\n",
1469 | " # Make all metrics numeric\n",
1470 | " for i in metrics:\n",
1471 | " df_pic_keywords[i] = pd.to_numeric(df_pic_keywords[i], errors='coerce')\n",
1472 | "\n",
1473 | " # Discard posts with lower total reach to make charting easier\n",
1474 | " df_pic_keywords = df_pic_keywords[df_pic_keywords[\"Lifetime Post Total Reach\"] > 15000]"
1475 | ]
1476 | },
1477 | {
1478 | "cell_type": "code",
1479 | "execution_count": null,
1480 | "metadata": {},
1481 | "outputs": [],
1482 | "source": [
1483 | "if visual_recognition:\n",
1484 | " images = df_pic_keywords[df_pic_keywords['Image Type'] != ' ']"
1485 | ]
1486 | },
1487 | {
1488 | "cell_type": "markdown",
1489 | "metadata": {},
1490 | "source": [
1491 | " \n",
1492 | "# Part III\n",
1493 | " \n",
1494 | "## 1. Setup\n",
1495 | "\n",
1496 | "### 1.1 Assign Variables\n",
1497 | "Assign new DataFrames to variables. "
1498 | ]
1499 | },
1500 | {
1501 | "cell_type": "code",
1502 | "execution_count": null,
1503 | "metadata": {},
1504 | "outputs": [],
1505 | "source": [
1506 | "entities = df_entities\n",
1507 | "tones = df_tones\n",
1508 | "keywords = df_keywords"
1509 | ]
1510 | },
1511 | {
1512 | "cell_type": "markdown",
1513 | "metadata": {},
1514 | "source": [
1515 | "\n",
1516 | "## 2. Visualize Data\n",
1517 | " \n",
1518 | "### 2.1 Run PixieDust Visualization Library with Display() API\n",
1519 | "PixieDust lets you visualize your data in just a few clicks using the display() API. You can find more info at https://pixiedust.github.io/pixiedust/displayapi.html."
1520 | ]
1521 | },
1522 | {
1523 | "cell_type": "markdown",
1524 | "metadata": {},
1525 | "source": [
1526 | "#### We can use a pie chart to identify how lifetime engagement was broken up by sentiment. \n",
1527 | "\n",
1528 | "Click on the `Options` button to change the chart. Here are some things to try:\n",
1529 | "* Add *Type* to make the breakdown show *Post* or *Article*.\n",
1530 | "* Show *Emotion* intead of *Sentiment* (or both).\n",
1531 | "* Try a different metric."
1532 | ]
1533 | },
1534 | {
1535 | "cell_type": "code",
1536 | "execution_count": null,
1537 | "metadata": {},
1538 | "outputs": [],
1539 | "source": [
1540 | "import pixiedust"
1541 | ]
1542 | },
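{
"cell_type": "markdown",
"metadata": {},
"source": [
"The options you pick in the chart UI are persisted as `pixiedust.displayParams` in each cell's metadata. PixieDust can also take them as keyword arguments, e.g. `display(tones, handlerId='pieChart', keyFields='Emotion')` (a sketch based on the PixieDust docs; the cells below rely on the saved metadata instead)."
]
},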
1543 | {
1544 | "cell_type": "code",
1545 | "execution_count": null,
1546 | "metadata": {
1547 | "pixiedust": {
1548 | "displayParams": {
1549 | "aggregation": "SUM",
1550 | "chartsize": "70",
1551 | "charttype": "stacked",
1552 | "clusterby": "Type",
1553 | "handlerId": "pieChart",
1554 | "keyFields": "Emotion",
1555 | "legend": "true",
1556 | "mpld3": "false",
1557 | "orientation": "horizontal",
1558 | "rendererId": "matplotlib",
1559 | "rowCount": "100",
1560 | "sortby": "Values DESC",
1561 | "title": "Lifetime Engaged Users by Emotion",
1562 | "valueFields": "Lifetime Engaged Users",
1563 | "ylabel": "true"
1564 | }
1565 | }
1566 | },
1567 | "outputs": [],
1568 | "source": [
1569 | "display(tones)"
1570 | ]
1571 | },
1572 | {
1573 | "cell_type": "markdown",
1574 | "metadata": {},
1575 | "source": [
1576 | "#### Now let's look at the same statistics as a bar chart.\n",
1577 | "\n",
1578 | "It is the same line of code. Use the `Edit Metadata` button to see how PixieDust knows to show us a bar chart. If you don't have a button use the menu and select `View > Cell Toolbar > Edit Metadata`.\n",
1579 | "\n",
1580 | "A bar chart is better at showing more information. We added `Cluster By: Type` so we already see numbers for posts and articles. Notice what the chart tells you. Most of our articles and posts are `positive`. But what sentiment really engages more users? Click on `Options` and try this:\n",
1581 | "\n",
1582 | "* Change the aggregation to `AVG`.\n",
1583 | "\n",
1584 | "What sentiment leads to higher average engagement?\n"
1585 | ]
1586 | },
1587 | {
1588 | "cell_type": "code",
1589 | "execution_count": null,
1590 | "metadata": {
1591 | "pixiedust": {
1592 | "displayParams": {
1593 | "aggregation": "SUM",
1594 | "chartsize": "70",
1595 | "charttype": "stacked",
1596 | "clusterby": "Type",
1597 | "handlerId": "barChart",
1598 | "keyFields": "Sentiment",
1599 | "legend": "true",
1600 | "orientation": "horizontal",
1601 | "rendererId": "matplotlib",
1602 | "rowCount": "100",
1603 | "sortby": "Values DESC",
1604 | "title": "Lifetime Engaged Users by Sentiment",
1605 | "valueFields": "Lifetime Engaged Users"
1606 | }
1607 | },
1608 | "scrolled": false
1609 | },
1610 | "outputs": [],
1611 | "source": [
1612 | "display(tones)"
1613 | ]
1614 | },
1615 | {
1616 | "cell_type": "markdown",
1617 | "metadata": {},
1618 | "source": [
1619 | "#### Now let's look at the entities that were detected by Natural Language Understanding.\n",
1620 | "\n",
1621 | "The following bar chart shows the entities that were detected. This time we are stacking negative feedback and \"likes\" to get a picture of the kind of feedback the entities were getting. We chose a horizontal, stacked bar chart with descending values for a little variety.\n",
1622 | "\n",
1623 | "* Try a different renderer and see what you get."
1624 | ]
1625 | },
1626 | {
1627 | "cell_type": "code",
1628 | "execution_count": null,
1629 | "metadata": {
1630 | "pixiedust": {
1631 | "displayParams": {
1632 | "aggregation": "SUM",
1633 | "chartsize": "70",
1634 | "charttype": "stacked",
1635 | "handlerId": "barChart",
1636 | "keyFields": "Entities",
1637 | "orientation": "horizontal",
1638 | "rendererId": "matplotlib",
1639 | "rowCount": "100",
1640 | "sortby": "Values DESC",
1641 | "title": "Entities in Posts and Articles",
1642 | "valueFields": "Lifetime Post Stories by action type - like,Lifetime Negative Feedback from Users"
1643 | }
1644 | },
1645 | "scrolled": false
1646 | },
1647 | "outputs": [],
1648 | "source": [
1649 | "display(entities)"
1650 | ]
1651 | },
1652 | {
1653 | "cell_type": "markdown",
1654 | "metadata": {},
1655 | "source": [
1656 | "#### Next we look at the keywords detected by Natural Language Understanding\n"
1657 | ]
1658 | },
1659 | {
1660 | "cell_type": "code",
1661 | "execution_count": null,
1662 | "metadata": {
1663 | "pixiedust": {
1664 | "displayParams": {
1665 | "aggregation": "SUM",
1666 | "chartsize": "85",
1667 | "charttype": "stacked",
1668 | "clusterby": "Type",
1669 | "handlerId": "barChart",
1670 | "keyFields": "Keywords",
1671 | "legend": "true",
1672 | "mpld3": "false",
1673 | "orientation": "horizontal",
1674 | "rendererId": "matplotlib",
1675 | "rowCount": "100",
1676 | "sortby": "Values DESC",
1677 | "timeseries": "false",
1678 | "title": "Keyword Total Reach",
1679 | "valueFields": "Lifetime Post Total Reach"
1680 | }
1681 | },
1682 | "scrolled": false
1683 | },
1684 | "outputs": [],
1685 | "source": [
1686 | "display(keywords)"
1687 | ]
1688 | },
1689 | {
1690 | "cell_type": "markdown",
1691 | "metadata": {},
1692 | "source": [
1693 | "#### Now let's take a look at what Visual Recognition can show us.\n",
1694 | "\n",
1695 | "See how the images influenced the metrics. We've used visual recognition to identify a class and a type hierarchy for each image. We've also captured the top recognized color for each image. Our sample data doesn't have a significant number of data points, but these three charts demonstrate how you could:\n",
1696 | "\n",
1697 | "1. Recognize image classes that correlate to higher total reach.\n",
1698 | "1. Add a type hierarchy for a higher level abstraction or to add grouping/stacking to the class data.\n",
1699 | "1. Determine if image color correlates to total reach.\n",
1700 | "\n",
1701 | "Visual recognition makes it surprisingly easy to do all of the above. Of course, you can easily try different metrics as you experiment. If you are not convinced that you should add ultramarine laser pictures to all of your articles, then you might want to do some research with a better data sample."
1702 | ]
1703 | },
1704 | {
1705 | "cell_type": "code",
1706 | "execution_count": null,
1707 | "metadata": {
1708 | "pixiedust": {
1709 | "displayParams": {
1710 | "aggregation": "SUM",
1711 | "chartsize": "85",
1712 | "charttype": "stacked",
1713 | "handlerId": "barChart",
1714 | "keyFields": "Image Class",
1715 | "legend": "true",
1716 | "orientation": "horizontal",
1717 | "rendererId": "matplotlib",
1718 | "rowCount": "100",
1719 | "sortby": "Values DESC",
1720 | "title": "Image Classes",
1721 | "valueFields": "Lifetime Post Total Reach"
1722 | }
1723 | }
1724 | },
1725 | "outputs": [],
1726 | "source": [
1727 | "if visual_recognition:\n",
1728 | " display(images)"
1729 | ]
1730 | },
1731 | {
1732 | "cell_type": "code",
1733 | "execution_count": null,
1734 | "metadata": {
1735 | "pixiedust": {
1736 | "displayParams": {
1737 | "aggregation": "SUM",
1738 | "chartsize": "85",
1739 | "charttype": "stacked",
1740 | "clusterby": "Image Type",
1741 | "handlerId": "barChart",
1742 | "keyFields": "Image Subtype",
1743 | "legend": "true",
1744 | "mpld3": "false",
1745 | "orientation": "horizontal",
1746 | "rendererId": "matplotlib",
1747 | "rowCount": "100",
1748 | "sortby": "Values DESC",
1749 | "stretch": "false",
1750 | "title": "Image Type Hierarchy",
1751 | "valueFields": "Lifetime Post Total Reach"
1752 | }
1753 | }
1754 | },
1755 | "outputs": [],
1756 | "source": [
1757 | "if visual_recognition:\n",
1758 | " display(images)"
1759 | ]
1760 | },
1761 | {
1762 | "cell_type": "code",
1763 | "execution_count": null,
1764 | "metadata": {
1765 | "pixiedust": {
1766 | "displayParams": {
1767 | "aggregation": "SUM",
1768 | "chartsize": "85",
1769 | "charttype": "stacked",
1770 | "handlerId": "barChart",
1771 | "keyFields": "Image Color",
1772 | "legend": "true",
1773 | "orientation": "horizontal",
1774 | "rendererId": "matplotlib",
1775 | "rowCount": "100",
1776 | "sortby": "Values DESC",
1777 | "title": "Image Color",
1778 | "valueFields": "Lifetime Post Total Reach"
1779 | }
1780 | }
1781 | },
1782 | "outputs": [],
1783 | "source": [
1784 | "if visual_recognition:\n",
1785 | " display(images)"
1786 | ]
1787 | },
1788 | {
1789 | "cell_type": "markdown",
1790 | "metadata": {},
1791 | "source": [
1792 | "
\n",
1793 | "Copyright © IBM Corp. 2017, 2018. This notebook and its source code are released under the terms of the Apache 2.0."
1794 | ]
1795 | },
1796 | {
1797 | "cell_type": "markdown",
1798 | "metadata": {},
1799 | "source": [
1800 | "Licensed under the Apache License, Version 2.0 (the \"License\"); you may\n",
1801 | "not use this file except in compliance with the License. You may obtain\n",
1802 | "a copy of the License at\n",
1803 | "\n",
1804 | " http://www.apache.org/licenses/LICENSE-2.0\n",
1805 | "\n",
1806 | "Unless required by applicable law or agreed to in writing, software\n",
1807 | "distributed under the License is distributed on an \"AS IS\" BASIS, WITHOUT\n",
1808 | "WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the\n",
1809 | "License for the specific language governing permissions and limitations\n",
1810 | "under the License."
1811 | ]
1812 | }
1813 | ],
1814 | "metadata": {
1815 | "kernelspec": {
1816 | "display_name": "Python 3.6",
1817 | "language": "python",
1818 | "name": "python3"
1819 | },
1820 | "language_info": {
1821 | "codemirror_mode": {
1822 | "name": "ipython",
1823 | "version": 3
1824 | },
1825 | "file_extension": ".py",
1826 | "mimetype": "text/x-python",
1827 | "name": "python",
1828 | "nbconvert_exporter": "python",
1829 | "pygments_lexer": "ipython3",
1830 | "version": "3.6.9"
1831 | }
1832 | },
1833 | "nbformat": 4,
1834 | "nbformat_minor": 1
1835 | }
1836 |
--------------------------------------------------------------------------------