├── .gitignore ├── .markdownlint.json ├── .travis.yml ├── ACKNOWLEDGEMENTS.md ├── CONTRIBUTING.md ├── DEBUGGING.md ├── LICENSE ├── MAINTAINERS.md ├── README.md ├── dashboards └── dashboard.json ├── data └── example_facebook_data.csv ├── doc └── source │ └── images │ ├── add_credentials.png │ ├── add_file.png │ ├── add_notebook.png │ ├── architecture.png │ ├── emotion.png │ ├── emotional_engagement.png │ ├── entities.png │ ├── insert_file_credentials.png │ ├── insert_to_code.png │ ├── inserted_pandas.png │ ├── keywords.png │ ├── new_notebook.png │ ├── sentiment.png │ ├── sentimental_engagement.png │ └── studio_project_overview.png ├── examples └── enriched_example_facebook_data.csv ├── manifest.yml └── notebooks └── pixiedust_facebook_analysis.ipynb /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | env/ 12 | build/ 13 | develop-eggs/ 14 | dist/ 15 | downloads/ 16 | eggs/ 17 | .eggs/ 18 | lib/ 19 | lib64/ 20 | parts/ 21 | sdist/ 22 | var/ 23 | *.egg-info/ 24 | .installed.cfg 25 | *.egg 26 | 27 | # PyInstaller 28 | # Usually these files are written by a python script from a template 29 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 30 | *.manifest 31 | *.spec 32 | 33 | # Installer logs 34 | pip-log.txt 35 | pip-delete-this-directory.txt 36 | 37 | # Unit test / coverage reports 38 | htmlcov/ 39 | .tox/ 40 | .coverage 41 | .coverage.* 42 | .cache 43 | nosetests.xml 44 | coverage.xml 45 | *,cover 46 | .hypothesis/ 47 | 48 | # Translations 49 | *.mo 50 | *.pot 51 | 52 | # Django stuff: 53 | *.log 54 | local_settings.py 55 | 56 | # Flask stuff: 57 | instance/ 58 | .webassets-cache 59 | 60 | # Scrapy stuff: 61 | .scrapy 62 | 63 | # Sphinx documentation 64 | docs/_build/ 65 | 66 | # PyBuilder 67 | target/ 68 | 69 | # IPython Notebook 70 | .ipynb_checkpoints 71 | 72 | # pyenv 73 | .python-version 74 | 75 | # celery beat schedule file 76 | celerybeat-schedule 77 | 78 | # dotenv 79 | .env 80 | 81 | # virtualenv 82 | venv/ 83 | ENV/ 84 | 85 | # Spyder project settings 86 | .spyderproject 87 | 88 | # Rope project settings 89 | .ropeproject 90 | 91 | # VS Code project files (and local history plugin files) 92 | .vscode 93 | .history 94 | 95 | # IDEA IDE projects 96 | .idea 97 | 98 | -------------------------------------------------------------------------------- /.markdownlint.json: -------------------------------------------------------------------------------- 1 | { 2 | "_docs_for_this_file": "https://github.com/DavidAnson/markdownlint/blob/master/doc/Rules.md", 3 | "line-length": false, 4 | "first-line-h1": false, 5 | "header-style": { 6 | "style": "atx" 7 | }, 8 | "ul-style": { 9 | "style": "asterisk" 10 | }, 11 | "required-headers": { 12 | "headers": [ 13 | "*", 14 | "## Flow", 15 | "## Included components", 16 | "## Steps", 17 | "*", 18 | "## License" 19 | ] 20 | }, 21 | "no-blanks-blockquote": false 22 | } 23 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | language: node_js 2 | cache: 3 | directories: 4 | - "node_modules" 5 | node_js: 6 | - "lts/*" 7 | before_script: 8 | - npm install -g markdownlint-cli 9 | script: 10 | - markdownlint --config $TRAVIS_BUILD_DIR/.markdownlint.json README.md 11 | - if grep -n -T 
--before-context 2 --after-context 1 TODO README.md; then exit 1; fi 12 | - if grep -n -T --before-context 2 --after-context 1 FIXME README.md; then exit 1; fi 13 | -------------------------------------------------------------------------------- /ACKNOWLEDGEMENTS.md: -------------------------------------------------------------------------------- 1 | # Acknowledgements 2 | 3 | * Credit goes to [Anna Quincy](https://www.linkedin.com/in/anna-quincy-25042957) and [Tyler Andersen](https://www.linkedin.com/in/tyler-andersen-2bb82336) for providing the initial notebook. 4 | -------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | # Contributing 2 | 3 | This is an open source project, and we appreciate your help! 4 | 5 | We use the GitHub issue tracker to discuss new features and non-trivial bugs. 6 | 7 | In addition to the issue tracker, [#journeys on 8 | Slack](https://dwopen.slack.com) is the best way to get into contact with the 9 | project's maintainers. 10 | 11 | To contribute code, documentation, or tests, please submit a pull request to 12 | the GitHub repository. Generally, we expect two maintainers to review your pull 13 | request before it is approved for merging. For more details, see the 14 | [MAINTAINERS](MAINTAINERS.md) page. 15 | 16 | Contributions are subject to the [Developer Certificate of Origin, Version 1.1](https://developercertificate.org/) and the [Apache License, Version 2](https://www.apache.org/licenses/LICENSE-2.0.txt). 17 | -------------------------------------------------------------------------------- /DEBUGGING.md: -------------------------------------------------------------------------------- 1 | Troubleshooting 2 | =============== 3 | 4 | Jupyter Notebooks 5 | ----------------- 6 | 7 | * Make sure the pip install ran correctly. You might need to restart the 8 | kernel and run the cells from the top after the pip install runs the first 9 | time. 10 | * Many of the cells rely on variables that are set in earlier cells. Some of 11 | these are cleared in later cells. Start over at the top when troubleshooting. 12 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 
25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. 
If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. 
Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "[]" 182 | replaced with your own identifying information. (Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright [yyyy] [name of copyright owner] 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. 
202 | -------------------------------------------------------------------------------- /MAINTAINERS.md: -------------------------------------------------------------------------------- 1 | # Maintainers Guide 2 | 3 | This guide is intended for maintainers - anybody with commit access to one or 4 | more Code Pattern repositories. 5 | 6 | ## Methodology 7 | 8 | This repository does not have a traditional release management cycle, but 9 | should instead be maintained as a useful, working, and polished reference at 10 | all times. While all work can therefore be focused on the master branch, the 11 | quality of this branch should never be compromised. 12 | 13 | The remainder of this document details how to merge pull requests to the 14 | repositories. 15 | 16 | ## Merge approval 17 | 18 | The project maintainers use LGTM (Looks Good To Me) in comments on the pull 19 | request to indicate acceptance prior to merging. A change requires LGTMs from 20 | two project maintainers. If the code is written by a maintainer, the change 21 | only requires one additional LGTM. 22 | 23 | ## Reviewing Pull Requests 24 | 25 | We recommend reviewing pull requests directly within GitHub. This allows a 26 | public commentary on changes, providing transparency for all users. When 27 | providing feedback, be civil, courteous, and kind. Disagreement is fine, so long 28 | as the discourse is carried out politely. If we see a record of uncivil or 29 | abusive comments, we will revoke your commit privileges and invite you to leave 30 | the project. 31 | 32 | During your review, consider the following points: 33 | 34 | ### Does the change have positive impact? 35 | 36 | Some proposed changes may not represent a positive impact on the project. Ask 37 | whether or not the change will make understanding the code easier, or if it 38 | could simply be a personal preference on the part of the author (see 39 | [bikeshedding](https://en.wiktionary.org/wiki/bikeshedding)). 40 | 41 | Pull requests that do not have a clear positive impact should be closed without 42 | merging. 43 | 44 | ### Do the changes make sense? 45 | 46 | If you do not understand what the changes are or what they accomplish, ask the 47 | author for clarification. Ask the author to add comments and/or clarify test 48 | case names to make the intentions clear. 49 | 50 | At times, such clarification will reveal that the author may not be using the 51 | code correctly, or is unaware of features that accommodate their needs. If you 52 | feel this is the case, work up a code sample that would address the pull 53 | request for them, and feel free to close the pull request once they confirm. 54 | 55 | ### Does the change introduce a new feature? 56 | 57 | For any given pull request, ask yourself "is this a new feature?" If so, does 58 | the pull request (or associated issue) contain narrative indicating the need 59 | for the feature? If not, ask the author to provide that information. 60 | 61 | Are new unit tests in place that test all new behaviors introduced? If not, do 62 | not merge the feature until they are! Is documentation in place for the new 63 | feature? (See the documentation guidelines.) If not, do not merge the feature 64 | until it is! Is the feature necessary for general use cases? Try to keep the 65 | scope of any given component narrow. If a proposed feature does not fit that 66 | scope, recommend to the user that they maintain the feature on their own, and 67 | close the request.
You may also recommend that they see if the feature gains 68 | traction among other users, and suggest they re-submit when they can show such 69 | support. 70 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | [![Build Status](https://travis-ci.org/IBM/pixiedust-facebook-analysis.svg?branch=master)](https://travis-ci.org/IBM/pixiedust-facebook-analysis) 2 | 3 | # Uncover insights from Facebook data with Watson services 4 | 5 | ## WARNING: This repository is no longer maintained. 6 | 7 | This repository will not be updated. The repository will be kept available in read-only mode. 8 | 9 | In this code pattern, we will use a Jupyter notebook with Watson Studio to glean insights from a vast body of unstructured data. We'll start with data exported from Facebook Analytics. We'll use Watson’s Natural Language Understanding and Visual Recognition to enrich the data. 10 | 11 | We'll use the enriched data to answer questions like: 12 | 13 | > What emotion is most prevalent in the posts with the highest engagement? 14 | 15 | > Which sentiment has the higher engagement score on average? 16 | 17 | > What are the top keywords, entities, or images measured by total reach? 18 | 19 | These types of insights are especially beneficial for marketing analysts who are interested in understanding and improving brand perception, product performance, customer satisfaction, and ways to engage their audiences. 20 | 21 | It is important to note that this code pattern is meant to be used as a guided experiment, rather than an application with one set output. The standard Facebook Analytics export features text from posts, articles, and thumbnails, along with standard Facebook performance metrics such as likes, shares, and impressions. This unstructured content is then enriched with Watson APIs to extract keywords, entities, sentiment, and emotion. 22 | 23 | After the data is enriched with Watson APIs, we'll use the Cognos Dashboard Embedded service to add a dashboard to the project. Using the dashboard, you can explore our results and build your own sophisticated visualizations to communicate the insights you've discovered. 24 | 25 | This code pattern provides mock Facebook data and a notebook, and comes with several pre-built visualizations to jump-start you in uncovering hidden insights. 26 | 27 | When the reader has completed this code pattern, they will understand how to: 28 | 29 | * Read external data into a Jupyter Notebook via Object Storage and pandas DataFrames. 30 | * Use a Jupyter notebook and Watson APIs to enrich unstructured data. 31 | * Write data from a pandas DataFrame in a Jupyter Notebook out to a file in Object Storage. 32 | * Visualize and explore the enriched data. 33 | 34 | ## Flow 35 | 36 | ![architecture](doc/source/images/architecture.png) 37 | 38 | 1. A CSV file exported from Facebook Analytics is added to Object Storage. 39 | 1. Generated code makes the file accessible as a pandas DataFrame. 40 | 1. The data is enriched with Watson Natural Language Understanding. 41 | 1. The data is enriched with Watson Visual Recognition. 42 | 1. Use a dashboard to visualize the enriched data and uncover hidden insights. 43 | 44 | ## Included components 45 | 46 | * [IBM Watson Studio](https://dataplatform.cloud.ibm.com): Analyze data using RStudio, Jupyter, and Python in a configured, collaborative environment that includes IBM value-adds, such as managed Spark.
47 | * [IBM Watson Natural Language Understanding](https://www.ibm.com/watson/services/natural-language-understanding/): Natural language processing for advanced text analysis. 48 | * [IBM Watson Visual Recognition](https://www.ibm.com/watson/services/visual-recognition/): Understand image content. 49 | * [IBM Cognos Dashboard Embedded](https://cloud.ibm.com/catalog/services/ibm-cognos-dashboard-embedded): Lets you, the developer, painlessly add end-to-end data visualization capabilities to your application. 50 | * [IBM Cloud Object Storage](https://cloud.ibm.com/catalog/services/cloud-object-storage): An IBM Cloud service that provides an unstructured cloud data store to build and deliver cost-effective apps and services with high reliability and fast speed to market. 51 | * [Jupyter Notebooks](https://jupyter.org/): An open-source web application that allows you to create and share documents that contain live code, equations, visualizations, and explanatory text. 52 | * [pandas](https://pandas.pydata.org/): A Python library providing high-performance, easy-to-use data structures. 53 | * [Beautiful Soup](https://www.crummy.com/software/BeautifulSoup/bs4/doc/): A Python library for pulling data out of HTML and XML files. 54 | 55 | ## Steps 56 | 57 | Follow these steps to set up and run this code pattern. The steps are 58 | described in detail below. 59 | 60 | 1. [Clone the repo](#1-clone-the-repo) 61 | 1. [Create a new Watson Studio project](#2-create-a-new-watson-studio-project) 62 | 1. [Add services to the project](#3-add-services-to-the-project) 63 | 1. [Create the notebook in Watson Studio](#4-create-the-notebook-in-watson-studio) 64 | 1. [Add credentials](#5-add-credentials) 65 | 1. [Add the CSV file](#6-add-the-csv-file) 66 | 1. [Run the notebook](#7-run-the-notebook) 67 | 1. [Add a dashboard to the project](#8-add-a-dashboard-to-the-project) 68 | 1. [Analyze the results](#9-analyze-the-results) 69 | 70 | ### 1. Clone the repo 71 | 72 | Clone the `pixiedust-facebook-analysis` repo locally. In a terminal, run the following command: 73 | 74 | ```bash 75 | git clone https://github.com/IBM/pixiedust-facebook-analysis.git 76 | ``` 77 | 78 | ### 2. Create a new Watson Studio project 79 | 80 | * Log into IBM's [Watson Studio](https://dataplatform.cloud.ibm.com). Once in, you'll land on the dashboard. 81 | 82 | * Create a new project by clicking `New project +` and then click on `Create an empty project`. 83 | 84 | * Enter a project name. 85 | 86 | * Choose an existing Object Storage instance or create a new one. 87 | 88 | * Click `Create`. 89 | 90 | * Upon successful project creation, you are taken to the project `Overview` tab. Take note of the `Assets` and `Settings` tabs; we'll be using them to associate our project with any external assets (datasets and notebooks) and any IBM Cloud services. 91 | 92 | ![studio-project-overview](doc/source/images/studio_project_overview.png) 93 | 94 | ### 3. Add services to the project 95 | 96 | * Associate the project with Watson services. To create an instance of each service, go to the `Settings` tab in the new project and scroll down to `Associated Services`. Click `Add service` and select `Watson` from the drop-down menu. Add the service using the free `Lite` plan. Repeat for each of the services used in this pattern: 97 | 98 | * Natural Language Understanding 99 | * Visual Recognition (optional) 100 | 101 | * Once your services are created, copy the credentials and save them for later.
You will use them in your Jupyter notebook. 102 | 103 | * Use the upper-left `☰` menu, and select `Services > My Services`. 104 | * Use the 3-dot actions menu to select `Manage in IBM Cloud` for each service. 105 | * Copy each `API key` and `URL` to use in the notebook. 106 | 107 | ### 4. Create the notebook in Watson Studio 108 | 109 | * Go back to your Watson Studio project by using your browser's back button or use the upper-left `☰` menu, and select `Projects` and open your project. 110 | 111 | * Select the `Overview` tab, click `Add to project +` on the top right and choose the `Notebook` asset type. 112 | 113 | ![add_notebook.png](doc/source/images/add_notebook.png) 114 | 115 | * Fill in the following information: 116 | 117 | * Select the `From URL` tab. [1] 118 | * Enter a `Name` for the notebook and optionally a description. [2] 119 | * For `Select runtime` select the `Default Python 3.6 Free` option. [3] 120 | * Under `Notebook URL` provide the following url [4]: 121 | 122 | ```url 123 | https://raw.githubusercontent.com/IBM/pixiedust-facebook-analysis/master/notebooks/pixiedust_facebook_analysis.ipynb 124 | ``` 125 | 126 | ![new_notebook](doc/source/images/new_notebook.png) 127 | 128 | * Click the `Create notebook` button. 129 | 130 | > **TIP:** Your notebook will appear in the `Notebooks` section of the `Assets` tab. 131 | 132 | ### 5. Add credentials 133 | 134 | Find the notebook cell after `1.5. Add Service Credentials From IBM Cloud for Watson Services`. 135 | 136 | Set the API key and URL for each service. 137 | 138 | ![add_credentials](doc/source/images/add_credentials.png) 139 | 140 | > **Note**: This cell is marked as a `hidden_cell` because it will contain sensitive credentials. 141 | 142 | ### 6. Add the CSV file 143 | 144 | #### Add the CSV file to the notebook 145 | 146 | Use `Find and Add Data` (look for the `01/00` icon) and its `Files` tab. From there you can click `browse` and add a `.csv` file from your computer. 147 | 148 | ![add_file](doc/source/images/add_file.png) 149 | 150 | > **Note**: If you don't have your own data, you can use our example by cloning this git repo. Look in the `data` directory. 151 | 152 | #### Insert to code 153 | 154 | Find the notebook cell after `2.1 Load data from Object Storage`. Place your cursor after `# **Insert to code > Insert pandas DataFrame**`. Make sure this cell is selected before inserting code. 155 | 156 | Using the file that you added above (under the `01/00` Files tab), use the `Insert to code` drop-down menu. Select `pandas DataFrame` from the drop-down menu. 157 | 158 | ![insert_to_code](doc/source/images/insert_to_code.png) 159 | 160 | > **Note**: This cell is marked as a `hidden_cell` because it contains 161 | sensitive credentials. 162 | 163 | ![inserted_pandas](doc/source/images/inserted_pandas.png) 164 | 165 | #### Fix-up df variable name 166 | 167 | The inserted code includes a generated method with credentials and then calls the generated method to set a variable with a name like `df_data_1`. If you do additional inserts, the method can be re-used and the variable will change (e.g. `df_data_2`). 168 | 169 | Later in the notebook, we set `df = df_data_1`. So you might need to fix the variable name `df_data_1` to match your inserted code or vice versa. 170 | 171 | #### Add file credentials 172 | 173 | We want to write the enriched file to the same container that we used above. So now we'll use the same file drop-down to insert credentials. We'll use them later when we write out the enriched CSV file. 
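For context, these file credentials are what the notebook uses near the end to write the enriched CSV back to the same bucket. The snippet below is only a rough sketch of that write-back, assuming the inserted dictionary exposes keys such as `IBM_API_KEY_ID`, `ENDPOINT`, and `BUCKET` (key names vary by Watson Studio version, and the notebook's own code may differ slightly):

```python
import ibm_boto3  # IBM COS SDK, available in Watson Studio Python runtimes (otherwise: pip install ibm-cos-sdk)
from botocore.client import Config

# `credentials_1` (from "Insert Credentials") and `df` (the enriched pandas
# DataFrame) are defined in earlier notebook cells.
cos_client = ibm_boto3.client(
    service_name='s3',
    ibm_api_key_id=credentials_1['IBM_API_KEY_ID'],
    ibm_auth_endpoint='https://iam.cloud.ibm.com/identity/token',
    config=Config(signature_version='oauth'),
    endpoint_url=credentials_1['ENDPOINT'])

# Write the enriched DataFrame back to the same bucket as a CSV file.
cos_client.put_object(
    Bucket=credentials_1['BUCKET'],
    Key='enriched_example_facebook_data.csv',
    Body=df.to_csv(index=False).encode('utf-8'))
```

If your inserted dictionary uses different key names, adjust the lookups to match it rather than this sketch.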
174 | 175 | After the `df` setup, there is a cell to enter the file credentials. Place your cursor after the `# insert credentials for file - Change to credentials_1` line. Make sure this cell is selected before inserting credentials. 176 | 177 | Use the CSV file's drop-down menu again. This time select `Insert Credentials`. 178 | 179 | ![insert_file_credentials](doc/source/images/insert_file_credentials.png) 180 | 181 | > **Note**: This cell is marked as a `hidden_cell` because it contains sensitive credentials. 182 | 183 | #### Fix-up credentials variable name 184 | 185 | The inserted code includes a dictionary with credentials assigned to a variable with a name like `credentials_1`. It may have a different name (e.g. `credentials_2`). Rename it or reassign it if needed. The notebook code assumes it will be `credentials_1`. 186 | 187 | ### 7. Run the notebook 188 | 189 | When a notebook is executed, each code cell in the notebook is executed, in order, from top to bottom. 190 | 191 | Each code cell is selectable and is preceded by a tag in the left margin. The tag format is `In [x]:`. Depending on the state of the notebook, the `x` can be: 192 | 193 | * A blank, which indicates that the cell has never been executed. 194 | * A number, which represents the relative order in which this code step was executed. 195 | * A `*`, which indicates that the cell is currently executing. 196 | 197 | There are several ways to execute the code cells in your notebook: 198 | 199 | * One cell at a time. 200 | * Select the cell, and then press the `Play` button in the toolbar. 201 | * Batch mode, in sequential order. 202 | * From the `Cell` menu bar, there are several options available. For example, you can `Run All` cells in your notebook, or you can `Run All Below`, which will start executing from the first cell under the currently selected cell, and then continue executing all cells that follow. 203 | * At a scheduled time. 204 | * Press the `Schedule` button located in the top right section of your notebook panel. Here you can schedule your notebook to be executed once at some future time, or repeatedly at your specified interval. 205 | 206 | ### 8. Add a dashboard to the project 207 | 208 | #### Add the enriched data as a project data asset 209 | 210 | * Go to the `Assets` tab in your Watson Studio project and click on the `01/00` (Find and add data) icon. 211 | * Select the `enriched_example_facebook_data.csv` file and use the 3-dot pull-down to select `Add as data asset`. 212 | 213 | #### Associate the project with a Dashboard service 214 | 215 | * Go to the `Settings` tab in the new project and scroll down to `Associated Services`. 216 | * Click `Add service` and select `Dashboard` from the drop-down menu. 217 | * Create the service using the free `Lite` plan. 218 | 219 | #### Load the provided dashboard.json file 220 | 221 | * Click the `Add to project +` button and select `Dashboard`. 222 | * Select the `From file` tab and use the `Select file` button to open the file `dashboards/dashboard.json` from your local repo. 223 | * Select your Cognos Dashboard Embedded service from the list. 224 | * Click `Create`. 225 | * If you are asked to re-link the data set, select your `enriched_example_facebook_data.csv` asset. 226 | 227 | ### 9.
Analyze the results 228 | 229 | If you walk through the cells, you will see that we demonstrated how to do the following: 230 | 231 | * Install external libraries from PyPI 232 | * Create clients to connect to Watson cognitive services 233 | * Load data from a local CSV file to a pandas DataFrame (via Object Storage) 234 | * Do some data manipulation with pandas 235 | * Use BeautifulSoup 236 | * Use Natural Language Understanding 237 | * Use Visual Recognition 238 | * Save the enriched data in a CSV file in Object Storage 239 | 240 | When you try the dashboard, you will see: 241 | 242 | * How to add a dashboard to a Watson Studio project 243 | * How to import a dashboard JSON file 244 | * How to link a dashboard to data saved in Cloud Object Storage 245 | * An example with tabs and a variety of charts 246 | * A dashboard tool that you can use to explore your data and create new visualizations to share 247 | 248 | ## Sample output 249 | 250 | The provided dashboard uses four tabs to show four simple charts: 251 | 252 | * Emotion 253 | * Sentiment 254 | * Entities 255 | * Keywords 256 | 257 | The enriched data contains emotions, sentiment, entities, and keywords that were added using Natural Language Understanding to process the posts, links, and thumbnails. Combining the enrichment with the metrics from Facebook gives us a huge number of options for what we could show on the dashboard. The dashboard editor also gives you great flexibility in how you arrange your dashboard and visualize your data. The example demonstrates the following: 258 | 259 | * A word cloud showing the keywords sized by total impressions and using color to show the sentiment 260 | 261 | ![keywords.png](doc/source/images/keywords.png) 262 | 263 | * A pie chart showing total reach by emotion 264 | 265 | ![emotion.png](doc/source/images/emotion.png) 266 | 267 | * A stacked bar chart showing likes, shares, and comments by post sentiment 268 | 269 | ![sentiment.png](doc/source/images/sentiment.png) 270 | 271 | * A bar chart with a line overlay, showing total impressions and paid impressions by mentioned entity 272 | 273 | ![entities.png](doc/source/images/entities.png) 274 | 275 | ## License 276 | 277 | This code pattern is licensed under the Apache License, Version 2. Separate third-party code objects invoked within this code pattern are licensed by their respective providers pursuant to their own separate licenses. Contributions are subject to the [Developer Certificate of Origin, Version 1.1](https://developercertificate.org/) and the [Apache License, Version 2](https://www.apache.org/licenses/LICENSE-2.0.txt).
278 | 279 | [Apache License FAQ](https://www.apache.org/foundation/license-faq.html#WhatDoesItMEAN) 280 | -------------------------------------------------------------------------------- /dashboards/dashboard.json: -------------------------------------------------------------------------------- 1 | {"name":"New dashboard","layout":{"id":"page0","items":[{"id":"page1","items":[{"id":"page2","css":"templateBox aspectRatio_default","items":[{"id":"page3","style":{"top":"0%","left":"0%","right":"75%","bottom":"75%"},"type":"templateDropZone","templateName":"dz1","relatedLayouts":""},{"id":"page4","css":"noBorderLeft","style":{"top":"0%","left":"25%","right":"50%","bottom":"75%"},"type":"templateDropZone","templateName":"dz3"},{"id":"page5","css":"noBorderLeft","style":{"top":"0%","left":"50%","right":"25%","bottom":"75%"},"type":"templateDropZone","templateName":"dz4"},{"id":"page6","css":"noBorderLeft","style":{"top":"0%","left":"75%","right":"00%","bottom":"75%"},"type":"templateDropZone","templateName":"dz5"},{"id":"page7","css":"noBorderTop","style":{"top":"25%","left":"0%","right":"0%","bottom":"0%"},"type":"templateDropZone","templateName":"dz2","relatedLayouts":""},{"id":"model000001737d90789a_00000000","style":{"top":"10.51930758988016%","left":"0.1466275659824047%","height":"49.80026631158455%","width":"46.18768328445748%"},"type":"widget","relatedLayouts":""},{"id":"model000001737d9504c7_00000000","style":{"width":"49.853372434017594%","height":"10.252996005326231%","top":"0.2663115845539281%","left":"0.1466275659824047%","transform":"matrix(0.999999, 0.00123007, -0.00123007, 0.999999, 0, 0)","-webkit-transform":"matrix(0.999999, 0.00123007, -0.00123007, 0.999999, 0, 0)"},"type":"widget","relatedLayouts":""}],"type":"scalingAbsolute"}],"type":"container","title":{"translationTable":{"Default":"Emotion"}},"templateName":"Template4"},{"id":"model000001737d8fcb55_00000000","items":[{"id":"model000001737d8fcb56_00000000","css":"templateBox aspectRatio_default","items":[{"id":"model000001737d8fcb56_00000001","style":{"top":"0%","left":"0%","right":"75%","bottom":"75%"},"type":"templateDropZone","templateName":"dz1","relatedLayouts":""},{"id":"model000001737d8fcb57_00000000","css":"noBorderLeft","style":{"top":"0%","left":"25%","right":"50%","bottom":"75%"},"type":"templateDropZone","templateName":"dz3"},{"id":"model000001737d8fcb57_00000001","css":"noBorderLeft","style":{"top":"0%","left":"50%","right":"25%","bottom":"75%"},"type":"templateDropZone","templateName":"dz4"},{"id":"model000001737d8fcb57_00000002","css":"noBorderLeft","style":{"top":"0%","left":"75%","right":"00%","bottom":"75%"},"type":"templateDropZone","templateName":"dz5"},{"id":"model000001737d8fcb57_00000003","css":"noBorderTop","style":{"top":"25%","left":"0%","right":"0%","bottom":"0%"},"type":"templateDropZone","templateName":"dz2"},{"id":"model000001737d96e769_00000000","style":{"width":"66.49560117302053%","height":"9.986684420772304%","top":"0.13315579227696406%","left":"0.07331378299120235%","transform":"matrix(0.999993, 0.00368788, -0.00368788, 0.999993, 0, 0)","-webkit-transform":"matrix(0.999993, 0.00368788, -0.00368788, 0.999993, 0, 
0)"},"type":"widget","relatedLayouts":""},{"id":"model000001737d9792c5_00000000","style":{"left":"0.07331378299120235%","top":"9.85352862849534%","height":"38.348868175765645%","width":"86.43695014662757%"},"type":"widget","relatedLayouts":""}],"type":"scalingAbsolute"}],"type":"container","title":{"translationTable":{"Default":"Sentiment"}},"templateName":"Template4"},{"id":"model000001737df944ca_00000000","items":[{"id":"model000001737df944cb_00000000","css":"templateBox aspectRatio_default","items":[{"id":"model000001737df944cb_00000001","style":{"top":"0%","left":"0%","right":"0%","bottom":"75%"},"type":"templateDropZone","templateName":"dz1"},{"id":"model000001737df944cb_00000002","css":"noBorderTop","style":{"top":"25%","left":"0%","right":"0%","bottom":"0%"},"type":"templateDropZone","templateName":"dz2","relatedLayouts":""},{"id":"model000001737df97403_00000000","style":{"width":"47.800586510263926%","height":"9.454061251664447%","left":"0.07331378299120235%","top":"0.13315579227696406%"},"type":"widget","relatedLayouts":""},{"id":"model000001737df9f2c0_00000000","style":{"left":"0.07331378299120235%","top":"9.587217043941411%","height":"43.00932090545939%","width":"99.9266862170088%"},"type":"widget","relatedLayouts":""}],"type":"scalingAbsolute"}],"type":"container","title":{"translationTable":{"Default":"Entities"}},"templateName":"Template2"},{"id":"model000001737e05eb20_00000000","items":[{"id":"model000001737e05eb21_00000000","css":"templateBox aspectRatio_default","items":[{"id":"model000001737e05eb21_00000001","style":{"top":"0%","left":"0%","right":"0%","bottom":"75%"},"type":"templateDropZone","templateName":"dz1","relatedLayouts":""},{"id":"model000001737e05eb21_00000002","css":"noBorderTop","style":{"top":"25%","left":"0%","right":"0%","bottom":"0%"},"type":"templateDropZone","templateName":"dz2","relatedLayouts":""},{"id":"model000001737e063c9c_00000000","style":{"left":"0.07331378299120235%","top":"16.37816245006658%","height":"70.97203728362184%","width":"68.841642228739%"},"type":"widget","relatedLayouts":""},{"id":"model000001737e07fcd2_00000000","style":{"width":"70.01466275659824%","height":"10.652463382157125%","left":"0.07331378299120235%","top":"1.8641810918774966%"},"type":"widget","relatedLayouts":""}],"type":"scalingAbsolute"}],"type":"container","title":{"translationTable":{"Default":"Keywords"}},"templateName":"Template2"}],"style":{"height":"100%"},"type":"tab"},"theme":"defaultTheme","version":1009,"eventGroups":[{"id":"page1:1","widgetIds":["model000001737d90789a_00000000"]},{"id":"model000001737d8fcb55_00000000:1","widgetIds":["model000001737d9792c5_00000000"]},{"id":"model000001737df944ca_00000000:1","widgetIds":["model000001737df9f2c0_00000000"]},{"id":"model000001737e05eb20_00000000:1","widgetIds":["model000001737e063c9c_00000000"]}],"properties":{"defaultLocale":"Default"},"dataSources":{"version":"1.0","sources":[{"id":"model000001737d8ed155_00000001","assetId":"assetId000001737d8ed154_00000000","clientId":"41cdf9f5-0401-49b3-8f31-31e986ac3625:747b0a56-b605-4bff-8cdb-3b30822c9295:747b0a56-b605-4bff-8cdb-3b30822c9295:data_asset:enriched_example_facebook_data.csv","module":{"xsd":"https://ibm.com/daas/module/1.0/module.xsd","source":{"id":"41cdf9f5-0401-49b3-8f31-31e986ac3625:747b0a56-b605-4bff-8cdb-3b30822c9295:747b0a56-b605-4bff-8cdb-3b30822c9295:data_asset:enriched_example_facebook_data.csv","srcUrl":{"sourceUrl":"{enc}fH+x5OzLtjJ0G9vSAwJ48i7ydshJYh0CBG4b4CUkDXQpmSlw1v56uYA7dwoeHokqnmhDRvIDiK7eAPHWvgCvHD65ss8nNoGVIHwqwSsxIiE+5zc4AySEUWqPubddjgKGU
4qP8IsF32ZvC338eNZpMvke9EyfArgbDrMizJHPR4Fob0v6uyeMj7iCUJlcs7WqnVeWM0nM89Nk2CrZ/rsZOoTKAulIsBfHgMKNa0QqVDlBhCls9ds32hEjwJDr1dnhRdF+UfataJk+0B8i9db4yYRcvEJERzNiXPZ3jTdEXdj1oZDcR48BKcHmeX4zii3A/CEQkXCoV3fhfVqgvn4qlg==","mimeType":"text/csv","property":[{"name":"headers","value":[{"name":"Authorization","value":"{enc}RYdkdiC0Nt73AYv8fl/cNuogEMfUjfwqqkzaFOvAl5RrI0pRUuSKxFqogZ4nMTTmemX78DVrom45zSRWxNInJ5xEnh6qNbNRFzKzbwbKudlQjZE2xlGOXDfCgkDDo0dE92eeqaAp7QRa3ZM7orGurMGMeWUPdjsnJRAuGZoTGZWCN9ub9BDGPKAnrNP6xSZ7rPQrdo6b91Xrv5UDr3Hutb0Jrb6/+F4+ugzvp1iVWBWjw23hQKMDDEUlYw5m1T+6HW6R6LJyHG68s5qmCGbPWV4ovsVm3MJEBE5eIuX4mchHm/sMEu0H0fKV0rZ46N6pi892tIwNU8hRcWjR+1zmVw=="}]}]}},"table":{"column":[{"datatype":"NVARCHAR(512)","name":"Link","label":"Link","description":"Link","usage":"attribute","regularAggregate":"countDistinct","taxonomyFamily":"cNone","nullable":true},{"datatype":"NVARCHAR(512)","name":"Post_ID","label":"Post ID","description":"Post ID","usage":"attribute","regularAggregate":"countDistinct","taxonomyFamily":"cNone","nullable":true},{"datatype":"NVARCHAR(512)","name":"Permalink","label":"Permalink","description":"Permalink","usage":"attribute","regularAggregate":"countDistinct","taxonomyFamily":"cNone","nullable":true},{"datatype":"NVARCHAR(512)","name":"Text","label":"Text","description":"Text","usage":"attribute","regularAggregate":"countDistinct","taxonomyFamily":"cNone","nullable":true},{"datatype":"NVARCHAR(512)","name":"Type","label":"Type","description":"Type","usage":"attribute","regularAggregate":"countDistinct","taxonomyFamily":"cNone","nullable":true},{"datatype":"NVARCHAR(512)","name":"Countries","label":"Countries","description":"Countries","usage":"attribute","regularAggregate":"countDistinct","taxonomyFamily":"cNone","nullable":true},{"datatype":"NVARCHAR(512)","name":"Languages","label":"Languages","description":"Languages","usage":"attribute","regularAggregate":"countDistinct","taxonomyFamily":"cNone","nullable":true},{"datatype":"NVARCHAR(512)","name":"Posted","label":"Posted","description":"Posted","usage":"attribute","regularAggregate":"countDistinct","taxonomyFamily":"cNone","nullable":true},{"datatype":"NVARCHAR(512)","name":"Audience_Targeting","label":"Audience Targeting","description":"Audience Targeting","usage":"attribute","regularAggregate":"countDistinct","taxonomyFamily":"cNone","nullable":true},{"datatype":"BIGINT","name":"Lifetime_Post_Total_Reach","label":"Lifetime Post Total Reach","description":"Lifetime Post Total Reach","usage":"fact","regularAggregate":"total","taxonomyFamily":"cNone","nullable":true},{"datatype":"BIGINT","name":"Lifetime_Post_organic_reach","label":"Lifetime Post organic reach","description":"Lifetime Post organic reach","usage":"fact","regularAggregate":"total","taxonomyFamily":"cNone","nullable":true},{"datatype":"NVARCHAR(512)","name":"Lifetime_Post_Paid_Reach","label":"Lifetime Post Paid Reach","description":"Lifetime Post Paid Reach","usage":"attribute","regularAggregate":"countDistinct","taxonomyFamily":"cNone","nullable":true},{"datatype":"BIGINT","name":"Lifetime_Post_Total_Impressions","label":"Lifetime Post Total Impressions","description":"Lifetime Post Total Impressions","usage":"fact","regularAggregate":"total","taxonomyFamily":"cNone","nullable":true},{"datatype":"BIGINT","name":"Lifetime_Post_Organic_Impressions","label":"Lifetime Post Organic Impressions","description":"Lifetime Post Organic 
Impressions","usage":"fact","regularAggregate":"total","taxonomyFamily":"cNone","nullable":true},{"datatype":"NVARCHAR(512)","name":"Lifetime_Post_Paid_Impressions","label":"Lifetime Post Paid Impressions","description":"Lifetime Post Paid Impressions","usage":"attribute","regularAggregate":"countDistinct","taxonomyFamily":"cNone","nullable":true},{"datatype":"NVARCHAR(512)","name":"Lifetime_Engaged_Users","label":"Lifetime Engaged Users","description":"Lifetime Engaged Users","usage":"attribute","regularAggregate":"countDistinct","taxonomyFamily":"cNone","nullable":true},{"datatype":"BIGINT","name":"Lifetime_Matched_Audience_Targeting_Consumers_on_Post","label":"Lifetime Matched Audience Targeting Consumers on Post","description":"Lifetime Matched Audience Targeting Consumers on Post","usage":"fact","regularAggregate":"total","taxonomyFamily":"cNone","nullable":true},{"datatype":"BIGINT","name":"Lifetime_Matched_Audience_Targeting_Consumptions_on_Post","label":"Lifetime Matched Audience Targeting Consumptions on Post","description":"Lifetime Matched Audience Targeting Consumptions on Post","usage":"fact","regularAggregate":"total","taxonomyFamily":"cNone","nullable":true},{"datatype":"BIGINT","name":"Lifetime_Negative_Feedback_from_Users","label":"Lifetime Negative Feedback from Users","description":"Lifetime Negative Feedback from Users","usage":"fact","regularAggregate":"total","taxonomyFamily":"cNone","nullable":true},{"datatype":"BIGINT","name":"Lifetime_Negative_Feedback","label":"Lifetime Negative Feedback","description":"Lifetime Negative Feedback","usage":"fact","regularAggregate":"total","taxonomyFamily":"cNone","nullable":true},{"datatype":"NVARCHAR(512)","name":"Lifetime_Post_Impressions_by_people_who_have_liked_your_Page","label":"Lifetime Post Impressions by people who have liked your Page","description":"Lifetime Post Impressions by people who have liked your Page","usage":"attribute","regularAggregate":"countDistinct","taxonomyFamily":"cNone","nullable":true},{"datatype":"NVARCHAR(512)","name":"Lifetime_Post_reach_by_people_who_like_your_Page","label":"Lifetime Post reach by people who like your Page","description":"Lifetime Post reach by people who like your Page","usage":"attribute","regularAggregate":"countDistinct","taxonomyFamily":"cNone","nullable":true},{"datatype":"NVARCHAR(512)","name":"Lifetime_Post_Paid_Impressions_by_people_who_have_liked_your_Page","label":"Lifetime Post Paid Impressions by people who have liked your Page","description":"Lifetime Post Paid Impressions by people who have liked your Page","usage":"attribute","regularAggregate":"countDistinct","taxonomyFamily":"cNone","nullable":true},{"datatype":"NVARCHAR(512)","name":"Lifetime_Paid_reach_of_a_post_by_people_who_like_your_Page","label":"Lifetime Paid reach of a post by people who like your Page","description":"Lifetime Paid reach of a post by people who like your Page","usage":"attribute","regularAggregate":"countDistinct","taxonomyFamily":"cNone","nullable":true},{"datatype":"NVARCHAR(512)","name":"Lifetime_People_who_have_liked_your_Page_and_engaged_with_your_post","label":"Lifetime People who have liked your Page and engaged with your post","description":"Lifetime People who have liked your Page and engaged with your post","usage":"attribute","regularAggregate":"countDistinct","taxonomyFamily":"cNone","nullable":true},{"datatype":"BIGINT","name":"Lifetime_Organic_views_to_95_","label":"Lifetime Organic views to 95%","description":"Lifetime Organic views to 
95%","usage":"fact","regularAggregate":"total","taxonomyFamily":"cNone","nullable":true},{"datatype":"BIGINT","name":"Lifetime_Organic_views_to_95_1","label":"Lifetime Organic views to 95%.1","description":"Lifetime Organic views to 95%.1","usage":"fact","regularAggregate":"total","taxonomyFamily":"cNone","nullable":true},{"datatype":"NVARCHAR(512)","name":"Lifetime_Paid_views_to_95_","label":"Lifetime Paid views to 95%","description":"Lifetime Paid views to 95%","usage":"attribute","regularAggregate":"countDistinct","taxonomyFamily":"cNone","nullable":true},{"datatype":"NVARCHAR(512)","name":"Lifetime_Paid_views_to_95_1","label":"Lifetime Paid views to 95%.1","description":"Lifetime Paid views to 95%.1","usage":"attribute","regularAggregate":"countDistinct","taxonomyFamily":"cNone","nullable":true},{"datatype":"NVARCHAR(512)","name":"Lifetime_Organic_Video_Views","label":"Lifetime Organic Video Views","description":"Lifetime Organic Video Views","usage":"attribute","regularAggregate":"countDistinct","taxonomyFamily":"cNone","nullable":true},{"datatype":"NVARCHAR(512)","name":"Lifetime_Organic_Video_Views_1","label":"Lifetime Organic Video Views.1","description":"Lifetime Organic Video Views.1","usage":"attribute","regularAggregate":"countDistinct","taxonomyFamily":"cNone","nullable":true},{"datatype":"NVARCHAR(512)","name":"Lifetime_Paid_Video_Views","label":"Lifetime Paid Video Views","description":"Lifetime Paid Video Views","usage":"attribute","regularAggregate":"countDistinct","taxonomyFamily":"cNone","nullable":true},{"datatype":"NVARCHAR(512)","name":"Lifetime_Paid_Video_Views_1","label":"Lifetime Paid Video Views.1","description":"Lifetime Paid Video Views.1","usage":"attribute","regularAggregate":"countDistinct","taxonomyFamily":"cNone","nullable":true},{"datatype":"NVARCHAR(512)","name":"Lifetime_Average_time_video_viewed","label":"Lifetime Average time video viewed","description":"Lifetime Average time video viewed","usage":"attribute","regularAggregate":"countDistinct","taxonomyFamily":"cNone","nullable":true},{"datatype":"NVARCHAR(512)","name":"Lifetime_Video_length","label":"Lifetime Video length","description":"Lifetime Video length","usage":"attribute","regularAggregate":"countDistinct","taxonomyFamily":"cNone","nullable":true},{"datatype":"BIGINT","name":"Lifetime_Talking_About_This_Post_by_action_type_share","label":"Lifetime Talking About This (Post) by action type - share","description":"Lifetime Talking About This (Post) by action type - share","usage":"fact","regularAggregate":"total","taxonomyFamily":"cNone","nullable":true},{"datatype":"DOUBLE","name":"Lifetime_Talking_About_This_Post_by_action_type_like","label":"Lifetime Talking About This (Post) by action type - like","description":"Lifetime Talking About This (Post) by action type - like","usage":"fact","regularAggregate":"total","taxonomyFamily":"cNone","nullable":true},{"datatype":"DOUBLE","name":"Lifetime_Talking_About_This_Post_by_action_type_comment","label":"Lifetime Talking About This (Post) by action type - comment","description":"Lifetime Talking About This (Post) by action type - comment","usage":"fact","regularAggregate":"total","taxonomyFamily":"cNone","nullable":true},{"datatype":"BIGINT","name":"Lifetime_Post_Stories_by_action_type_share","label":"Lifetime Post Stories by action type - share","description":"Lifetime Post Stories by action type - 
share","usage":"fact","regularAggregate":"total","taxonomyFamily":"cNone","nullable":true},{"datatype":"DOUBLE","name":"Lifetime_Post_Stories_by_action_type_like","label":"Lifetime Post Stories by action type - like","description":"Lifetime Post Stories by action type - like","usage":"fact","regularAggregate":"total","taxonomyFamily":"cNone","nullable":true},{"datatype":"DOUBLE","name":"Lifetime_Post_Stories_by_action_type_comment","label":"Lifetime Post Stories by action type - comment","description":"Lifetime Post Stories by action type - comment","usage":"fact","regularAggregate":"total","taxonomyFamily":"cNone","nullable":true},{"datatype":"NVARCHAR(512)","name":"Lifetime_Post_Audience_Targeting_Unique_Consumptions_by_Type_other_clicks","label":"Lifetime Post Audience Targeting Unique Consumptions by Type - other clicks","description":"Lifetime Post Audience Targeting Unique Consumptions by Type - other clicks","usage":"attribute","regularAggregate":"countDistinct","taxonomyFamily":"cNone","nullable":true},{"datatype":"NVARCHAR(512)","name":"Lifetime_Post_Audience_Targeting_Unique_Consumptions_by_Type_link_clicks","label":"Lifetime Post Audience Targeting Unique Consumptions by Type - link clicks","description":"Lifetime Post Audience Targeting Unique Consumptions by Type - link clicks","usage":"attribute","regularAggregate":"countDistinct","taxonomyFamily":"cNone","nullable":true},{"datatype":"NVARCHAR(512)","name":"Lifetime_Post_Audience_Targeting_Unique_Consumptions_by_Type_photo_view","label":"Lifetime Post Audience Targeting Unique Consumptions by Type - photo view","description":"Lifetime Post Audience Targeting Unique Consumptions by Type - photo view","usage":"attribute","regularAggregate":"countDistinct","taxonomyFamily":"cNone","nullable":true},{"datatype":"NVARCHAR(512)","name":"Lifetime_Post_Audience_Targeting_Unique_Consumptions_by_Type_video_play","label":"Lifetime Post Audience Targeting Unique Consumptions by Type - video play","description":"Lifetime Post Audience Targeting Unique Consumptions by Type - video play","usage":"attribute","regularAggregate":"countDistinct","taxonomyFamily":"cNone","nullable":true},{"datatype":"BIGINT","name":"Lifetime_Matched_Audience_Targeting_Consumptions_by_Type_other_clicks","label":"Lifetime Matched Audience Targeting Consumptions by Type - other clicks","description":"Lifetime Matched Audience Targeting Consumptions by Type - other clicks","usage":"fact","regularAggregate":"total","taxonomyFamily":"cNone","nullable":true},{"datatype":"DOUBLE","name":"Lifetime_Matched_Audience_Targeting_Consumptions_by_Type_link_clicks","label":"Lifetime Matched Audience Targeting Consumptions by Type - link clicks","description":"Lifetime Matched Audience Targeting Consumptions by Type - link clicks","usage":"fact","regularAggregate":"total","taxonomyFamily":"cNone","nullable":true},{"datatype":"NVARCHAR(512)","name":"Lifetime_Matched_Audience_Targeting_Consumptions_by_Type_photo_view","label":"Lifetime Matched Audience Targeting Consumptions by Type - photo view","description":"Lifetime Matched Audience Targeting Consumptions by Type - photo view","usage":"attribute","regularAggregate":"countDistinct","taxonomyFamily":"cNone","nullable":true},{"datatype":"NVARCHAR(512)","name":"Lifetime_Matched_Audience_Targeting_Consumptions_by_Type_video_play","label":"Lifetime Matched Audience Targeting Consumptions by Type - video play","description":"Lifetime Matched Audience Targeting Consumptions by Type - video 
play","usage":"attribute","regularAggregate":"countDistinct","taxonomyFamily":"cNone","nullable":true},{"datatype":"NVARCHAR(512)","name":"Lifetime_Negative_Feedback_from_Users_by_Type_hide_all_clicks","label":"Lifetime Negative Feedback from Users by Type - hide_all_clicks","description":"Lifetime Negative Feedback from Users by Type - hide_all_clicks","usage":"attribute","regularAggregate":"countDistinct","taxonomyFamily":"cNone","nullable":true},{"datatype":"NVARCHAR(512)","name":"Lifetime_Negative_Feedback_from_Users_by_Type_hide_clicks","label":"Lifetime Negative Feedback from Users by Type - hide_clicks","description":"Lifetime Negative Feedback from Users by Type - hide_clicks","usage":"attribute","regularAggregate":"countDistinct","taxonomyFamily":"cNone","nullable":true},{"datatype":"NVARCHAR(512)","name":"Lifetime_Negative_Feedback_from_Users_by_Type_report_spam_clicks","label":"Lifetime Negative Feedback from Users by Type - report_spam_clicks","description":"Lifetime Negative Feedback from Users by Type - report_spam_clicks","usage":"attribute","regularAggregate":"countDistinct","taxonomyFamily":"cNone","nullable":true},{"datatype":"NVARCHAR(512)","name":"Lifetime_Negative_Feedback_by_Type_hide_all_clicks","label":"Lifetime Negative Feedback by Type - hide_all_clicks","description":"Lifetime Negative Feedback by Type - hide_all_clicks","usage":"attribute","regularAggregate":"countDistinct","taxonomyFamily":"cNone","nullable":true},{"datatype":"NVARCHAR(512)","name":"Lifetime_Negative_Feedback_by_Type_hide_clicks","label":"Lifetime Negative Feedback by Type - hide_clicks","description":"Lifetime Negative Feedback by Type - hide_clicks","usage":"attribute","regularAggregate":"countDistinct","taxonomyFamily":"cNone","nullable":true},{"datatype":"NVARCHAR(512)","name":"Lifetime_Negative_Feedback_by_Type_report_spam_clicks","label":"Lifetime Negative Feedback by Type - report_spam_clicks","description":"Lifetime Negative Feedback by Type - report_spam_clicks","usage":"attribute","regularAggregate":"countDistinct","taxonomyFamily":"cNone","nullable":true},{"datatype":"NVARCHAR(512)","name":"Thumbnails","label":"Thumbnails","description":"Thumbnails","usage":"attribute","regularAggregate":"countDistinct","taxonomyFamily":"cNone","nullable":true},{"datatype":"NVARCHAR(512)","name":"Image","label":"Image","description":"Image","usage":"attribute","regularAggregate":"countDistinct","taxonomyFamily":"cNone","nullable":true},{"datatype":"NVARCHAR(512)","name":"Extended_Links","label":"Extended Links","description":"Extended 
Links","usage":"attribute","regularAggregate":"countDistinct","taxonomyFamily":"cNone","nullable":true},{"datatype":"NVARCHAR(512)","name":"TextHighestEmotion","label":"TextHighestEmotion","description":"TextHighestEmotion","usage":"attribute","regularAggregate":"countDistinct","taxonomyFamily":"cNone","nullable":true},{"datatype":"DOUBLE","name":"TextHighestEmotionScore","label":"TextHighestEmotionScore","description":"TextHighestEmotionScore","usage":"fact","regularAggregate":"total","taxonomyFamily":"cNone","nullable":true},{"datatype":"NVARCHAR(512)","name":"TextOverallSentimentType","label":"TextOverallSentimentType","description":"TextOverallSentimentType","usage":"attribute","regularAggregate":"countDistinct","taxonomyFamily":"cNone","nullable":true},{"datatype":"VARCHAR(64)","name":"TextOverallSentimentScore","label":"TextOverallSentimentScore","description":"TextOverallSentimentScore","usage":"fact","regularAggregate":"total","taxonomyFamily":"cNone","nullable":true},{"datatype":"NVARCHAR(512)","name":"TextKeywords","label":"TextKeywords","description":"TextKeywords","usage":"attribute","regularAggregate":"countDistinct","taxonomyFamily":"cNone","nullable":true},{"datatype":"NVARCHAR(512)","name":"TextEntities","label":"TextEntities","description":"TextEntities","usage":"attribute","regularAggregate":"countDistinct","taxonomyFamily":"cNone","nullable":true},{"datatype":"NVARCHAR(512)","name":"MaxTextKeywords","label":"MaxTextKeywords","description":"MaxTextKeywords","usage":"attribute","regularAggregate":"countDistinct","taxonomyFamily":"cNone","nullable":true},{"datatype":"NVARCHAR(512)","name":"MaxTextEntity","label":"MaxTextEntity","description":"MaxTextEntity","usage":"attribute","regularAggregate":"countDistinct","taxonomyFamily":"cNone","nullable":true},{"datatype":"NVARCHAR(512)","name":"ThumbnailHighestEmotion","label":"ThumbnailHighestEmotion","description":"ThumbnailHighestEmotion","usage":"attribute","regularAggregate":"countDistinct","taxonomyFamily":"cNone","nullable":true},{"datatype":"DOUBLE","name":"ThumbnailHighestEmotionScore","label":"ThumbnailHighestEmotionScore","description":"ThumbnailHighestEmotionScore","usage":"fact","regularAggregate":"total","taxonomyFamily":"cNone","nullable":true},{"datatype":"NVARCHAR(512)","name":"ThumbnailOverallSentimentType","label":"ThumbnailOverallSentimentType","description":"ThumbnailOverallSentimentType","usage":"attribute","regularAggregate":"countDistinct","taxonomyFamily":"cNone","nullable":true},{"datatype":"VARCHAR(64)","name":"ThumbnailOverallSentimentScore","label":"ThumbnailOverallSentimentScore","description":"ThumbnailOverallSentimentScore","usage":"fact","regularAggregate":"total","taxonomyFamily":"cNone","nullable":true},{"datatype":"NVARCHAR(512)","name":"ThumbnailKeywords","label":"ThumbnailKeywords","description":"ThumbnailKeywords","usage":"attribute","regularAggregate":"countDistinct","taxonomyFamily":"cNone","nullable":true},{"datatype":"NVARCHAR(512)","name":"ThumbnailEntities","label":"ThumbnailEntities","description":"ThumbnailEntities","usage":"attribute","regularAggregate":"countDistinct","taxonomyFamily":"cNone","nullable":true},{"datatype":"NVARCHAR(512)","name":"MaxThumbnailKeywords","label":"MaxThumbnailKeywords","description":"MaxThumbnailKeywords","usage":"attribute","regularAggregate":"countDistinct","taxonomyFamily":"cNone","nullable":true},{"datatype":"NVARCHAR(512)","name":"MaxThumbnailEntity","label":"MaxThumbnailEntity","description":"MaxThumbnailEntity","usage":"attribute","regularAggregate"
:"countDistinct","taxonomyFamily":"cNone","nullable":true},{"datatype":"NVARCHAR(512)","name":"LinkHighestEmotion","label":"LinkHighestEmotion","description":"LinkHighestEmotion","usage":"attribute","regularAggregate":"countDistinct","taxonomyFamily":"cNone","nullable":true},{"datatype":"DOUBLE","name":"LinkHighestEmotionScore","label":"LinkHighestEmotionScore","description":"LinkHighestEmotionScore","usage":"fact","regularAggregate":"total","taxonomyFamily":"cNone","nullable":true},{"datatype":"NVARCHAR(512)","name":"LinkOverallSentimentType","label":"LinkOverallSentimentType","description":"LinkOverallSentimentType","usage":"attribute","regularAggregate":"countDistinct","taxonomyFamily":"cNone","nullable":true},{"datatype":"DOUBLE","name":"LinkOverallSentimentScore","label":"LinkOverallSentimentScore","description":"LinkOverallSentimentScore","usage":"fact","regularAggregate":"total","taxonomyFamily":"cNone","nullable":true},{"datatype":"NVARCHAR(512)","name":"LinkKeywords","label":"LinkKeywords","description":"LinkKeywords","usage":"attribute","regularAggregate":"countDistinct","taxonomyFamily":"cNone","nullable":true},{"datatype":"NVARCHAR(512)","name":"LinkEntities","label":"LinkEntities","description":"LinkEntities","usage":"attribute","regularAggregate":"countDistinct","taxonomyFamily":"cNone","nullable":true},{"datatype":"NVARCHAR(512)","name":"Article_Text","label":"Article Text","description":"Article Text","usage":"attribute","regularAggregate":"countDistinct","taxonomyFamily":"cNone","nullable":true},{"datatype":"NVARCHAR(512)","name":"MaxLinkKeywords","label":"MaxLinkKeywords","description":"MaxLinkKeywords","usage":"attribute","regularAggregate":"countDistinct","taxonomyFamily":"cNone","nullable":true},{"datatype":"NVARCHAR(512)","name":"MaxLinkEntity","label":"MaxLinkEntity","description":"MaxLinkEntity","usage":"attribute","regularAggregate":"countDistinct","taxonomyFamily":"cNone","nullable":true}],"name":"enriched_example_facebook_data_csv","label":"enriched_example_facebook_data.csv"},"id":"41cdf9f5-0401-49b3-8f31-31e986ac3625:747b0a56-b605-4bff-8cdb-3b30822c9295:747b0a56-b605-4bff-8cdb-3b30822c9295:data_asset:enriched_example_facebook_data.csv","label":"enriched_example_facebook_data.csv","identifier":"enriched_example_facebook_data_csv"},"name":"enriched_example_facebook_data.csv","shaping":{"embeddedModuleUpToDate":true}}]},"pageContext":[],"drillThrough":[],"widgets":{"model000001737d90789a_00000000":{"id":"model000001737d90789a_00000000","data":{"dataViews":[{"modelRef":"model000001737d8ed155_00000001","dataItems":[{"id":"model000001737d91e983_00000000","itemId":"enriched_example_facebook_data_csv.Lifetime_Post_Total_Reach","itemLabel":"Lifetime Post Total Reach"},{"id":"model000001737d938ceb_00000000","itemId":"enriched_example_facebook_data_csv.TextHighestEmotion","itemLabel":"TextHighestEmotion"}],"id":"model000001737d91e982_00000000"}]},"visTypeLocked":true,"slotmapping":{"slots":[{"name":"categories","dataItems":["model000001737d938ceb_00000000"],"dataItemSettings":[],"caption":"Segments","id":"categories","layerId":"data"},{"name":"values","dataItems":["model000001737d91e983_00000000"],"caption":"Size","id":"values","layerId":"data"}]},"type":"live","visId":"com.ibm.vis.rave2bundlepie","name":{"translationTable":{}}},"model000001737d9504c7_00000000":{"id":"model000001737d9504c7_00000000","type":"text","content":{"translationTable":{"Default":"

Total Reach by Emotion

"}},"isResponsive":true,"visTypeLocked":true,"name":""},"model000001737d96e769_00000000":{"id":"model000001737d96e769_00000000","type":"text","content":{"translationTable":{"Default":"

Likes, Shares, and Comments by Sentiment


"}},"isResponsive":true,"visTypeLocked":true,"name":""},"model000001737d9792c5_00000000":{"id":"model000001737d9792c5_00000000","data":{"dataViews":[{"modelRef":"model000001737d8ed155_00000001","dataItems":[{"id":"model000001737d9c7fcb_00000000","itemId":"enriched_example_facebook_data_csv.TextOverallSentimentType","itemLabel":"TextOverallSentimentType"},{"id":"_multiMeasuresSeries","itemId":"_multiMeasuresSeries","itemLabel":"Measures group (3)"},{"id":"model000001737df516f1_00000000","itemId":"enriched_example_facebook_data_csv.Lifetime_Talking_About_This_Post_by_action_type_share","itemLabel":"Lifetime Talking About This (Post) by action type - share"},{"id":"model000001737df5829e_00000000","itemId":"enriched_example_facebook_data_csv.Lifetime_Talking_About_This_Post_by_action_type_like","itemLabel":"Lifetime Talking About This (Post) by action type - like"},{"id":"model000001737df58e90_00000000","itemId":"enriched_example_facebook_data_csv.Lifetime_Talking_About_This_Post_by_action_type_comment","itemLabel":"Lifetime Talking About This (Post) by action type - comment"}],"id":"model000001737d988928_00000000"}]},"visTypeLocked":true,"slotmapping":{"slots":[{"name":"categories","dataItems":["model000001737d9c7fcb_00000000"],"dataItemSettings":[],"caption":"Bars","id":"categories","layerId":"data"},{"name":"values","dataItems":["model000001737df516f1_00000000","model000001737df5829e_00000000","model000001737df58e90_00000000"],"caption":"Length","id":"values","layerId":"data"},{"name":"color","dataItems":["_multiMeasuresSeries"],"caption":"Color","id":"color"}]},"type":"live","visId":"com.ibm.vis.rave2bundlestackedbar","name":{"translationTable":{}},"localFilters":[]},"model000001737df97403_00000000":{"id":"model000001737df97403_00000000","type":"text","content":{"translationTable":{"Default":"

Impressions by Entity

"}},"isResponsive":true,"visTypeLocked":true,"name":""},"model000001737df9f2c0_00000000":{"id":"model000001737df9f2c0_00000000","data":{"dataViews":[{"modelRef":"model000001737d8ed155_00000001","dataItems":[{"id":"model000001737e01e0ee_00000000","itemId":"enriched_example_facebook_data_csv.MaxTextEntity","itemLabel":"MaxTextEntity"},{"id":"model000001737e03f567_00000000","itemId":"enriched_example_facebook_data_csv.Lifetime_Post_Total_Impressions","itemLabel":"Lifetime Post Total Impressions"},{"id":"model000001737e046601_00000000","itemId":"enriched_example_facebook_data_csv.Lifetime_Post_Paid_Impressions","itemLabel":"Lifetime Post Paid Impressions"}],"id":"model000001737dfa4df9_00000000"}]},"visTypeLocked":true,"slotmapping":{"slots":[{"name":"categories","dataItems":["model000001737e01e0ee_00000000"],"dataItemSettings":[],"caption":"x-axis","id":"categories","layerId":"data"},{"name":"columnValue","dataItems":["model000001737e03f567_00000000"],"caption":"Length","id":"columnValue","layerId":"data"},{"name":"lineValue","dataItems":["model000001737e046601_00000000"],"caption":"Line position","id":"lineValue"}]},"type":"live","visId":"com.ibm.vis.rave2bundlecomposite","name":{"translationTable":{}},"localFilters":[]},"model000001737e063c9c_00000000":{"id":"model000001737e063c9c_00000000","data":{"dataViews":[{"modelRef":"model000001737d8ed155_00000001","dataItems":[{"id":"model000001737e065fc7_00000000","itemId":"enriched_example_facebook_data_csv.MaxLinkKeywords","itemLabel":"MaxLinkKeywords"},{"id":"model000001737e06cdee_00000000","itemId":"enriched_example_facebook_data_csv.Lifetime_Post_Total_Impressions","itemLabel":"Lifetime Post Total Impressions"},{"id":"model000001737e07229a_00000000","itemId":"enriched_example_facebook_data_csv.LinkOverallSentimentType","itemLabel":"LinkOverallSentimentType"}],"id":"model000001737e065fc6_00000000"}]},"visTypeLocked":true,"slotmapping":{"slots":[{"name":"categories","dataItems":["model000001737e065fc7_00000000"],"dataItemSettings":[],"caption":"Words","id":"categories"},{"name":"size","dataItems":["model000001737e06cdee_00000000"],"caption":"Size","id":"size"},{"name":"color","dataItems":["model000001737e07229a_00000000"],"caption":"Color","id":"color"}]},"type":"live","visId":"com.ibm.vis.rave2bundlewordcloud","name":{"translationTable":{}}},"model000001737e07fcd2_00000000":{"id":"model000001737e07fcd2_00000000","type":"text","content":{"translationTable":{"Default":"

Keyword-cloud showing impressions and sentiment


"}},"isResponsive":true,"visTypeLocked":true,"name":""}}} -------------------------------------------------------------------------------- /doc/source/images/add_credentials.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IBM/pixiedust-facebook-analysis/752eb287732a20cfe669871e418dbc18b7127257/doc/source/images/add_credentials.png -------------------------------------------------------------------------------- /doc/source/images/add_file.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IBM/pixiedust-facebook-analysis/752eb287732a20cfe669871e418dbc18b7127257/doc/source/images/add_file.png -------------------------------------------------------------------------------- /doc/source/images/add_notebook.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IBM/pixiedust-facebook-analysis/752eb287732a20cfe669871e418dbc18b7127257/doc/source/images/add_notebook.png -------------------------------------------------------------------------------- /doc/source/images/architecture.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IBM/pixiedust-facebook-analysis/752eb287732a20cfe669871e418dbc18b7127257/doc/source/images/architecture.png -------------------------------------------------------------------------------- /doc/source/images/emotion.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IBM/pixiedust-facebook-analysis/752eb287732a20cfe669871e418dbc18b7127257/doc/source/images/emotion.png -------------------------------------------------------------------------------- /doc/source/images/emotional_engagement.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IBM/pixiedust-facebook-analysis/752eb287732a20cfe669871e418dbc18b7127257/doc/source/images/emotional_engagement.png -------------------------------------------------------------------------------- /doc/source/images/entities.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IBM/pixiedust-facebook-analysis/752eb287732a20cfe669871e418dbc18b7127257/doc/source/images/entities.png -------------------------------------------------------------------------------- /doc/source/images/insert_file_credentials.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IBM/pixiedust-facebook-analysis/752eb287732a20cfe669871e418dbc18b7127257/doc/source/images/insert_file_credentials.png -------------------------------------------------------------------------------- /doc/source/images/insert_to_code.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IBM/pixiedust-facebook-analysis/752eb287732a20cfe669871e418dbc18b7127257/doc/source/images/insert_to_code.png -------------------------------------------------------------------------------- /doc/source/images/inserted_pandas.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IBM/pixiedust-facebook-analysis/752eb287732a20cfe669871e418dbc18b7127257/doc/source/images/inserted_pandas.png 
-------------------------------------------------------------------------------- /doc/source/images/keywords.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IBM/pixiedust-facebook-analysis/752eb287732a20cfe669871e418dbc18b7127257/doc/source/images/keywords.png -------------------------------------------------------------------------------- /doc/source/images/new_notebook.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IBM/pixiedust-facebook-analysis/752eb287732a20cfe669871e418dbc18b7127257/doc/source/images/new_notebook.png -------------------------------------------------------------------------------- /doc/source/images/sentiment.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IBM/pixiedust-facebook-analysis/752eb287732a20cfe669871e418dbc18b7127257/doc/source/images/sentiment.png -------------------------------------------------------------------------------- /doc/source/images/sentimental_engagement.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IBM/pixiedust-facebook-analysis/752eb287732a20cfe669871e418dbc18b7127257/doc/source/images/sentimental_engagement.png -------------------------------------------------------------------------------- /doc/source/images/studio_project_overview.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IBM/pixiedust-facebook-analysis/752eb287732a20cfe669871e418dbc18b7127257/doc/source/images/studio_project_overview.png -------------------------------------------------------------------------------- /manifest.yml: -------------------------------------------------------------------------------- 1 | declared-services: 2 | pfa-visual-recognition: 3 | label: watson_vision_combined 4 | plan: lite 5 | pfa-natural-language-understanding: 6 | label: natural-language-understanding 7 | plan: free 8 | pfa-tone-analyzer: 9 | label: tone_analyzer 10 | plan: lite 11 | applications: 12 | - services: 13 | - pfa-visual-recognition 14 | - pfa-natural-language-understanding 15 | - pfa-tone-analyzer 16 | memory: 128M 17 | no-route: true 18 | name: pixiedust-facebook-analysis 19 | health-check-type: none 20 | buildpack: noop-buildpack 21 | -------------------------------------------------------------------------------- /notebooks/pixiedust_facebook_analysis.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": { 6 | "collapsed": true 7 | }, 8 | "source": [ 9 | "# Analyze Facebook Data Using IBM Watson and IBM Watson Studio\n", 10 | "\n", 11 | "This is a three-part notebook meant to show how anyone can enrich and analyze a combined dataset of unstructured and structured information with IBM Watson and IBM Watson Studio. For this example we are using a standard Facebook Analytics export which features texts from posts, articles and thumbnails, along with standard performance metrics such as likes, shares, and impressions. \n", 12 | "\n", 13 | "**Part I** will use the Natural Language Understanding and (optionally) Visual Recognition services from IBM Watson to enrich the Facebook posts, thumbnails, and articles by pulling out `Sentiment`, `Emotion`, `Entities`, `Keywords`, and `Images`. 
The end result of Part I will be additional features and metrics we can visualize in Part III. \n", 14 | "\n", 15 | "**Part II** will set up multiple pandas DataFrames that will contain the values, and metrics needed to find insights in the Part III tests and experiments.\n", 16 | "\n", 17 | "**Part III** will use charts to visualize the features that we discovered during enrichment and show how they correlate with customer impressions.\n", 18 | "\n", 19 | "\n", 20 | "#### You should only need to change data in the Setup portion of this notebook. All places where you see User Input is where you should be adding inputs. \n", 21 | "\n", 22 | "### Table of Contents\n", 23 | "\n", 24 | "### [**Part I - Enrich**](#part1)
\n", 25 | "1. [Setup](#setup)
\n", 26 | " 1.1 [Install Watson Developer Cloud and BeautifulSoup Packages](#setup1)
\n", 27 | " 1.2 [Install PixieDust](#pixie)
\n", 28 | " 1.3 [Restart Kernel](#restart)
\n", 29 | " 1.4 [Import Packages and Libraries](#setup2)
\n", 30 | " 1.5 [Add Service Credentials From IBM Cloud for Watson Services](#setup3)
\n", 31 | "2. [Load Data](#load)
\n", 32 | " 2.1 [Load Data From Cloud Object Storage as a pandas DataFrame](#load1)
\n", 33 | " 2.2 [Set Variables](#load2)
\n", 34 | "3. [Prepare Data](#prepare)
\n", 35 | " 3.1 [Data Cleansing with Python](#prepare1)
\n", 36 | " 3.2 [Beautiful Soup to Extract Thumbnails and Extented Links](#prepare2)
\n", 37 | "4. [Enrich Data](#enrich)
\n", 38 | " 4.1 [NLU for Post Text](#nlupost)
\n", 39 | " 4.2 [NLU for Thumbnail Text](#nlutn)
\n", 40 | " 4.3 [NLU for Article Text](#nlulink)
\n", 41 | " 4.4 [Visual Recognition](#visual)
\n", 42 | "5. [Write Data](#write)
\n", 43 | " 5.1 [Convert DataFrame to new CSV](#write1)
\n", 44 | " 5.2 [Write Data to Cloud Object Storage](#write2)
\n", 45 | " \n", 46 | "### [**Part II - Data Preparation**](#part2)
\n", 47 | "1. [Prepare Data](#prepare)
\n", 48 | " 1.1 [Create Multiple DataFrames for Visualizations](#visualizations)
\n", 49 | " 1.2 [Create a Consolidated Sentiment and Emotion DataFrame](#tone)
\n", 50 | " 1.3 [Create a Consolidated Keyword DataFrame](#keyword)
\n", 51 | " 1.4 [Create a Consolidated Entity DataFrame](#entity)
\n", 52 | " \n", 53 | "### [**Part III - Analyze**](#part3)
\n", 54 | "\n", 55 | "1. [Setup](#2setup)
\n", 56 | " 1.1 [Assign Variables](#2setup2)
\n", 57 | "2. [Visualize Data](#2visual)
\n", 58 | " 2.1 [Run PixieDust Visualization Library with Display() API](#2visual1)\n" 59 | ] 60 | }, 61 | { 62 | "cell_type": "markdown", 63 | "metadata": {}, 64 | "source": [ 65 | "# Part I - Enrich\n", 66 | "\n", 67 | "## 1. Setup\n", 68 | "To prepare your environment, you need to install some packages and enter credentials for the Watson services.\n", 69 | "\n", 70 | "### 1.1 Install Latest Watson Developer Cloud and Beautiful Soup Packages\n", 71 | "You need to install these packages:\n", 72 | " - [Watson APIs Python SDK](https://github.com/watson-developer-cloud/python-sdk): a client library for Watson services.\n", 73 | " - Beautiful Soup: a library to parse data from HTML for enriching the Facebook data.\n", 74 | " - PixieDust: a library to visualize the data. \n", 75 | "\n", 76 | "Install the Watson Python SDK package:" 77 | ] 78 | }, 79 | { 80 | "cell_type": "code", 81 | "execution_count": null, 82 | "metadata": {}, 83 | "outputs": [], 84 | "source": [ 85 | "!pip -q install --user --no-warn-script-location ibm-watson==4.3.0" 86 | ] 87 | }, 88 | { 89 | "cell_type": "markdown", 90 | "metadata": {}, 91 | "source": [ 92 | "Install the Beautiful Soup package:" 93 | ] 94 | }, 95 | { 96 | "cell_type": "code", 97 | "execution_count": null, 98 | "metadata": {}, 99 | "outputs": [], 100 | "source": [ 101 | "!pip -q install --user beautifulsoup4==4.8.2" 102 | ] 103 | }, 104 | { 105 | "cell_type": "markdown", 106 | "metadata": {}, 107 | "source": [ 108 | "\n", 109 | "### 1.2 Install PixieDust Library" 110 | ] 111 | }, 112 | { 113 | "cell_type": "code", 114 | "execution_count": null, 115 | "metadata": {}, 116 | "outputs": [], 117 | "source": [ 118 | "!pip -q install --user --no-warn-script-location --upgrade pixiedust==1.1.14" 119 | ] 120 | }, 121 | { 122 | "cell_type": "markdown", 123 | "metadata": {}, 124 | "source": [ 125 | "\n", 126 | "### 1.3 Restart Kernel\n", 127 | "> Required after installs/upgrades only.\n", 128 | "\n", 129 | "If any libraries were just installed or upgraded, restart the kernel before continuing. After this has been done once, you might want to comment out the `!pip install` lines above for cleaner output and a faster \"Run All\"." 130 | ] 131 | }, 132 | { 133 | "cell_type": "markdown", 134 | "metadata": {}, 135 | "source": [ 136 | "\n", 137 | "### 1.4 Import Packages and Libraries\n", 138 | "> Tip: To check if you have a package installed, open a new cell and write: `help()`." 
139 | ] 140 | }, 141 | { 142 | "cell_type": "code", 143 | "execution_count": null, 144 | "metadata": {}, 145 | "outputs": [], 146 | "source": [ 147 | "import json\n", 148 | "import sys\n", 149 | "\n", 150 | "from ibm_watson import NaturalLanguageUnderstandingV1\n", 151 | "from ibm_watson import VisualRecognitionV3\n", 152 | "from ibm_cloud_sdk_core.authenticators import IAMAuthenticator\n", 153 | "from ibm_watson.natural_language_understanding_v1 import Features, EntitiesOptions, KeywordsOptions, EmotionOptions, SentimentOptions\n", 154 | "\n", 155 | "import operator\n", 156 | "from functools import reduce\n", 157 | "from io import StringIO\n", 158 | "import numpy as np\n", 159 | "from bs4 import BeautifulSoup as bs\n", 160 | "from operator import itemgetter\n", 161 | "from os.path import join, dirname\n", 162 | "import pandas as pd\n", 163 | "import numpy as np\n", 164 | "import requests\n", 165 | "\n", 166 | "# Suppress some pandas warnings\n", 167 | "pd.options.mode.chained_assignment = None # default='warn'\n", 168 | "# Suppress SSL warnings\n", 169 | "requests.packages.urllib3.disable_warnings()" 170 | ] 171 | }, 172 | { 173 | "cell_type": "markdown", 174 | "metadata": {}, 175 | "source": [ 176 | "\n", 177 | "### 1.5 Add Service Credentials From IBM Cloud for Watson Services\n", 178 | "Edit the following cell to provide your credentials for Watson and Natural Language Understanding and Visual Recognition.\n", 179 | "\n", 180 | "You must create a Watson Natural Language Understanding service and, optionally, a Watson Visual Recognition service on [IBM Cloud](https://cloud.ibm.com/).\n", 181 | "\n", 182 | "1. Create a service for [Natural Language Understanding (NLU)](https://cloud.ibm.com/catalog/services/natural-language-understanding). \n", 183 | "1. Create a service for [Visual Recognition](https://cloud.ibm.com/catalog/services/visual-recognition).\n", 184 | "1. Insert API keys and URLs in the following cell.\n", 185 | "1. Run the cell.\n", 186 | "\n", 187 | "### _User Input_ " 188 | ] 189 | }, 190 | { 191 | "cell_type": "code", 192 | "execution_count": null, 193 | "metadata": {}, 194 | "outputs": [], 195 | "source": [ 196 | "# @hidden_cell\n", 197 | "\n", 198 | "# Watson Natural Language Understanding (NLU)\n", 199 | "NATURAL_LANGUAGE_UNDERSTANDING_API_KEY = ''\n", 200 | "NATURAL_LANGUAGE_UNDERSTANDING_URL = ''\n", 201 | "\n", 202 | "# Watson Visual Recognition (optional)\n", 203 | "VISUAL_RECOGNITION_API_KEY = ''\n", 204 | "VISUAL_RECOGNITION_URL = ''\n" 205 | ] 206 | }, 207 | { 208 | "cell_type": "code", 209 | "execution_count": null, 210 | "metadata": {}, 211 | "outputs": [], 212 | "source": [ 213 | "# Create the Watson clients\n", 214 | "\n", 215 | "nlu_auth = IAMAuthenticator(NATURAL_LANGUAGE_UNDERSTANDING_API_KEY)\n", 216 | "nlu = NaturalLanguageUnderstandingV1(version='2020-03-09',\n", 217 | " authenticator=nlu_auth)\n", 218 | "nlu.set_service_url(NATURAL_LANGUAGE_UNDERSTANDING_URL)\n", 219 | "\n", 220 | "visual_recognition = False # Making visrec optional.\n", 221 | "if VISUAL_RECOGNITION_API_KEY and VISUAL_RECOGNITION_URL:\n", 222 | " vr_auth = IAMAuthenticator(VISUAL_RECOGNITION_API_KEY)\n", 223 | " visual_recognition = VisualRecognitionV3(version='2019-03-09',\n", 224 | " authenticator=vr_auth)\n", 225 | " visual_recognition.set_service_url(VISUAL_RECOGNITION_URL)\n", 226 | "else:\n", 227 | " print(\"Skipping Visual Recognition\")" 228 | ] 229 | }, 230 | { 231 | "cell_type": "markdown", 232 | "metadata": {}, 233 | "source": [ 234 | "\n", 235 | "## 2. 
Load Data\n", 236 | "The data you'll analyzing is a sample of a standard export of the Facebook Insights Post information from the IBM Watson Facebook page. Engagement metrics such as clicks, impressions, and so on, are altered and do not reflect actual post performance data. The data is on the Watson Studio community page.\n", 237 | "\n", 238 | "### 2.1 Load the data as a pandas DataFrame\n", 239 | "\n", 240 | "To get the data and load it into a pandas DataFrame:\n", 241 | "\n", 242 | "1. Load the file by clicking the **Find and Add Data** icon and then dragging and dropping the file onto the pane or browsing for the file. The data is stored in the object storage container that is associated with your project.\n", 243 | "1. Click in the next cell and then choose **Insert to code > pandas DataFrame** from below the file name and then run the cell. Change the inserted variable name to `df_data_1`\n", 244 | "\n", 245 | "### _User Input_ " 246 | ] 247 | }, 248 | { 249 | "cell_type": "code", 250 | "execution_count": null, 251 | "metadata": {}, 252 | "outputs": [], 253 | "source": [ 254 | "# **Insert to code > pandas DataFrame**\n" 255 | ] 256 | }, 257 | { 258 | "cell_type": "markdown", 259 | "metadata": {}, 260 | "source": [ 261 | "### 2.2 Set variables\n", 262 | "You need to set these variables:\n", 263 | " - The name of the DataFrame\n", 264 | " - Your credentials for the source file\n", 265 | " - A file name for the enriched DataFrame\n", 266 | " \n", 267 | "Define a variable, `df`, for the DataFrame that you just created. If necessary, change the original DataFrame name to match the one you created." 268 | ] 269 | }, 270 | { 271 | "cell_type": "code", 272 | "execution_count": null, 273 | "metadata": {}, 274 | "outputs": [], 275 | "source": [ 276 | "# Make sure this uses the variable above. The number will vary in the inserted code.\n", 277 | "try:\n", 278 | " df = df_data_1\n", 279 | "except NameError as e:\n", 280 | " print('Error: Setup is incorrect or incomplete.\\n')\n", 281 | " print('Follow the instructions to insert the pandas DataFrame above, and edit to')\n", 282 | " print('make the generated df_data_# variable match the variable used here.')\n", 283 | " raise" 284 | ] 285 | }, 286 | { 287 | "cell_type": "markdown", 288 | "metadata": {}, 289 | "source": [ 290 | "**Select the cell below and place your cursor on an empty line below the comment.** \n", 291 | "Put in the credentials for the file you want to enrich by clicking on the 10/01 (upper right), then click `Insert to code` under the file you want to enrich, and choose `Insert Credentials`.\n", 292 | "\n", 293 | "**Change the inserted variable name to `credentials_1`**\n", 294 | "\n", 295 | "### _User Input_ " 296 | ] 297 | }, 298 | { 299 | "cell_type": "code", 300 | "execution_count": null, 301 | "metadata": {}, 302 | "outputs": [], 303 | "source": [ 304 | "# insert credentials for file - Change to credentials_1\n" 305 | ] 306 | }, 307 | { 308 | "cell_type": "code", 309 | "execution_count": null, 310 | "metadata": {}, 311 | "outputs": [], 312 | "source": [ 313 | "# Make sure this uses the variable above. 
The number will vary in the inserted code.\n", 314 | "try:\n", 315 | " credentials = credentials_1\n", 316 | "except NameError as e:\n", 317 | " print('Error: Setup is incorrect or incomplete.\\n')\n", 318 | " print('Follow the instructions to insert the file credentials above, and edit to')\n", 319 | " print('make the generated credentials_# variable match the variable used here.')\n", 320 | " raise" 321 | ] 322 | }, 323 | { 324 | "cell_type": "markdown", 325 | "metadata": {}, 326 | "source": [ 327 | "\n", 328 | "## 3. Prepare Data\n", 329 | "You'll prepare the data by cleansing it and extracting the URLs. Many of the posts contain both text and a URL. The first task is to separate URLs from the text so that they can be analyzed separately. Then you need to get thumbnails for the photos and links, and convert any shortened URLs to full URLs." 330 | ] 331 | }, 332 | { 333 | "cell_type": "markdown", 334 | "metadata": {}, 335 | "source": [ 336 | "\n", 337 | "### 3.1 Data Cleansing with Python\n", 338 | "Renaming columns, removing noticeable noise in the data, pulling out URLs and appending to a new column to run through NLU.\n", 339 | "\n", 340 | "To cleanse the data, you'll rename a column and add a column with the URLs that were embedded in the post. \n", 341 | "\n", 342 | "Change the name of the `Post Message` column to `Text`:" 343 | ] 344 | }, 345 | { 346 | "cell_type": "code", 347 | "execution_count": null, 348 | "metadata": {}, 349 | "outputs": [], 350 | "source": [ 351 | "df.rename(columns={'Post Message': 'Text'}, inplace=True)" 352 | ] 353 | }, 354 | { 355 | "cell_type": "code", 356 | "execution_count": null, 357 | "metadata": {}, 358 | "outputs": [], 359 | "source": [ 360 | "# Drop the rows that have no value for the text.\n", 361 | "df.dropna(subset=['Text'], inplace=True)" 362 | ] 363 | }, 364 | { 365 | "cell_type": "markdown", 366 | "metadata": {}, 367 | "source": [ 368 | "Use the `str.partition` function to remove strings that contain \"http\" and \"www\" from the `Text` column and save them in new DataFrames, then add all web addresses to a new `Link` column in the original DataFrame. This process captures all web addresses: https, http, and www." 
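As a quick aside before the full cell below, here is how `str.partition` behaves on a single made-up post (an editor's illustration, not data from the export): it returns three columns, the text before the delimiter, the delimiter itself, and everything after it, so the second and third columns can be joined to recover the link.

```python
# Editor's illustration on a hypothetical post; the next cell applies the
# same idea to the whole DataFrame.
import pandas as pd

sample = pd.Series(["Watson is hiring! http://ibm.co/example"])
parts = sample.str.partition("http")  # columns: 0 = text before, 1 = "http", 2 = remainder
link = parts[1] + parts[2]            # -> "http://ibm.co/example"
text = parts[0]                       # -> "Watson is hiring! "
print(text[0], "|", link[0])
```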
369 | ] 370 | }, 371 | { 372 | "cell_type": "code", 373 | "execution_count": null, 374 | "metadata": {}, 375 | "outputs": [], 376 | "source": [ 377 | "df_http = df[\"Text\"].str.partition(\"http\")\n", 378 | "df_www = df[\"Text\"].str.partition(\"www\")\n", 379 | "\n", 380 | "# Combine delimiters with actual links\n", 381 | "df_http[\"Link\"] = df_http[1].map(str) + df_http[2]\n", 382 | "df_www[\"Link1\"] = df_www[1].map(str) + df_www[2]\n", 383 | "\n", 384 | "# Include only Link columns\n", 385 | "df_http.drop(df_http.columns[0:3], axis=1, inplace = True)\n", 386 | "df_www.drop(df_www.columns[0:3], axis=1, inplace = True)\n", 387 | "\n", 388 | "# Merge http and www DataFrames\n", 389 | "dfmerge = pd.concat([df_http, df_www], axis=1)\n", 390 | "\n", 391 | "# The following steps will allow you to merge data columns from the left to the right\n", 392 | "dfmerge = dfmerge.apply(lambda x: x.str.strip()).replace('', np.nan)\n", 393 | "\n", 394 | "# Use fillna to fill any blanks with the Link1 column\n", 395 | "dfmerge[\"Link\"].fillna(dfmerge[\"Link1\"], inplace = True)\n", 396 | "\n", 397 | "# Delete Link1 (www column)\n", 398 | "dfmerge.drop(\"Link1\", axis=1, inplace = True)\n", 399 | "\n", 400 | "# Combine Link data frame\n", 401 | "df = pd.concat([dfmerge,df], axis = 1)\n", 402 | "\n", 403 | "# Make sure text column is a string\n", 404 | "df[\"Text\"] = df[\"Text\"].astype(\"str\")\n", 405 | "\n", 406 | "# Strip links from Text column\n", 407 | "df['Text'] = df['Text'].apply(lambda x: x.split('http')[0])\n", 408 | "df['Text'] = df['Text'].apply(lambda x: x.split('www')[0])" 409 | ] 410 | }, 411 | { 412 | "cell_type": "markdown", 413 | "metadata": {}, 414 | "source": [ 415 | "### 3.2 Extract thumbnails and extended links\n", 416 | "\n", 417 | "A standard Facebook export does not provide the thumbnail that usually summarizes the link or photo associated with each post. 
Use the Beautiful Soup library to go into the HTML of the post and extract the thumbnail text:" 418 | ] 419 | }, 420 | { 421 | "cell_type": "code", 422 | "execution_count": null, 423 | "metadata": {}, 424 | "outputs": [], 425 | "source": [ 426 | "# Change links from objects to strings\n", 427 | "for link in df.Link:\n", 428 | " df.Link.to_string()\n", 429 | "\n", 430 | "piclinks = []\n", 431 | "description = []\n", 432 | "for url in df[\"Link\"]:\n", 433 | " if pd.isnull(url):\n", 434 | " piclinks.append(\"\")\n", 435 | " description.append(\"\")\n", 436 | " continue\n", 437 | " \n", 438 | " try:\n", 439 | " # Skip certificate check with verify=False.\n", 440 | " # Don't do this if your urls are not secure.\n", 441 | " page3 = requests.get(url, verify=False)\n", 442 | " if page3.status_code != requests.codes.ok:\n", 443 | " piclinks.append(\"\")\n", 444 | " description.append(\"\")\n", 445 | " continue\n", 446 | " except Exception as e:\n", 447 | " print(\"Skipping url %s: %s\" % (url, e))\n", 448 | " piclinks.append(\"\")\n", 449 | " description.append(\"\")\n", 450 | " continue\n", 451 | " \n", 452 | " soup3 = bs(page3.text,\"lxml\")\n", 453 | " \n", 454 | " pic = soup3.find('meta', property =\"og:image\")\n", 455 | " if pic:\n", 456 | " piclinks.append(pic[\"content\"])\n", 457 | " else: \n", 458 | " piclinks.append(\"\")\n", 459 | " \n", 460 | " content = None\n", 461 | " desc = soup3.find(attrs={'name':'Description'})\n", 462 | " if desc:\n", 463 | " content = desc['content']\n", 464 | " if not content or content == 'null':\n", 465 | " # Try again with lowercase description\n", 466 | " desc = soup3.find(attrs={'name':'description'})\n", 467 | " if desc:\n", 468 | " content = desc['content']\n", 469 | " if not content or content == 'null':\n", 470 | " description.append(\"\")\n", 471 | " else:\n", 472 | " description.append(content)\n", 473 | " \n", 474 | "# Save thumbnail descriptions to df in a column titled 'Thumbnails'\n", 475 | "df[\"Thumbnails\"] = description\n", 476 | "# Save image links to df in a column titled 'Image'\n", 477 | "df[\"Image\"] = piclinks" 478 | ] 479 | }, 480 | { 481 | "cell_type": "markdown", 482 | "metadata": {}, 483 | "source": [ 484 | "Convert shortened links to full links.\n", 485 | "Use requests module to pull extended links. This is only necessary if the Facebook page uses different links than the articles themselves. For this example we are using IBM Watson's Facebook export which uses an IBM link. \n" 486 | ] 487 | }, 488 | { 489 | "cell_type": "code", 490 | "execution_count": null, 491 | "metadata": {}, 492 | "outputs": [], 493 | "source": [ 494 | "shortlink = df[\"Link\"]\n", 495 | "extendedlink = []\n", 496 | "\n", 497 | "for link in shortlink:\n", 498 | " if isinstance(link, float): # Float is not a URL, probably NaN.\n", 499 | " extendedlink.append('')\n", 500 | " else:\n", 501 | " try:\n", 502 | " extended_link = requests.Session().head(link, allow_redirects=True).url\n", 503 | " extendedlink.append(extended_link)\n", 504 | " except Exception as e:\n", 505 | " print(\"Skipping link %s: %s\" % (link, e))\n", 506 | " extendedlink.append('')\n", 507 | "\n", 508 | "df[\"Extended Links\"] = extendedlink" 509 | ] 510 | }, 511 | { 512 | "cell_type": "markdown", 513 | "metadata": {}, 514 | "source": [ 515 | " \n", 516 | "## 4. 
Enrichment Time!\n", 517 | "\n", 518 | "### 4.1 NLU for the Post Text\n", 519 | "The following script is an example of how to use Natural Language Understanding to iterate through each post and extract enrichment features for future analysis.\n", 520 | "\n", 521 | "For this example, we are looking at the `Text` column in our DataFrame, which contains the text of each post. NLU can also iterate through a column of URLs, or other freeform text. There's a list within a list for the Keywords and Entities features to allow gathering multiple entities and keywords from each piece of text.\n", 522 | "\n", 523 | "Each extracted feature is appended to the DataFrame in a new column that's defined at the end of the script. If you want to run this same script for the other columns, set the loop iterable to the column name, if you are using URLs, change the `text=response` parameter to `url=response`, and update the new column names as necessary. " 524 | ] 525 | }, 526 | { 527 | "cell_type": "code", 528 | "execution_count": null, 529 | "metadata": {}, 530 | "outputs": [], 531 | "source": [ 532 | "# Define the list of features to get enrichment values for entities, keywords, emotion and sentiment\n", 533 | "features = Features(entities=EntitiesOptions(), keywords=KeywordsOptions(), emotion=EmotionOptions(), sentiment=SentimentOptions())\n", 534 | "\n", 535 | "overallSentimentScore = []\n", 536 | "overallSentimentType = []\n", 537 | "highestEmotion = []\n", 538 | "highestEmotionScore = []\n", 539 | "kywords = []\n", 540 | "entities = []\n", 541 | "\n", 542 | "# Go through every response and enrich the text using NLU.\n", 543 | "for text in df['Text']:\n", 544 | " if not text:\n", 545 | " # print(\"Text is empty\")\n", 546 | " overallSentimentScore.append('0')\n", 547 | " overallSentimentType.append('0')\n", 548 | " highestEmotion.append(\"\")\n", 549 | " highestEmotionScore.append(\"\")\n", 550 | " kywords.append(\"\")\n", 551 | " entities.append(\"\")\n", 552 | " continue\n", 553 | " else:\n", 554 | " # We are assuming English to avoid errors when the language cannot be detected.\n", 555 | " enriched_json = nlu.analyze(text=text, features=features, language='en').get_result()\n", 556 | "\n", 557 | " # Get the SENTIMENT score and type\n", 558 | " if 'sentiment' in enriched_json:\n", 559 | " if('score' in enriched_json['sentiment'][\"document\"]):\n", 560 | " overallSentimentScore.append(enriched_json[\"sentiment\"][\"document\"][\"score\"])\n", 561 | " else:\n", 562 | " overallSentimentScore.append('0')\n", 563 | "\n", 564 | " if('label' in enriched_json['sentiment'][\"document\"]):\n", 565 | " overallSentimentType.append(enriched_json[\"sentiment\"][\"document\"][\"label\"])\n", 566 | " else:\n", 567 | " overallSentimentType.append('0')\n", 568 | " else:\n", 569 | " overallSentimentScore.append('0')\n", 570 | " overallSentimentType.append('0')\n", 571 | "\n", 572 | " # Read the EMOTIONS into a dict and get the key (emotion) with maximum value\n", 573 | " if 'emotion' in enriched_json:\n", 574 | " me = max(enriched_json[\"emotion\"][\"document\"][\"emotion\"].items(), key=operator.itemgetter(1))[0]\n", 575 | " highestEmotion.append(me)\n", 576 | " highestEmotionScore.append(enriched_json[\"emotion\"][\"document\"][\"emotion\"][me])\n", 577 | " else:\n", 578 | " highestEmotion.append(\"\")\n", 579 | " highestEmotionScore.append(\"\")\n", 580 | "\n", 581 | " # Iterate and get KEYWORDS with a confidence of over 70%\n", 582 | " if 'keywords' in enriched_json:\n", 583 | " tmpkw = []\n", 584 | " for kw in 
enriched_json['keywords']:\n", 585 | " if(float(kw[\"relevance\"]) >= 0.7):\n", 586 | " tmpkw.append(kw[\"text\"])\n", 587 | " # Convert multiple keywords in a list to a string and append the string\n", 588 | " kywords.append(', '.join(tmpkw))\n", 589 | " else:\n", 590 | " kywords.append(\"\")\n", 591 | " \n", 592 | " # Iterate and get Entities with a confidence of over 30%\n", 593 | " if 'entities' in enriched_json:\n", 594 | " tmpent = []\n", 595 | " for ent in enriched_json['entities']: \n", 596 | " if(float(ent[\"relevance\"]) >= 0.3):\n", 597 | " tmpent.append(ent[\"type\"])\n", 598 | " \n", 599 | " # Convert multiple entities in a list to a string and append the string\n", 600 | " entities.append(', '.join(tmpent))\n", 601 | " else:\n", 602 | " entities.append(\"\")\n", 603 | " \n", 604 | "# Create columns from the list and append to the DataFrame\n", 605 | "if highestEmotion:\n", 606 | " df['TextHighestEmotion'] = highestEmotion\n", 607 | "if highestEmotionScore:\n", 608 | " df['TextHighestEmotionScore'] = highestEmotionScore\n", 609 | "\n", 610 | "if overallSentimentType:\n", 611 | " df['TextOverallSentimentType'] = overallSentimentType\n", 612 | "if overallSentimentScore:\n", 613 | " df['TextOverallSentimentScore'] = overallSentimentScore\n", 614 | "\n", 615 | "df['TextKeywords'] = kywords\n", 616 | "df['TextEntities'] = entities" 617 | ] 618 | }, 619 | { 620 | "cell_type": "markdown", 621 | "metadata": {}, 622 | "source": [ 623 | "After we extract all of the Keywords and Entities from each Post, we have columns with multiple Keywords and Entities separated by commas. For our Analysis in Part II, we also wanted the top Keyword and Entity for each Post. Because of this, we added two new columns to capture the `MaxTextKeyword` and `MaxTextEntity`." 624 | ] 625 | }, 626 | { 627 | "cell_type": "code", 628 | "execution_count": null, 629 | "metadata": {}, 630 | "outputs": [], 631 | "source": [ 632 | "# Choose first of Keywords and Entities\n", 633 | "df[\"MaxTextKeywords\"] = df[\"TextKeywords\"].apply(lambda x: x.split(',')[0])\n", 634 | "df[\"MaxTextEntity\"] = df[\"TextEntities\"].apply(lambda x: x.split(',')[0])" 635 | ] 636 | }, 637 | { 638 | "cell_type": "markdown", 639 | "metadata": {}, 640 | "source": [ 641 | "\n", 642 | "### 4.2 NLU for Thumbnail Text\n", 643 | "\n", 644 | "We will repeat the same process for Thumbnails and Article Text." 
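The NLU loops in sections 4.1, 4.2, and 4.3 are nearly identical, so if you prefer less repetition you could factor the per-document work into a helper such as the sketch below. This is an editor's suggestion, not part of the original notebook; it assumes the `nlu` client and `features` object defined earlier in Part I.

```python
# Editor's sketch of a reusable helper (assumes `nlu` and `features` from above).
def enrich_text(text, nlu, features):
    """Return (sentiment score, sentiment label, top emotion, top emotion score,
    keywords, entities) for one piece of text, with blanks when NLU returns nothing."""
    if not text:
        return ('', '', '', '', '', '')
    result = nlu.analyze(text=text, features=features, language='en').get_result()
    sentiment = result.get('sentiment', {}).get('document', {})
    emotions = result.get('emotion', {}).get('document', {}).get('emotion', {})
    top_emotion = max(emotions, key=emotions.get) if emotions else ''
    keywords = ', '.join(kw['text'] for kw in result.get('keywords', [])
                         if float(kw['relevance']) >= 0.7)
    entities = ', '.join(ent['type'] for ent in result.get('entities', [])
                         if float(ent['relevance']) >= 0.3)
    return (sentiment.get('score', ''), sentiment.get('label', ''),
            top_emotion, emotions.get(top_emotion, ''), keywords, entities)

# Example usage (hypothetical): enrich_text(df['Thumbnails'][0], nlu, features)
```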
645 | ] 646 | }, 647 | { 648 | "cell_type": "code", 649 | "execution_count": null, 650 | "metadata": {}, 651 | "outputs": [], 652 | "source": [ 653 | "# Define the list of features to get enrichment values for entities, keywords, emotion and sentiment\n", 654 | "features = Features(entities=EntitiesOptions(), keywords=KeywordsOptions(), emotion=EmotionOptions(), sentiment=SentimentOptions())\n", 655 | "\n", 656 | "overallSentimentScore = []\n", 657 | "overallSentimentType = []\n", 658 | "highestEmotion = []\n", 659 | "highestEmotionScore = []\n", 660 | "kywords = []\n", 661 | "entities = []\n", 662 | "\n", 663 | "# Go through every response and enrich the text using NLU.\n", 664 | "for text in df['Thumbnails']:\n", 665 | " if not text:\n", 666 | " overallSentimentScore.append(' ')\n", 667 | " overallSentimentType.append(' ')\n", 668 | " highestEmotion.append(' ')\n", 669 | " highestEmotionScore.append(' ')\n", 670 | " kywords.append(' ')\n", 671 | " entities.append(' ')\n", 672 | " continue\n", 673 | "\n", 674 | " enriched_json = nlu.analyze(text=text, features=features, language='en').get_result()\n", 675 | "\n", 676 | " # Get the SENTIMENT score and type\n", 677 | " if 'sentiment' in enriched_json:\n", 678 | " if('score' in enriched_json['sentiment'][\"document\"]):\n", 679 | " overallSentimentScore.append(enriched_json[\"sentiment\"][\"document\"][\"score\"])\n", 680 | " else:\n", 681 | " overallSentimentScore.append(\"\")\n", 682 | "\n", 683 | " if('label' in enriched_json['sentiment'][\"document\"]):\n", 684 | " overallSentimentType.append(enriched_json[\"sentiment\"][\"document\"][\"label\"])\n", 685 | " else:\n", 686 | " overallSentimentType.append(\"\")\n", 687 | "\n", 688 | " # Read the EMOTIONS into a dict and get the key (emotion) with maximum value\n", 689 | " if 'emotion' in enriched_json:\n", 690 | " me = max(enriched_json[\"emotion\"][\"document\"][\"emotion\"].items(), key=operator.itemgetter(1))[0]\n", 691 | " highestEmotion.append(me)\n", 692 | " highestEmotionScore.append(enriched_json[\"emotion\"][\"document\"][\"emotion\"][me])\n", 693 | "\n", 694 | " else:\n", 695 | " highestEmotion.append(\"\")\n", 696 | " highestEmotionScore.append(\"\")\n", 697 | "\n", 698 | " # Iterate and get KEYWORDS with a confidence of over 70%\n", 699 | " if 'keywords' in enriched_json:\n", 700 | " tmpkw = []\n", 701 | " for kw in enriched_json['keywords']:\n", 702 | " if(float(kw[\"relevance\"]) >= 0.7):\n", 703 | " tmpkw.append(kw[\"text\"])\n", 704 | " # Convert multiple keywords in a list to a string and append the string\n", 705 | " kywords.append(', '.join(tmpkw))\n", 706 | " \n", 707 | " # Iterate and get Entities with a confidence of over 30%\n", 708 | " if 'entities' in enriched_json:\n", 709 | " tmpent = []\n", 710 | " for ent in enriched_json['entities']: \n", 711 | " if(float(ent[\"relevance\"]) >= 0.3):\n", 712 | " tmpent.append(ent[\"type\"])\n", 713 | " # Convert multiple entities in a list to a string and append the string\n", 714 | " entities.append(', '.join(tmpent))\n", 715 | " else:\n", 716 | " entities.append(\"\") \n", 717 | " \n", 718 | "# Create columns from the list and append to the DataFrame\n", 719 | "if highestEmotion:\n", 720 | " df['ThumbnailHighestEmotion'] = highestEmotion\n", 721 | "if highestEmotionScore:\n", 722 | " df['ThumbnailHighestEmotionScore'] = highestEmotionScore\n", 723 | "\n", 724 | "if overallSentimentType:\n", 725 | " df['ThumbnailOverallSentimentType'] = overallSentimentType\n", 726 | "if overallSentimentScore:\n", 727 | " 
df['ThumbnailOverallSentimentScore'] = overallSentimentScore\n", 728 | "\n", 729 | "df['ThumbnailKeywords'] = kywords\n", 730 | "df['ThumbnailEntities'] = entities" 731 | ] 732 | }, 733 | { 734 | "cell_type": "markdown", 735 | "metadata": {}, 736 | "source": [ 737 | " Add two new columns to capture the `MaxThumbnailKeyword` and `MaxThumbnailEntity`:" 738 | ] 739 | }, 740 | { 741 | "cell_type": "code", 742 | "execution_count": null, 743 | "metadata": {}, 744 | "outputs": [], 745 | "source": [ 746 | "# Set 'Max' to first one from keywords and entities lists\n", 747 | "df[\"MaxThumbnailKeywords\"] = df[\"ThumbnailKeywords\"].apply(lambda x: x.split(',')[0])\n", 748 | "df[\"MaxThumbnailEntity\"] = df[\"ThumbnailEntities\"].apply(lambda x: x.split(',')[0])" 749 | ] 750 | }, 751 | { 752 | "cell_type": "markdown", 753 | "metadata": {}, 754 | "source": [ 755 | " \n", 756 | "### 4.3 NLU for Article Text" 757 | ] 758 | }, 759 | { 760 | "cell_type": "code", 761 | "execution_count": null, 762 | "metadata": {}, 763 | "outputs": [], 764 | "source": [ 765 | "# Define the list of features to get enrichment values for entities, keywords, emotion and sentiment\n", 766 | "features = Features(entities=EntitiesOptions(), keywords=KeywordsOptions(), emotion=EmotionOptions(), sentiment=SentimentOptions())\n", 767 | "\n", 768 | "overallSentimentScore = []\n", 769 | "overallSentimentType = []\n", 770 | "highestEmotion = []\n", 771 | "highestEmotionScore = []\n", 772 | "kywords = []\n", 773 | "entities = []\n", 774 | "article_text = []\n", 775 | " \n", 776 | "# Go through every response and enrich the article using NLU\n", 777 | "for url in df['Extended Links']:\n", 778 | " if not url:\n", 779 | " overallSentimentScore.append(' ')\n", 780 | " overallSentimentType.append(' ')\n", 781 | " highestEmotion.append(' ')\n", 782 | " highestEmotionScore.append(' ')\n", 783 | " kywords.append(' ')\n", 784 | " entities.append(' ')\n", 785 | " article_text.append(' ')\n", 786 | " continue\n", 787 | "\n", 788 | " # Run links through NLU to get entities, keywords, emotion and sentiment.\n", 789 | " # Use return_analyzed_text to extract text for Tone Analyzer to use.\n", 790 | " \n", 791 | " try:\n", 792 | " enriched_json = nlu.analyze(url=url,\n", 793 | " features=features,\n", 794 | " language='en',\n", 795 | " return_analyzed_text=True).get_result()\n", 796 | " article_text.append(enriched_json[\"analyzed_text\"])\n", 797 | " except Exception as e:\n", 798 | " print(\"Skipping url %s: %s\" % (url, e))\n", 799 | " overallSentimentScore.append(' ')\n", 800 | " overallSentimentType.append(' ')\n", 801 | " highestEmotion.append(' ')\n", 802 | " highestEmotionScore.append(' ')\n", 803 | " kywords.append(' ')\n", 804 | " entities.append(' ')\n", 805 | " article_text.append(' ')\n", 806 | " continue\n", 807 | " \n", 808 | " \n", 809 | " \n", 810 | "\n", 811 | " # Get the SENTIMENT score and type\n", 812 | " if 'sentiment' in enriched_json:\n", 813 | " if('score' in enriched_json['sentiment'][\"document\"]):\n", 814 | " overallSentimentScore.append(enriched_json[\"sentiment\"][\"document\"][\"score\"])\n", 815 | " else:\n", 816 | " overallSentimentScore.append('None')\n", 817 | "\n", 818 | " if('label' in enriched_json['sentiment'][\"document\"]):\n", 819 | " overallSentimentType.append(enriched_json[\"sentiment\"][\"document\"][\"label\"])\n", 820 | " else:\n", 821 | " overallSentimentType.append('')\n", 822 | "\n", 823 | " # Read the EMOTIONS into a dict and get the key (emotion) with maximum value\n", 824 | " if 'emotion' in 
enriched_json:\n", 825 | " me = max(enriched_json[\"emotion\"][\"document\"][\"emotion\"].items(), key=operator.itemgetter(1))[0]\n", 826 | " highestEmotion.append(me)\n", 827 | " highestEmotionScore.append(enriched_json[\"emotion\"][\"document\"][\"emotion\"][me])\n", 828 | "\n", 829 | " else:\n", 830 | " highestEmotion.append('')\n", 831 | " highestEmotionScore.append('')\n", 832 | "\n", 833 | " # Iterate and get KEYWORDS with a confidence of over 70%\n", 834 | " if 'keywords' in enriched_json:\n", 835 | " tmpkw = []\n", 836 | " for kw in enriched_json['keywords']:\n", 837 | " if(float(kw[\"relevance\"]) >= 0.7):\n", 838 | " tmpkw.append(kw[\"text\"])\n", 839 | " # Convert multiple keywords in a list to a string and append the string\n", 840 | " kywords.append(', '.join(tmpkw))\n", 841 | " else: \n", 842 | " kywords.append(\"\")\n", 843 | " \n", 844 | " # Iterate and get Entities with a confidence of over 30%\n", 845 | " if 'entities' in enriched_json:\n", 846 | " tmpent = []\n", 847 | " for ent in enriched_json['entities']: \n", 848 | " if(float(ent[\"relevance\"]) >= 0.3):\n", 849 | " tmpent.append(ent[\"type\"])\n", 850 | " # Convert multiple entities in a list to a string and append the string\n", 851 | " entities.append(', '.join(tmpent))\n", 852 | " else:\n", 853 | " entities.append(\"\")\n", 854 | " \n", 855 | "# Create columns from the list and append to the DataFrame\n", 856 | "if highestEmotion:\n", 857 | " df['LinkHighestEmotion'] = highestEmotion\n", 858 | "if highestEmotionScore:\n", 859 | " df['LinkHighestEmotionScore'] = highestEmotionScore\n", 860 | "\n", 861 | "if overallSentimentType:\n", 862 | " df['LinkOverallSentimentType'] = overallSentimentType\n", 863 | "if overallSentimentScore:\n", 864 | " df['LinkOverallSentimentScore'] = overallSentimentScore\n", 865 | "\n", 866 | "df['LinkKeywords'] = kywords\n", 867 | "df['LinkEntities'] = entities\n", 868 | "df['Article Text'] = article_text" 869 | ] 870 | }, 871 | { 872 | "cell_type": "markdown", 873 | "metadata": {}, 874 | "source": [ 875 | "Add two new columns to capture the `MaxLinkKeyword` and `MaxLinkEntity`:" 876 | ] 877 | }, 878 | { 879 | "cell_type": "code", 880 | "execution_count": null, 881 | "metadata": {}, 882 | "outputs": [], 883 | "source": [ 884 | "# Set 'Max' to first one from keywords and entities lists\n", 885 | "df[\"MaxLinkKeywords\"] = df[\"LinkKeywords\"].apply(lambda x: x.split(',')[0])\n", 886 | "df[\"MaxLinkEntity\"] = df[\"LinkEntities\"].apply(lambda x: x.split(',')[0])" 887 | ] 888 | }, 889 | { 890 | "cell_type": "markdown", 891 | "metadata": {}, 892 | "source": [ 893 | " \n", 894 | "### 4.4 Visual Recognition\n", 895 | "Below uses Visual Recognition to classify the thumbnail images.\n", 896 | "\n", 897 | "> NOTE: When using the **free tier** of Visual Recognition, _classify_ has a limit of 250 images per day." 
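Before running the classification cell below on a large export, it can help to check how many thumbnails would actually be sent, since the note above mentions a daily cap on the free tier. The following is an editor's sketch, assuming the `df["Image"]` column built in section 3.2.

```python
# Editor's sketch: count how many images the next cell would classify, so a
# single run stays inside the free-tier daily allowance mentioned above.
FREE_TIER_DAILY_LIMIT = 250  # figure taken from the note above

to_classify = [pic for pic in df["Image"] if pic and pic != 'default-img']
print(len(to_classify), "images to classify;",
      "within the limit" if len(to_classify) <= FREE_TIER_DAILY_LIMIT
      else "over the free-tier daily limit")
```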
898 | ] 899 | }, 900 | { 901 | "cell_type": "code", 902 | "execution_count": null, 903 | "metadata": {}, 904 | "outputs": [], 905 | "source": [ 906 | "if visual_recognition:\n", 907 | " piclinks = df[\"Image\"]\n", 908 | "\n", 909 | " picclass = []\n", 910 | " piccolor = []\n", 911 | " pictype1 = []\n", 912 | " pictype2 = []\n", 913 | " pictype3 = []\n", 914 | "\n", 915 | " for pic in piclinks:\n", 916 | " if not pic or pic == 'default-img':\n", 917 | " picclass.append(' ')\n", 918 | " piccolor.append(' ')\n", 919 | " pictype1.append(' ')\n", 920 | " pictype2.append(' ')\n", 921 | " pictype3.append(' ')\n", 922 | " continue\n", 923 | "\n", 924 | " classes = []\n", 925 | " enriched_json = {}\n", 926 | " try:\n", 927 | " enriched_json = visual_recognition.classify(url=pic).get_result()\n", 928 | " except Exception as e:\n", 929 | " print(\"Skipping url %s: %s\" % (pic, e))\n", 930 | "\n", 931 | " if 'error' in enriched_json:\n", 932 | " print(enriched_json['error'])\n", 933 | " if 'images' in enriched_json and 'classifiers' in enriched_json['images'][0]:\n", 934 | " classes = enriched_json['images'][0][\"classifiers\"][0][\"classes\"]\n", 935 | "\n", 936 | " color1 = None\n", 937 | " class1 = None\n", 938 | " type_hierarchy1 = None\n", 939 | "\n", 940 | " for iclass in classes:\n", 941 | " # Grab the first color, first class, and first type hierarchy.\n", 942 | " # Note: Usually you'd filter by 'score' too.\n", 943 | " if not type_hierarchy1 and 'type_hierarchy' in iclass:\n", 944 | " type_hierarchy1 = iclass['type_hierarchy']\n", 945 | " if not class1:\n", 946 | " class1 = iclass['class']\n", 947 | " if not color1 and iclass['class'].endswith(' color'):\n", 948 | " color1 = iclass['class'][:-len(' color')]\n", 949 | " if type_hierarchy1 and class1 and color1:\n", 950 | " # We are only using 1 of each per image. When we have all 3, break.\n", 951 | " break\n", 952 | "\n", 953 | " picclass.append(class1 or ' ')\n", 954 | " piccolor.append(color1 or ' ')\n", 955 | " type_split = (type_hierarchy1 or '/ / / ').split('/')\n", 956 | " pictype1.append(type_split[1] if len(type_split) > 1 else '-')\n", 957 | " pictype2.append(type_split[2] if len(type_split) > 2 else '- ')\n", 958 | " pictype3.append(type_split[3] if len(type_split) > 3 else '-')\n", 959 | "\n", 960 | " df[\"Image Color\"] = piccolor\n", 961 | " df[\"Image Class\"] = picclass\n", 962 | " df[\"Image Type\"] = pictype1\n", 963 | " df[\"Image Subtype\"] = pictype2\n", 964 | " df[\"Image Subtype2\"] = pictype3" 965 | ] 966 | }, 967 | { 968 | "cell_type": "markdown", 969 | "metadata": {}, 970 | "source": [ 971 | " \n", 972 | "## Enrichment is now COMPLETE!\n", 973 | " \n", 974 | "Save a copy of the enriched DataFrame as a file in Cloud Object Storage. To run the upload_file function we first need to create a variable that contains our credentials we created in section 2.2. No user input is required as we already have all of the information we need. To upload the file to COS simply run the next two cells." 
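The next two cells rely on `ibm_boto3` and `Config`, which are normally pulled in by the auto-generated credentials cell from section 2.2. If your inserted snippet did not include them, the imports below (an editor's note, with the exact module depending on your SDK version) are what the COS client code expects.

```python
# Editor's note (assumption): the generated "Insert credentials" cell usually
# includes these imports already; add them only if they are missing.
import ibm_boto3
from ibm_botocore.client import Config  # some SDK versions use: from botocore.client import Config
```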
975 | ] 976 | }, 977 | { 978 | "cell_type": "code", 979 | "execution_count": null, 980 | "metadata": {}, 981 | "outputs": [], 982 | "source": [ 983 | "cos = ibm_boto3.client(service_name='s3',\n", 984 | " ibm_api_key_id=credentials['IBM_API_KEY_ID'],\n", 985 | " ibm_service_instance_id=credentials['IAM_SERVICE_ID'],\n", 986 | " ibm_auth_endpoint=credentials['IBM_AUTH_ENDPOINT'],\n", 987 | " config=Config(signature_version='oauth'),\n", 988 | " endpoint_url=credentials['ENDPOINT'])" 989 | ] 990 | }, 991 | { 992 | "cell_type": "code", 993 | "execution_count": null, 994 | "metadata": {}, 995 | "outputs": [], 996 | "source": [ 997 | "# Build the enriched file name from the original filename.\n", 998 | "localfilename = 'enriched_' + credentials['FILE']\n", 999 | "\n", 1000 | "# Write a CSV file from the enriched pandas DataFrame.\n", 1001 | "df.to_csv(localfilename, index=False)\n", 1002 | "\n", 1003 | "# Use the COS client created above to upload the file to Object Storage.\n", 1004 | "cos.upload_file(localfilename, Bucket=credentials['BUCKET'],Key=localfilename)" 1005 | ] 1006 | }, 1007 | { 1008 | "cell_type": "code", 1009 | "execution_count": null, 1010 | "metadata": {}, 1011 | "outputs": [], 1012 | "source": [ 1013 | "# If you want to use the enriched local file, you can read it back in.\n", 1014 | "# This might be handy if you already enriched and just want to re-run\n", 1015 | "# from this cell and below. Uncomment the following line.\n", 1016 | "\n", 1017 | "# df = pd.read_csv(localfilename)" 1018 | ] 1019 | }, 1020 | { 1021 | "cell_type": "markdown", 1022 | "metadata": {}, 1023 | "source": [ 1024 | " \n", 1025 | "# Part II - Data Preparation\n", 1026 | "\n", 1027 | "## 1. Prepare Data\n", 1028 | " \n", 1029 | "### 1.1 Prepare Multiple DataFrames for Visualizations\n", 1030 | "Before we can create the separate tables for each Watson feature, we need to organize and reformat the data. First, we need to determine which data points are tied to metrics. Second, we need to make sure each metric is numeric. _(This is necessary for PixieDust in Part III)_" 1031 | ] 1032 | }, 1033 | { 1034 | "cell_type": "code", 1035 | "execution_count": null, 1036 | "metadata": {}, 1037 | "outputs": [], 1038 | "source": [ 1039 | "# Put the lifetime metrics in a list\n", 1040 | "metrics = [metric for metric in df.columns.values.tolist() if 'Lifetime' in metric]" 1041 | ] 1042 | }, 1043 | { 1044 | "cell_type": "markdown", 1045 | "metadata": {}, 1046 | "source": [ 1047 | " \n", 1048 | "### 1.2 Create a Consolidated Sentiment and Emotion DataFrame\n", 1049 | "You'll create a DataFrame for the sentiment and emotion of the post text and a DataFrame for the sentiment and emotion of the article text.
Then you'll combine them into one DataFrame.\n" 1050 | ] 1051 | }, 1052 | { 1053 | "cell_type": "markdown", 1054 | "metadata": {}, 1055 | "source": [ 1056 | "#### Post Sentiment and Emotion DataFrame" 1057 | ] 1058 | }, 1059 | { 1060 | "cell_type": "code", 1061 | "execution_count": null, 1062 | "metadata": {}, 1063 | "outputs": [], 1064 | "source": [ 1065 | "# Create a list with only Post sentiment and emotion values\n", 1066 | "post_tones = [\"Text\",\"TextHighestEmotion\", \"TextHighestEmotionScore\", \"TextOverallSentimentType\", \"TextOverallSentimentScore\"]\n", 1067 | "\n", 1068 | "# Append DataFrame with these metrics\n", 1069 | "post_tones.extend(metrics)\n", 1070 | "\n", 1071 | "# Create a new DataFrame with metrics and sentiment and emotion\n", 1072 | "df_post_tones = df[post_tones]\n", 1073 | "\n", 1074 | "# Determine which tone values are supposed to be numeric and ensure they are numeric. \n", 1075 | "post_numeric_values = [\"TextHighestEmotionScore\", \"TextOverallSentimentScore\"]\n", 1076 | "for i in post_numeric_values:\n", 1077 | " df_post_tones[i] = pd.to_numeric(df_post_tones[i], errors='coerce')\n", 1078 | "\n", 1079 | "# Make all metrics numeric\n", 1080 | "for i in metrics:\n", 1081 | " df_post_tones[i] = pd.to_numeric(df_post_tones[i], errors='coerce')\n", 1082 | "\n", 1083 | "# Add a column to indicate which content was enriched\n", 1084 | "df_post_tones[\"Type\"] = \"Post\"" 1085 | ] 1086 | }, 1087 | { 1088 | "cell_type": "markdown", 1089 | "metadata": {}, 1090 | "source": [ 1091 | "#### Article Sentiment and Emotion DataFrame" 1092 | ] 1093 | }, 1094 | { 1095 | "cell_type": "code", 1096 | "execution_count": null, 1097 | "metadata": {}, 1098 | "outputs": [], 1099 | "source": [ 1100 | "# Create a list with only Article sentiment and emotion values\n", 1101 | "article_tones = [\"Text\", \"LinkHighestEmotion\", \"LinkHighestEmotionScore\", \"LinkOverallSentimentType\", \"LinkOverallSentimentScore\"]\n", 1102 | "\n", 1103 | "# Append DataFrame with these metrics\n", 1104 | "article_tones.extend(metrics)\n", 1105 | "\n", 1106 | "# Create a new DataFrame with metrics and sentiment and emotion\n", 1107 | "df_article_tones = df[article_tones]\n", 1108 | "\n", 1109 | "# Determine which values are supposed to be numeric and ensure they are numeric. \n",
1110 | "art_numeric_values = [\"LinkHighestEmotionScore\", \"LinkOverallSentimentScore\"]\n", 1111 | "for i in art_numeric_values:\n", 1112 | " df_article_tones[i] = pd.to_numeric(df_article_tones[i], errors='coerce')\n", 1113 | " \n", 1114 | "# Make all metrics numeric\n", 1115 | "for i in metrics:\n", 1116 | " df_article_tones[i] = pd.to_numeric(df_article_tones[i], errors='coerce')\n", 1117 | "\n", 1118 | "# Add a column to indicate which content was enriched\n", 1119 | "df_article_tones[\"Type\"] = \"Article\"" 1120 | ] 1121 | }, 1122 | { 1123 | "cell_type": "markdown", 1124 | "metadata": {}, 1125 | "source": [ 1126 | "#### Combine Post and Article DataFrames to Make DataFrame with Sentiment and Emotion" 1127 | ] 1128 | }, 1129 | { 1130 | "cell_type": "code", 1131 | "execution_count": null, 1132 | "metadata": {}, 1133 | "outputs": [], 1134 | "source": [ 1135 | "# First make the column headers the same\n", 1136 | "df_post_tones.rename(columns={\"TextHighestEmotion\":\"Emotion\",\n", 1137 | " \"TextHighestEmotionScore\":\"Emotion Score\",\n", 1138 | " \"TextOverallSentimentType\": \"Sentiment\",\n", 1139 | " \"TextOverallSentimentScore\": \"Sentiment Score\"\n", 1140 | " },\n", 1141 | " inplace=True)\n", 1142 | "\n", 1143 | "df_article_tones.rename(columns={\"LinkHighestEmotion\":\"Emotion\",\n", 1144 | " \"LinkHighestEmotionScore\":\"Emotion Score\",\n", 1145 | " \"LinkOverallSentimentType\": \"Sentiment\",\n", 1146 | " \"LinkOverallSentimentScore\": \"Sentiment Score\"\n", 1147 | " },\n", 1148 | " inplace=True)\n", 1149 | "\n", 1150 | "# Combine into one data frame\n", 1151 | "df_tones = pd.concat([df_post_tones, df_article_tones])" 1152 | ] 1153 | }, 1154 | { 1155 | "cell_type": "code", 1156 | "execution_count": null, 1157 | "metadata": {}, 1158 | "outputs": [], 1159 | "source": [ 1160 | "# Only keep the positive, neutral, and negative sentiments. The others are empty or unusable.\n", 1161 | "df_tones = df_tones[df_tones.Sentiment.isin(['positive', 'neutral', 'negative'])]" 1162 | ] 1163 | }, 1164 | { 1165 | "cell_type": "markdown", 1166 | "metadata": {}, 1167 | "source": [ 1168 | " \n", 1169 | "### 1.3 Create a Consolidated Keyword DataFrame\n", 1170 | "You'll create DataFrames for the keywords of the article text, the thumbnail text, and the post text. Then you'll combine them into one DataFrame."
1171 | ] 1172 | }, 1173 | { 1174 | "cell_type": "markdown", 1175 | "metadata": {}, 1176 | "source": [ 1177 | "#### Article Keyword DataFrame " 1178 | ] 1179 | }, 1180 | { 1181 | "cell_type": "code", 1182 | "execution_count": null, 1183 | "metadata": {}, 1184 | "outputs": [], 1185 | "source": [ 1186 | "# Create a list with only Article Keywords\n", 1187 | "article_keywords = [\"Text\", \"MaxLinkKeywords\"]\n", 1188 | "\n", 1189 | "# Append DataFrame with these metrics\n", 1190 | "article_keywords.extend(metrics)\n", 1191 | "\n", 1192 | "# Create a new DataFrame with keywords and metrics\n", 1193 | "df_article_keywords = df[article_keywords]\n", 1194 | "\n", 1195 | "# Make all metrics numeric\n", 1196 | "for i in metrics:\n", 1197 | " df_article_keywords[i] = pd.to_numeric(df_article_keywords[i], errors='coerce')\n", 1198 | "\n", 1199 | "# Drop NA Values in Keywords Column\n", 1200 | "df_article_keywords['MaxLinkKeywords'].replace(' ', np.nan, inplace=True)\n", 1201 | "df_article_keywords.dropna(subset=['MaxLinkKeywords'], inplace=True)\n", 1202 | "\n", 1203 | "# Add a column to indicate which content was enriched\n", 1204 | "df_article_keywords[\"Type\"] = \"Article\"" 1205 | ] 1206 | }, 1207 | { 1208 | "cell_type": "markdown", 1209 | "metadata": {}, 1210 | "source": [ 1211 | "#### Thumbnail Keyword DataFrame " 1212 | ] 1213 | }, 1214 | { 1215 | "cell_type": "code", 1216 | "execution_count": null, 1217 | "metadata": {}, 1218 | "outputs": [], 1219 | "source": [ 1220 | "# Create a list with only Thumbnail Keywords\n", 1221 | "thumbnail_keywords = [\"Text\", \"MaxThumbnailKeywords\"]\n", 1222 | "\n", 1223 | "# Append DataFrame with these metrics\n", 1224 | "thumbnail_keywords.extend(metrics)\n", 1225 | "\n", 1226 | "# Create a new DataFrame with keywords and metrics\n", 1227 | "df_thumbnail_keywords = df[thumbnail_keywords]\n", 1228 | "\n", 1229 | "# Make all metrics numeric\n", 1230 | "for i in metrics:\n", 1231 | " df_thumbnail_keywords[i] = pd.to_numeric(df_thumbnail_keywords[i], errors='coerce')\n", 1232 | " \n", 1233 | "# Drop NA Values in Keywords Column\n", 1234 | "df_thumbnail_keywords['MaxThumbnailKeywords'].replace(' ', np.nan, inplace=True)\n", 1235 | "df_thumbnail_keywords.dropna(subset=['MaxThumbnailKeywords'], inplace=True)\n", 1236 | "\n", 1237 | "# Add a column to indicate which content was enriched\n", 1238 | "df_thumbnail_keywords[\"Type\"] = \"Thumbnails\"" 1239 | ] 1240 | }, 1241 | { 1242 | "cell_type": "markdown", 1243 | "metadata": {}, 1244 | "source": [ 1245 | "#### Post Keyword DataFrame " 1246 | ] 1247 | }, 1248 | { 1249 | "cell_type": "code", 1250 | "execution_count": null, 1251 | "metadata": {}, 1252 | "outputs": [], 1253 | "source": [ 1254 | "# Create a list with only Post Keywords\n", 1255 | "post_keywords = [\"Text\", \"MaxTextKeywords\"]\n", 1256 | "\n", 1257 | "# Append DataFrame with these metrics\n", 1258 | "post_keywords.extend(metrics)\n", 1259 | "\n", 1260 | "# Create a new DataFrame with keywords and metrics\n", 1261 | "df_post_keywords = df[post_keywords]\n", 1262 | "\n", 1263 | "# Make all metrics numeric\n", 1264 | "for i in metrics:\n", 1265 | " df_post_keywords[i] = pd.to_numeric(df_post_keywords[i], errors='coerce')\n", 1266 | " \n", 1267 | "# Drop NA Values in Keywords Column\n", 1268 | "df_post_keywords['MaxTextKeywords'].replace(' ', np.nan, inplace=True)\n", 1269 | "df_post_keywords.dropna(subset=['MaxTextKeywords'], inplace=True)\n", 1270 | "\n", 1271 | "# Add a column to indicate which content was enriched\n",
1272 | "df_post_keywords[\"Type\"] = \"Posts\"" 1273 | ] 1274 | }, 1275 | { 1276 | "cell_type": "markdown", 1277 | "metadata": {}, 1278 | "source": [ 1279 | "#### Combine Post, Thumbnail, and Article DataFrames to Make One Keywords DataFrame" 1280 | ] 1281 | }, 1282 | { 1283 | "cell_type": "code", 1284 | "execution_count": null, 1285 | "metadata": {}, 1286 | "outputs": [], 1287 | "source": [ 1288 | "# First make the column headers the same\n", 1289 | "df_post_keywords.rename(columns={\"MaxTextKeywords\": \"Keywords\"}, inplace=True)\n", 1290 | "df_thumbnail_keywords.rename(columns={\"MaxThumbnailKeywords\":\"Keywords\"}, inplace=True)\n", 1291 | "df_article_keywords.rename(columns={\"MaxLinkKeywords\":\"Keywords\"}, inplace=True)\n", 1292 | "\n", 1293 | "# Combine into one data frame\n", 1294 | "df_keywords = pd.concat([df_post_keywords, df_thumbnail_keywords, df_article_keywords])\n", 1295 | "\n", 1296 | "# Discard posts with lower total reach to make charting easier\n", 1297 | "df_keywords = df_keywords[df_keywords[\"Lifetime Post Total Reach\"] > 20000]\n" 1298 | ] 1299 | }, 1300 | { 1301 | "cell_type": "markdown", 1302 | "metadata": {}, 1303 | "source": [ 1304 | "\n", 1305 | "### 1.4 Create a Consolidated Entity DataFrame\n", 1306 | "You'll create DataFrames for the entities of the article text, the thumbnail text, and the post text. Then you'll combine them into one DataFrame." 1307 | ] 1308 | }, 1309 | { 1310 | "cell_type": "markdown", 1311 | "metadata": {}, 1312 | "source": [ 1313 | "#### Article Entity DataFrame " 1314 | ] 1315 | }, 1316 | { 1317 | "cell_type": "code", 1318 | "execution_count": null, 1319 | "metadata": {}, 1320 | "outputs": [], 1321 | "source": [ 1322 | "# Create a list with only Article Entities\n", 1323 | "article_entities = [\"Text\", \"MaxLinkEntity\"]\n", 1324 | "\n", 1325 | "# Append DataFrame with these metrics\n", 1326 | "article_entities.extend(metrics)\n", 1327 | "\n", 1328 | "# Create a new DataFrame with entities and metrics\n", 1329 | "df_article_entities = df[article_entities]\n", 1330 | " \n", 1331 | "# Make all metrics numeric\n", 1332 | "for i in metrics:\n", 1333 | " df_article_entities[i] = pd.to_numeric(df_article_entities[i], errors='coerce')\n", 1334 | " \n", 1335 | "# Drop NA Values in Entity Column\n", 1336 | "df_article_entities['MaxLinkEntity'] = df_article_entities['MaxLinkEntity'].replace(r'\\s+', np.nan, regex=True)\n", 1337 | "df_article_entities.dropna(subset=['MaxLinkEntity'], inplace=True)\n", 1338 | "\n", 1339 | "# Add a column to indicate which content was enriched\n", 1340 | "df_article_entities[\"Type\"] = \"Article\"" 1341 | ] 1342 | }, 1343 | { 1344 | "cell_type": "markdown", 1345 | "metadata": {}, 1346 | "source": [ 1347 | "#### Thumbnail Entity DataFrame" 1348 | ] 1349 | }, 1350 | { 1351 | "cell_type": "code", 1352 | "execution_count": null, 1353 | "metadata": {}, 1354 | "outputs": [], 1355 | "source": [ 1356 | "# Create a list with only Thumbnail Entities\n", 1357 | "thumbnail_entities = [\"Text\", \"MaxThumbnailEntity\"]\n", 1358 | "\n", 1359 | "# Append DataFrame with these metrics\n", 1360 | "thumbnail_entities.extend(metrics)\n", 1361 | "\n", 1362 | "# Create a new DataFrame with entities and metrics\n", 1363 | "df_thumbnail_entities = df[thumbnail_entities]\n", 1364 | "\n", 1365 | "# Make all metrics numeric\n", 1366 | "for i in metrics:\n", 1367 | " df_thumbnail_entities[i] = pd.to_numeric(df_thumbnail_entities[i], errors='coerce')\n", 1368 | " \n", 1369 | "# Drop NA Values in Entity Column\n",
1370 | "df_thumbnail_entities['MaxThumbnailEntity'] = df_thumbnail_entities['MaxThumbnailEntity'].replace(r'\\s+', np.nan, regex=True)\n", 1371 | "df_thumbnail_entities.dropna(subset=['MaxThumbnailEntity'], inplace=True)\n", 1372 | "\n", 1373 | "# Add a column to indicate which content was enriched\n", 1374 | "df_thumbnail_entities[\"Type\"] = \"Thumbnails\"" 1375 | ] 1376 | }, 1377 | { 1378 | "cell_type": "markdown", 1379 | "metadata": {}, 1380 | "source": [ 1381 | "#### Post Entity DataFrame" 1382 | ] 1383 | }, 1384 | { 1385 | "cell_type": "code", 1386 | "execution_count": null, 1387 | "metadata": {}, 1388 | "outputs": [], 1389 | "source": [ 1390 | "# Create a list with only Post Entities\n", 1391 | "post_entities = [\"Text\", \"MaxTextEntity\"]\n", 1392 | "\n", 1393 | "# Append DataFrame with these metrics\n", 1394 | "post_entities.extend(metrics)\n", 1395 | "\n", 1396 | "# Create a new DataFrame with entities and metrics\n", 1397 | "df_post_entities = df[post_entities]\n", 1398 | "\n", 1399 | "# Make all metrics numeric\n", 1400 | "for i in metrics:\n", 1401 | " df_post_entities[i] = pd.to_numeric(df_post_entities[i], errors='coerce')\n", 1402 | " \n", 1403 | "# Drop NA Values in Entity Column\n", 1404 | "df_post_entities['MaxTextEntity'] = df_post_entities['MaxTextEntity'].replace(r'\\s+', np.nan, regex=True)\n", 1405 | "df_post_entities.dropna(subset=['MaxTextEntity'], inplace=True)\n", 1406 | "\n", 1407 | "# Add a column to indicate which content was enriched\n", 1408 | "df_post_entities[\"Type\"] = \"Posts\"" 1409 | ] 1410 | }, 1411 | { 1412 | "cell_type": "markdown", 1413 | "metadata": {}, 1414 | "source": [ 1415 | "#### Combine Post, Thumbnail, and Article DataFrames to Make One Entity DataFrame" 1416 | ] 1417 | }, 1418 | { 1419 | "cell_type": "code", 1420 | "execution_count": null, 1421 | "metadata": {}, 1422 | "outputs": [], 1423 | "source": [ 1424 | "# First make the column headers the same\n", 1425 | "df_post_entities.rename(columns={\"MaxTextEntity\": \"Entities\"}, inplace=True)\n", 1426 | "\n", 1427 | "df_thumbnail_entities.rename(columns={\"MaxThumbnailEntity\":\"Entities\"}, inplace=True)\n", 1428 | "\n", 1429 | "df_article_entities.rename(columns={\"MaxLinkEntity\":\"Entities\"}, inplace=True)\n", 1430 | "\n", 1431 | "# Combine into one data frame\n", 1432 | "df_entities = pd.concat([df_post_entities, df_thumbnail_entities, df_article_entities])\n", 1433 | "\n", 1434 | "df_entities[\"Entities\"] = df_entities[\"Entities\"].replace('', np.nan)\n", 1435 | "df_entities.dropna(subset=[\"Entities\"], inplace=True)" 1436 | ] 1437 | }, 1438 | { 1439 | "cell_type": "markdown", 1440 | "metadata": {}, 1441 | "source": [ 1442 | "\n", 1443 | "### 1.5 Create a Consolidated Image DataFrame" 1444 | ] 1445 | }, 1446 | { 1447 | "cell_type": "markdown", 1448 | "metadata": {}, 1449 | "source": [ 1450 | "#### Combine Metrics with Type Hierarchy, Class and Color to Make One Image DataFrame" 1451 | ] 1452 | }, 1453 | { 1454 | "cell_type": "code", 1455 | "execution_count": null, 1456 | "metadata": {}, 1457 | "outputs": [], 1458 | "source": [ 1459 | "if visual_recognition:\n", 1460 | " # Create a list with only Visual Recognition columns\n", 1461 | " pic_keywords = ['Image Type', 'Image Subtype', 'Image Subtype2', 'Image Class', 'Image Color']\n", 1462 | "\n", 1463 | " # Append DataFrame with these metrics\n", 1464 | " pic_keywords.extend(metrics)\n", 1465 | "\n", 1466 | " # Create a new DataFrame with keywords and metrics\n",
1467 | " df_pic_keywords = df[pic_keywords]\n", 1468 | "\n", 1469 | " # Make all metrics numeric\n", 1470 | " for i in metrics:\n", 1471 | " df_pic_keywords[i] = pd.to_numeric(df_pic_keywords[i], errors='coerce')\n", 1472 | "\n", 1473 | " # Discard posts with lower total reach to make charting easier\n", 1474 | " df_pic_keywords = df_pic_keywords[df_pic_keywords[\"Lifetime Post Total Reach\"] > 15000]" 1475 | ] 1476 | }, 1477 | { 1478 | "cell_type": "code", 1479 | "execution_count": null, 1480 | "metadata": {}, 1481 | "outputs": [], 1482 | "source": [ 1483 | "if visual_recognition:\n", 1484 | " images = df_pic_keywords[df_pic_keywords['Image Type'] != ' ']" 1485 | ] 1486 | }, 1487 | { 1488 | "cell_type": "markdown", 1489 | "metadata": {}, 1490 | "source": [ 1491 | " \n", 1492 | "# Part III\n", 1493 | " \n", 1494 | "## 1. Setup\n", 1495 | "\n", 1496 | "### 1.1 Assign Variables\n", 1497 | "Assign new DataFrames to variables. " 1498 | ] 1499 | }, 1500 | { 1501 | "cell_type": "code", 1502 | "execution_count": null, 1503 | "metadata": {}, 1504 | "outputs": [], 1505 | "source": [ 1506 | "entities = df_entities\n", 1507 | "tones = df_tones\n", 1508 | "keywords = df_keywords" 1509 | ] 1510 | }, 1511 | { 1512 | "cell_type": "markdown", 1513 | "metadata": {}, 1514 | "source": [ 1515 | "\n", 1516 | "## 2. Visualize Data\n", 1517 | " \n", 1518 | "### 2.1 Run PixieDust Visualization Library with Display() API\n", 1519 | "PixieDust lets you visualize your data in just a few clicks using the display() API. You can find more info at https://pixiedust.github.io/pixiedust/displayapi.html." 1520 | ] 1521 | }, 1522 | { 1523 | "cell_type": "markdown", 1524 | "metadata": {}, 1525 | "source": [ 1526 | "#### We can use a pie chart to identify how lifetime engagement was broken up by sentiment. \n", 1527 | "\n", 1528 | "Click on the `Options` button to change the chart. Here are some things to try:\n", 1529 | "* Add *Type* to make the breakdown show *Post* or *Article*.\n", 1530 | "* Show *Emotion* instead of *Sentiment* (or both).\n", 1531 | "* Try a different metric." 1532 | ] 1533 | }, 1534 | { 1535 | "cell_type": "code", 1536 | "execution_count": null, 1537 | "metadata": {}, 1538 | "outputs": [], 1539 | "source": [ 1540 | "import pixiedust" 1541 | ] 1542 | }, 1543 | { 1544 | "cell_type": "code", 1545 | "execution_count": null, 1546 | "metadata": { 1547 | "pixiedust": { 1548 | "displayParams": { 1549 | "aggregation": "SUM", 1550 | "chartsize": "70", 1551 | "charttype": "stacked", 1552 | "clusterby": "Type", 1553 | "handlerId": "pieChart", 1554 | "keyFields": "Emotion", 1555 | "legend": "true", 1556 | "mpld3": "false", 1557 | "orientation": "horizontal", 1558 | "rendererId": "matplotlib", 1559 | "rowCount": "100", 1560 | "sortby": "Values DESC", 1561 | "title": "Lifetime Engaged Users by Emotion", 1562 | "valueFields": "Lifetime Engaged Users", 1563 | "ylabel": "true" 1564 | } 1565 | } 1566 | }, 1567 | "outputs": [], 1568 | "source": [ 1569 | "display(tones)" 1570 | ] 1571 | }, 1572 | { 1573 | "cell_type": "markdown", 1574 | "metadata": {}, 1575 | "source": [ 1576 | "#### Now let's look at the same statistics as a bar chart.\n", 1577 | "\n", 1578 | "It is the same line of code. Use the `Edit Metadata` button to see how PixieDust knows to show us a bar chart. If you don't have a button, use the menu and select `View > Cell Toolbar > Edit Metadata`.\n", 1579 | "\n", 1580 | "A bar chart is better at showing more information. We added `Cluster By: Type` so we already see numbers for posts and articles.
Notice what the chart tells you. Most of our articles and posts are `positive`. But what sentiment really engages more users? Click on `Options` and try this:\n", 1581 | "\n", 1582 | "* Change the aggregation to `AVG`.\n", 1583 | "\n", 1584 | "What sentiment leads to higher average engagement?\n" 1585 | ] 1586 | }, 1587 | { 1588 | "cell_type": "code", 1589 | "execution_count": null, 1590 | "metadata": { 1591 | "pixiedust": { 1592 | "displayParams": { 1593 | "aggregation": "SUM", 1594 | "chartsize": "70", 1595 | "charttype": "stacked", 1596 | "clusterby": "Type", 1597 | "handlerId": "barChart", 1598 | "keyFields": "Sentiment", 1599 | "legend": "true", 1600 | "orientation": "horizontal", 1601 | "rendererId": "matplotlib", 1602 | "rowCount": "100", 1603 | "sortby": "Values DESC", 1604 | "title": "Lifetime Engaged Users by Sentiment", 1605 | "valueFields": "Lifetime Engaged Users" 1606 | } 1607 | }, 1608 | "scrolled": false 1609 | }, 1610 | "outputs": [], 1611 | "source": [ 1612 | "display(tones)" 1613 | ] 1614 | }, 1615 | { 1616 | "cell_type": "markdown", 1617 | "metadata": {}, 1618 | "source": [ 1619 | "#### Now let's look at the entities that were detected by Natural Language Understanding.\n", 1620 | "\n", 1621 | "The following bar chart shows the entities that were detected. This time we are stacking negative feedback and \"likes\" to get a picture of the kind of feedback the entities were getting. We chose a horizontal, stacked bar chart with descending values for a little variety.\n", 1622 | "\n", 1623 | "* Try a different renderer and see what you get." 1624 | ] 1625 | }, 1626 | { 1627 | "cell_type": "code", 1628 | "execution_count": null, 1629 | "metadata": { 1630 | "pixiedust": { 1631 | "displayParams": { 1632 | "aggregation": "SUM", 1633 | "chartsize": "70", 1634 | "charttype": "stacked", 1635 | "handlerId": "barChart", 1636 | "keyFields": "Entities", 1637 | "orientation": "horizontal", 1638 | "rendererId": "matplotlib", 1639 | "rowCount": "100", 1640 | "sortby": "Values DESC", 1641 | "title": "Entities in Posts and Articles", 1642 | "valueFields": "Lifetime Post Stories by action type - like,Lifetime Negative Feedback from Users" 1643 | } 1644 | }, 1645 | "scrolled": false 1646 | }, 1647 | "outputs": [], 1648 | "source": [ 1649 | "display(entities)" 1650 | ] 1651 | }, 1652 | { 1653 | "cell_type": "markdown", 1654 | "metadata": {}, 1655 | "source": [ 1656 | "#### Next we look at the keywords detected by Natural Language Understanding\n" 1657 | ] 1658 | }, 1659 | { 1660 | "cell_type": "code", 1661 | "execution_count": null, 1662 | "metadata": { 1663 | "pixiedust": { 1664 | "displayParams": { 1665 | "aggregation": "SUM", 1666 | "chartsize": "85", 1667 | "charttype": "stacked", 1668 | "clusterby": "Type", 1669 | "handlerId": "barChart", 1670 | "keyFields": "Keywords", 1671 | "legend": "true", 1672 | "mpld3": "false", 1673 | "orientation": "horizontal", 1674 | "rendererId": "matplotlib", 1675 | "rowCount": "100", 1676 | "sortby": "Values DESC", 1677 | "timeseries": "false", 1678 | "title": "Keyword Total Reach", 1679 | "valueFields": "Lifetime Post Total Reach" 1680 | } 1681 | }, 1682 | "scrolled": false 1683 | }, 1684 | "outputs": [], 1685 | "source": [ 1686 | "display(keywords)" 1687 | ] 1688 | }, 1689 | { 1690 | "cell_type": "markdown", 1691 | "metadata": {}, 1692 | "source": [ 1693 | "#### Now let's take a look at what Visual Recognition can show us.\n", 1694 | "\n", 1695 | "See how the images influenced the metrics. 
We've used visual recognition to identify a class and a type hierarchy for each image. We've also captured the top recognized color for each image. Our sample data doesn't have a significant number of data points, but these three charts demonstrate how you could:\n", 1696 | "\n", 1697 | "1. Recognize image classes that correlate to higher total reach.\n", 1698 | "1. Add a type hierarchy for a higher level abstraction or to add grouping/stacking to the class data.\n", 1699 | "1. Determine if image color correlates to total reach.\n", 1700 | "\n", 1701 | "Visual recognition makes it surprisingly easy to do all of the above. Of course, you can easily try different metrics as you experiment. If you are not convinced that you should add ultramarine laser pictures to all of your articles, then you might want to do some research with a better data sample." 1702 | ] 1703 | }, 1704 | { 1705 | "cell_type": "code", 1706 | "execution_count": null, 1707 | "metadata": { 1708 | "pixiedust": { 1709 | "displayParams": { 1710 | "aggregation": "SUM", 1711 | "chartsize": "85", 1712 | "charttype": "stacked", 1713 | "handlerId": "barChart", 1714 | "keyFields": "Image Class", 1715 | "legend": "true", 1716 | "orientation": "horizontal", 1717 | "rendererId": "matplotlib", 1718 | "rowCount": "100", 1719 | "sortby": "Values DESC", 1720 | "title": "Image Classes", 1721 | "valueFields": "Lifetime Post Total Reach" 1722 | } 1723 | } 1724 | }, 1725 | "outputs": [], 1726 | "source": [ 1727 | "if visual_recognition:\n", 1728 | " display(images)" 1729 | ] 1730 | }, 1731 | { 1732 | "cell_type": "code", 1733 | "execution_count": null, 1734 | "metadata": { 1735 | "pixiedust": { 1736 | "displayParams": { 1737 | "aggregation": "SUM", 1738 | "chartsize": "85", 1739 | "charttype": "stacked", 1740 | "clusterby": "Image Type", 1741 | "handlerId": "barChart", 1742 | "keyFields": "Image Subtype", 1743 | "legend": "true", 1744 | "mpld3": "false", 1745 | "orientation": "horizontal", 1746 | "rendererId": "matplotlib", 1747 | "rowCount": "100", 1748 | "sortby": "Values DESC", 1749 | "stretch": "false", 1750 | "title": "Image Type Hierarchy", 1751 | "valueFields": "Lifetime Post Total Reach" 1752 | } 1753 | } 1754 | }, 1755 | "outputs": [], 1756 | "source": [ 1757 | "if visual_recognition:\n", 1758 | " display(images)" 1759 | ] 1760 | }, 1761 | { 1762 | "cell_type": "code", 1763 | "execution_count": null, 1764 | "metadata": { 1765 | "pixiedust": { 1766 | "displayParams": { 1767 | "aggregation": "SUM", 1768 | "chartsize": "85", 1769 | "charttype": "stacked", 1770 | "handlerId": "barChart", 1771 | "keyFields": "Image Color", 1772 | "legend": "true", 1773 | "orientation": "horizontal", 1774 | "rendererId": "matplotlib", 1775 | "rowCount": "100", 1776 | "sortby": "Values DESC", 1777 | "title": "Image Color", 1778 | "valueFields": "Lifetime Post Total Reach" 1779 | } 1780 | } 1781 | }, 1782 | "outputs": [], 1783 | "source": [ 1784 | "if visual_recognition:\n", 1785 | " display(images)" 1786 | ] 1787 | }, 1788 | { 1789 | "cell_type": "markdown", 1790 | "metadata": {}, 1791 | "source": [ 1792 | "
\n", 1793 | "Copyright © IBM Corp. 2017, 2018. This notebook and its source code are released under the terms of the Apache 2.0." 1794 | ] 1795 | }, 1796 | { 1797 | "cell_type": "markdown", 1798 | "metadata": {}, 1799 | "source": [ 1800 | "Licensed under the Apache License, Version 2.0 (the \"License\"); you may\n", 1801 | "not use this file except in compliance with the License. You may obtain\n", 1802 | "a copy of the License at\n", 1803 | "\n", 1804 | " http://www.apache.org/licenses/LICENSE-2.0\n", 1805 | "\n", 1806 | "Unless required by applicable law or agreed to in writing, software\n", 1807 | "distributed under the License is distributed on an \"AS IS\" BASIS, WITHOUT\n", 1808 | "WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the\n", 1809 | "License for the specific language governing permissions and limitations\n", 1810 | "under the License." 1811 | ] 1812 | } 1813 | ], 1814 | "metadata": { 1815 | "kernelspec": { 1816 | "display_name": "Python 3.6", 1817 | "language": "python", 1818 | "name": "python3" 1819 | }, 1820 | "language_info": { 1821 | "codemirror_mode": { 1822 | "name": "ipython", 1823 | "version": 3 1824 | }, 1825 | "file_extension": ".py", 1826 | "mimetype": "text/x-python", 1827 | "name": "python", 1828 | "nbconvert_exporter": "python", 1829 | "pygments_lexer": "ipython3", 1830 | "version": "3.6.9" 1831 | } 1832 | }, 1833 | "nbformat": 4, 1834 | "nbformat_minor": 1 1835 | } 1836 | --------------------------------------------------------------------------------