├── pictures
│   ├── ghana.jpg
│   └── russia.jpg
├── category_dict.py
├── README.md
├── .gitignore
├── country_by_continent.csv
├── ProjectProposalGroup_035-Fa22.ipynb
├── DataCheckpointGroup_035-Fa22.ipynb
└── Previous.ipynb
/pictures/ghana.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/COGS108/Group_035-Fa22/master/pictures/ghana.jpg
--------------------------------------------------------------------------------
/pictures/russia.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/COGS108/Group_035-Fa22/master/pictures/russia.jpg
--------------------------------------------------------------------------------
/category_dict.py:
--------------------------------------------------------------------------------
1 | spiciness_dict = {
2 | 'tonkotsu ramen': 'mild',
3 | 'japanese style': 'mild',
4 | 'tom yum': 'hot',
5 | 'chicken flavor': 'mild',
6 | 'seafood flavor': 'mild',
7 | 'beef flavor': 'mild',
8 | 'mi goreng': 'mild',
9 | 'black pepper': 'hot',
10 | 'soy sauce': 'mild',
11 | 'chow mein': 'mild',
12 | 'pork flavor': 'mild',
13 | 'shrimp flavour': 'mild',
14 | 'crab flavour': 'mild',
15 | 'seafood flavour': 'mild',
16 | 'xo sauce': 'mild',
17 | 'sesame oil': 'mild',
18 | 'artificial pork': 'mild',
19 | 'chicken soup': 'mild',
20 | 'mi segera': 'mild',
21 | 'penang white': 'hot',
22 | 'yum kung': 'hot',
23 | 'soy paste': 'mild',
24 | 'pancit canton': 'mild',
25 | 'sweet potato': 'mild',
26 | 'kuah rasa': 'mild',
27 | 'rasa ayam': 'mild',
28 | 'bokkeummyun': 'hot',
29 | 'shin ramyun': 'hot',
30 | 'tom yam': 'hot',
31 | 'demae iccho': 'mild',
32 | 'sopa nissin': 'mild',
33 | 'malaysia penang': 'hot',
34 | 'south korean': 'hot'}
35 |
36 | spiciness_word = {
37 | 'laksa': 'hot',
38 | 'buldak': 'hot',
39 | 'curry': 'hot',
40 | 'fiery': 'hot',
41 | 'sichuan': 'hot',
42 | 'kimchi': 'hot',
43 | 'udon': 'mild',
44 | 'tempura': 'mild',
45 | 'tonkotsu': 'mild',
46 | 'shoyu': 'mild',
47 | 'korean': 'hot',
48 | 'shio': 'mild',
49 | 'creamy': 'mild',
50 | 'demae': 'mild',
51 | 'artificial': 'mild'}
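52 |
53 | # Illustrative usage (a sketch; the data checkpoint notebook implements its
54 | # own version of this lookup): match two-word phrases against spiciness_dict
55 | # first, then fall back to single words in spiciness_word.
56 | def _example_lookup(variety):
57 |     words = variety.lower().split()
58 |     for pair in (' '.join(p) for p in zip(words, words[1:])):
59 |         if pair in spiciness_dict:
60 |             return spiciness_dict[pair]
61 |     for word in words:
62 |         if word in spiciness_word:
63 |             return spiciness_word[word]
64 |     return 'pepper free'
65 |
66 | if __name__ == '__main__':
67 |     # 'tom yum' is a phrase in spiciness_dict, so this prints 'hot'
68 |     print(_example_lookup('Tom Yum Shrimp Flavor Ramen Soup'))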
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | This is your group repo for your final project for COGS108.
2 |
3 | This repository is private, and is only visible to the course instructors and your group mates; it is not visible to anyone else.
4 |
5 | Template notebooks for each component are provided. Only work on the notebook prior to its due date. After each submission is due, move on to the next notebook (for example, after the proposal is due, start working in the Data Checkpoint notebook).
6 |
7 | This repository will be frozen on the final project due date. No further changes can be made after that time.
8 |
9 | Your project proposal and final project will be graded based solely on the corresponding project notebooks in this repository.
10 |
11 | Template Jupyter notebooks have been included, with your group number replacing the XXX in the file names below. Make sure a notebook with the corresponding name is present in this repository by each due date:
12 |
13 | - `ProjectProposal_groupXXX.ipynb`
14 | - `DataCheckpoint_groupXXX.ipynb`
15 | - `EDACheckpoint_groupXXX.ipynb`
16 | - `FinalProject_groupXXX.ipynb`
17 |
18 | This is *your* repo. You are free to manage the repo as you see fit: edit this README, add data files, add scripts, etc. So long as the four files above are present on their due dates with the required information, the rest is up to you all.
19 |
20 | Also, you are free and encouraged to share this project after the course and to add it to your portfolio. Just be sure to fork it to your GitHub at the end of the quarter!
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | # Byte-compiled / optimized / DLL files
2 | __pycache__/
3 | *.py[cod]
4 | *$py.class
5 |
6 | # C extensions
7 | *.so
8 |
9 | # Distribution / packaging
10 | .Python
11 | build/
12 | develop-eggs/
13 | dist/
14 | downloads/
15 | eggs/
16 | .eggs/
17 | lib/
18 | lib64/
19 | parts/
20 | sdist/
21 | var/
22 | wheels/
23 | share/python-wheels/
24 | *.egg-info/
25 | .installed.cfg
26 | *.egg
27 | MANIFEST
28 |
29 | # PyInstaller
30 | # Usually these files are written by a python script from a template
31 | # before PyInstaller builds the exe, so as to inject date/other infos into it.
32 | *.manifest
33 | *.spec
34 |
35 | # Installer logs
36 | pip-log.txt
37 | pip-delete-this-directory.txt
38 |
39 | # Unit test / coverage reports
40 | htmlcov/
41 | .tox/
42 | .nox/
43 | .coverage
44 | .coverage.*
45 | .cache
46 | nosetests.xml
47 | coverage.xml
48 | *.cover
49 | *.py,cover
50 | .hypothesis/
51 | .pytest_cache/
52 | cover/
53 |
54 | # Translations
55 | *.mo
56 | *.pot
57 |
58 | # Django stuff:
59 | *.log
60 | local_settings.py
61 | db.sqlite3
62 | db.sqlite3-journal
63 |
64 | # Flask stuff:
65 | instance/
66 | .webassets-cache
67 |
68 | # Scrapy stuff:
69 | .scrapy
70 |
71 | # Sphinx documentation
72 | docs/_build/
73 |
74 | # PyBuilder
75 | .pybuilder/
76 | target/
77 |
78 | # Jupyter Notebook
79 | .ipynb_checkpoints
80 |
81 | # IPython
82 | profile_default/
83 | ipython_config.py
84 |
85 | # pyenv
86 | # For a library or package, you might want to ignore these files since the code is
87 | # intended to run in multiple environments; otherwise, check them in:
88 | # .python-version
89 |
90 | # pipenv
91 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
92 | # However, in case of collaboration, if having platform-specific dependencies or dependencies
93 | # having no cross-platform support, pipenv may install dependencies that don't work, or not
94 | # install all needed dependencies.
95 | #Pipfile.lock
96 |
97 | # poetry
98 | # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
99 | # This is especially recommended for binary packages to ensure reproducibility, and is more
100 | # commonly ignored for libraries.
101 | # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
102 | #poetry.lock
103 |
104 | # pdm
105 | # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
106 | #pdm.lock
107 | # pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
108 | # in version control.
109 | # https://pdm.fming.dev/#use-with-ide
110 | .pdm.toml
111 |
112 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
113 | __pypackages__/
114 |
115 | # Celery stuff
116 | celerybeat-schedule
117 | celerybeat.pid
118 |
119 | # SageMath parsed files
120 | *.sage.py
121 |
122 | # Environments
123 | .env
124 | .venv
125 | env/
126 | venv/
127 | ENV/
128 | env.bak/
129 | venv.bak/
130 |
131 | # Spyder project settings
132 | .spyderproject
133 | .spyproject
134 |
135 | # Rope project settings
136 | .ropeproject
137 |
138 | # mkdocs documentation
139 | /site
140 |
141 | # mypy
142 | .mypy_cache/
143 | .dmypy.json
144 | dmypy.json
145 |
146 | # Pyre type checker
147 | .pyre/
148 |
149 | # pytype static type analyzer
150 | .pytype/
151 |
152 | # Cython debug symbols
153 | cython_debug/
154 |
155 | # PyCharm
156 | # JetBrains specific template is maintained in a separate JetBrains.gitignore that can
157 | # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
158 | # and can be added to the global gitignore or merged into this file. For a more nuclear
159 | # option (not recommended) you can uncomment the following to ignore the entire idea folder.
160 | #.idea/
161 |
162 |
--------------------------------------------------------------------------------
/country_by_continent.csv:
--------------------------------------------------------------------------------
1 | "country","continent"
2 | "Algeria","Africa"
3 | "Angola","Africa"
4 | "Benin","Africa"
5 | "Botswana","Africa"
6 | "Burkina Faso","Africa"
7 | "Burundi","Africa"
8 | "Cameroon","Africa"
9 | "Cape Verde","Africa"
10 | "Central African Republic","Africa"
11 | "Chad","Africa"
12 | "Comoros","Africa"
13 | "Djibouti","Africa"
14 | "DR Congo","Africa"
15 | "Egypt","Africa"
16 | "Equatorial Guinea","Africa"
17 | "Eritrea","Africa"
18 | "Eswatini","Africa"
19 | "Ethiopia","Africa"
20 | "Gabon","Africa"
21 | "Gambia","Africa"
22 | "Ghana","Africa"
23 | "Guinea","Africa"
24 | "Guinea-Bissau","Africa"
25 | "Ivory Coast","Africa"
26 | "Kenya","Africa"
27 | "Lesotho","Africa"
28 | "Liberia","Africa"
29 | "Libya","Africa"
30 | "Madagascar","Africa"
31 | "Malawi","Africa"
32 | "Mali","Africa"
33 | "Mauritania","Africa"
34 | "Mauritius","Africa"
35 | "Mayotte","Africa"
36 | "Morocco","Africa"
37 | "Mozambique","Africa"
38 | "Namibia","Africa"
39 | "Niger","Africa"
40 | "Nigeria","Africa"
41 | "Republic of the Congo","Africa"
42 | "Reunion","Africa"
43 | "Rwanda","Africa"
44 | "Sao Tome and Principe","Africa"
45 | "Senegal","Africa"
46 | "Seychelles","Africa"
47 | "Sierra Leone","Africa"
48 | "Somalia","Africa"
49 | "South Africa","Africa"
50 | "South Sudan","Africa"
51 | "Sudan","Africa"
52 | "Tanzania","Africa"
53 | "Togo","Africa"
54 | "Tunisia","Africa"
55 | "Uganda","Africa"
56 | "Western Sahara","Africa"
57 | "Zambia","Africa"
58 | "Zimbabwe","Africa"
59 | "Afghanistan","Asia"
60 | "Armenia","Asia"
61 | "Azerbaijan","Asia"
62 | "Bahrain","Asia"
63 | "Bangladesh","Asia"
64 | "Bhutan","Asia"
65 | "Brunei","Asia"
66 | "Cambodia","Asia"
67 | "China","Asia"
68 | "Georgia","Asia"
69 | "Hong Kong","Asia"
70 | "India","Asia"
71 | "Indonesia","Asia"
72 | "Iran","Asia"
73 | "Iraq","Asia"
74 | "Israel","Asia"
75 | "Japan","Asia"
76 | "Jordan","Asia"
77 | "Kazakhstan","Asia"
78 | "Kuwait","Asia"
79 | "Kyrgyzstan","Asia"
80 | "Laos","Asia"
81 | "Lebanon","Asia"
82 | "Macau","Asia"
83 | "Malaysia","Asia"
84 | "Maldives","Asia"
85 | "Mongolia","Asia"
86 | "Myanmar","Asia"
87 | "Nepal","Asia"
88 | "North Korea","Asia"
89 | "Oman","Asia"
90 | "Pakistan","Asia"
91 | "Palestine","Asia"
92 | "Philippines","Asia"
93 | "Qatar","Asia"
94 | "Saudi Arabia","Asia"
95 | "Singapore","Asia"
96 | "South Korea","Asia"
97 | "Sri Lanka","Asia"
98 | "Syria","Asia"
99 | "Taiwan","Asia"
100 | "Tajikistan","Asia"
101 | "Thailand","Asia"
102 | "Timor-Leste","Asia"
103 | "Turkey","Asia"
104 | "Turkmenistan","Asia"
105 | "United Arab Emirates","Asia"
106 | "Uzbekistan","Asia"
107 | "Vietnam","Asia"
108 | "Yemen","Asia"
109 | "Albania","Europe"
110 | "Andorra","Europe"
111 | "Austria","Europe"
112 | "Belarus","Europe"
113 | "Belgium","Europe"
114 | "Bosnia and Herzegovina","Europe"
115 | "Bulgaria","Europe"
116 | "Croatia","Europe"
117 | "Cyprus","Europe"
118 | "Czech Republic","Europe"
119 | "Denmark","Europe"
120 | "Estonia","Europe"
121 | "Faroe Islands","Europe"
122 | "Finland","Europe"
123 | "France","Europe"
124 | "Germany","Europe"
125 | "Gibraltar","Europe"
126 | "Greece","Europe"
127 | "Guernsey","Europe"
128 | "Hungary","Europe"
129 | "Iceland","Europe"
130 | "Ireland","Europe"
131 | "Isle of Man","Europe"
132 | "Italy","Europe"
133 | "Jersey","Europe"
134 | "Latvia","Europe"
135 | "Liechtenstein","Europe"
136 | "Lithuania","Europe"
137 | "Luxembourg","Europe"
138 | "Malta","Europe"
139 | "Moldova","Europe"
140 | "Monaco","Europe"
141 | "Montenegro","Europe"
142 | "Netherlands","Europe"
143 | "North Macedonia","Europe"
144 | "Norway","Europe"
145 | "Poland","Europe"
146 | "Portugal","Europe"
147 | "Romania","Europe"
148 | "Russia","Europe"
149 | "San Marino","Europe"
150 | "Serbia","Europe"
151 | "Slovakia","Europe"
152 | "Slovenia","Europe"
153 | "Spain","Europe"
154 | "Sweden","Europe"
155 | "Switzerland","Europe"
156 | "Ukraine","Europe"
157 | "United Kingdom","Europe"
158 | "Vatican City","Europe"
159 | "Anguilla","North America"
160 | "Antigua and Barbuda","North America"
161 | "Aruba","North America"
162 | "Bahamas","North America"
163 | "Barbados","North America"
164 | "Belize","North America"
165 | "Bermuda","North America"
166 | "British Virgin Islands","North America"
167 | "Canada","North America"
168 | "Cayman Islands","North America"
169 | "Costa Rica","North America"
170 | "Cuba","North America"
171 | "Curacao","North America"
172 | "Dominica","North America"
173 | "Dominican Republic","North America"
174 | "El Salvador","North America"
175 | "Greenland","North America"
176 | "Grenada","North America"
177 | "Guadeloupe","North America"
178 | "Guatemala","North America"
179 | "Haiti","North America"
180 | "Honduras","North America"
181 | "Jamaica","North America"
182 | "Martinique","North America"
183 | "Mexico","North America"
184 | "Montserrat","North America"
185 | "Nicaragua","North America"
186 | "Panama","North America"
187 | "Puerto Rico","North America"
188 | "Saint Barthelemy","North America"
189 | "Saint Kitts and Nevis","North America"
190 | "Saint Lucia","North America"
191 | "Saint Martin","North America"
192 | "Saint Pierre and Miquelon","North America"
193 | "Saint Vincent and the Grenadines","North America"
194 | "Sint Maarten","North America"
195 | "Trinidad and Tobago","North America"
196 | "Turks and Caicos Islands","North America"
197 | "United States","North America"
198 | "United States Virgin Islands","North America"
199 | "American Samoa","Oceania"
200 | "Australia","Oceania"
201 | "Cook Islands","Oceania"
202 | "Fiji","Oceania"
203 | "French Polynesia","Oceania"
204 | "Guam","Oceania"
205 | "Kiribati","Oceania"
206 | "Marshall Islands","Oceania"
207 | "Micronesia","Oceania"
208 | "Nauru","Oceania"
209 | "New Caledonia","Oceania"
210 | "New Zealand","Oceania"
211 | "Niue","Oceania"
212 | "Northern Mariana Islands","Oceania"
213 | "Palau","Oceania"
214 | "Papua New Guinea","Oceania"
215 | "Samoa","Oceania"
216 | "Solomon Islands","Oceania"
217 | "Tokelau","Oceania"
218 | "Tonga","Oceania"
219 | "Tuvalu","Oceania"
220 | "Vanuatu","Oceania"
221 | "Wallis and Futuna","Oceania"
222 | "Argentina","South America"
223 | "Bolivia","South America"
224 | "Brazil","South America"
225 | "Chile","South America"
226 | "Colombia","South America"
227 | "Ecuador","South America"
228 | "Falkland Islands","South America"
229 | "French Guiana","South America"
230 | "Guyana","South America"
231 | "Paraguay","South America"
232 | "Peru","South America"
233 | "Suriname","South America"
234 | "Uruguay","South America"
235 | "Venezuela","South America"
--------------------------------------------------------------------------------
/ProjectProposalGroup_035-Fa22.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "metadata": {},
6 | "source": [
7 | "# COGS 108 - Final Project Proposal"
8 | ]
9 | },
10 | {
11 | "cell_type": "markdown",
12 | "metadata": {},
13 | "source": [
14 | "# Names\n",
15 | "\n",
16 | "- Maxwell Fang\n",
17 | "- Long Le\n",
18 | "- Huy Trinh\n",
19 | "- Hasan Shaikh\n",
20 | "- Mohammed Master"
21 | ]
22 | },
23 | {
24 | "cell_type": "markdown",
25 | "metadata": {},
26 | "source": [
27 | "# Research Question"
28 | ]
29 | },
30 | {
31 | "cell_type": "markdown",
32 | "metadata": {},
33 | "source": [
34 | "How does the spice level of instant ramen affect the star rating on the www.theramenrater.com in regards to preference across each country\n",
35 | "\n"
36 | ]
37 | },
38 | {
39 | "cell_type": "markdown",
40 | "metadata": {},
41 | "source": [
42 | "## Background and Prior Work"
43 | ]
44 | },
45 | {
46 | "cell_type": "markdown",
47 | "metadata": {},
48 | "source": [
49 | "- Include a general introduction to your topic\n",
50 | " \n",
51 | " * What is Instant Ramen? Instant Ramen is a staple food for college students and busy people throughout the world. People eat it for breakfast, lunch, dinner, and at any time of the day. Instant Ramen is a cheap option and consists of precooked noodles with some type of flavor powder. It is common around the world and each country has a favorite brand. Some famous brands are NongShim, Indomie, and Maruchan. \n",
52 | " - This topic is very significant to our group because as college students we all have tried various types of noodles. We love food and want to share our love of food and combine it with data science. We have looked deep into our dataset and categorized it with brands, countries, and ratings. \n",
53 | " - We are curious to see if the spice level of instant ramen can have an effect on the rating deepening in the different countries it is tried in. We know that some countries prefer spicy food so possibly the spicier instant ramens has a higher rating three, whilst countries that cannot tolerate much spice may have a greater rating for less spicier flavors. \n",
54 | "\n",
55 | "Include explanation of what work has been done previously\n",
56 | "\n",
57 | "- Previous work done on this topic has mostly focused on the most popular flavors/varieties of ramen across the world with a large focus on the types of brands that are favorites along with data often being extracted from online reviews. The goal of these studies was to use machine learning to predict how certain varieties of ramen would prosper in certain regions while simultaneously analyzing the characteristics that make certain ramen better than others.\n",
58 | "\n",
59 | "Include citations or links to previous work\n",
60 | "\n",
61 | "- https://towardsdatascience.com/exploring-the-world-of-ramen-through-text-analytics-1131280c4c6b\n",
62 | "- https://ghostleek.medium.com/exploratory-data-analysis-with-ramen-ratings-d4b9394b0acf"
63 | ]
64 | },
65 | {
66 | "cell_type": "markdown",
67 | "metadata": {},
68 | "source": [
69 | "# Hypothesis\n"
70 | ]
71 | },
72 | {
73 | "cell_type": "markdown",
74 | "metadata": {},
75 | "source": [
76 | "What is your main hypothesis/predictions about what the answer to your question is? Briefly explain your thinking. (2-3 sentences)\n",
77 | "\n",
78 | "We believe that (South and Southeast) Asian countries have a higher star rating for spicy instant ramen noodles than other countries on average.\n",
79 | "A large portion of ramen manufacturers in Asia produce spicy-flavored ramen; we believe that ratings for spicy ramen will be higher than those that are not spicy.\n"
80 | ]
81 | },
82 | {
83 | "cell_type": "markdown",
84 | "metadata": {},
85 | "source": [
86 | "# Data"
87 | ]
88 | },
89 | {
90 | "cell_type": "markdown",
91 | "metadata": {},
92 | "source": [
93 | "1. Explain what the **ideal** dataset you would want to answer this question. (This should include: What variables? How many observations? Who/what/how would these data be collected? How would these data be stored/organized?)\n",
94 | "\n",
95 | "This question is quite clear to the audience that they can imagine themselves. An ideal dataset should contain the average rating from a random sample instant ramen noodle. An ideal dataset should include the variables: 1) spiciness level 2) country origin 3) average rating from a random sample population. Ideally, we should have all types of ramen in our dataset. We would want to see the organization's name and its original country that produces the ramen. Within those products, we will see the package name and also how spicy it is. It is also important to see how customers would rate those instant ramen when they try them. We should also know the population that rate those ramen since not everyone can try all of them. A record of around more than 1000 rows should be durable enough to analyze. Since the dataset is not to that extent of more than 1 GB, we can store it in a csv file instead of zipping it into json format.\n",
96 | "\n",
97 | "\n",
98 | "2. Search for potential **real** datasets that could provide you with something useful for this project. You do not have to find every piece of data you will use, but you do need to have demonstrated some idea that (a) this data is gettable and (b) that this data may be different from what your ideal is\n",
99 | "\n",
100 | "(a) A dataset we found is on Kaggle Top Ramen Ratings 2022 . Another one (Ramen Ratings) we use is from 5 years ago when we compare the trend of ratings. Both dataset have the same variables: brand name, variety or ramen package name, style, country and stars. They are well-formatted or cleansed csv files that we can download from Kaggle since they are publicly available. It is quite limited to the fact that there is no indication of the general population rating.*\n",
101 | "\n",
102 | "(b) After having researched on how the dataset is made, we found that the ratings come from only 1 person, which is from a guy who has eaten and made an opinion on those ramen surrounding his areas. We do not have average ratings described in our ideal circumstances. Kaggle dataset collected information from The Definitive Guide To Samyang Buldak 2.0 article. How we know that this site is only reviewed by one person is proven from the author’s HOW I REVIEW / DISCLOSURE. Another issue is extracting the spiciness level from “variety” column (text field), which describes the name of the ramen package. We have to make an assumption that certain flavors will be spicy such as “kimchi”, “curry”, “heat”, “hot”."
103 | ]
104 | },
105 | {
106 | "cell_type": "markdown",
107 | "metadata": {},
108 | "source": [
109 | "# Ethics & Privacy"
110 | ]
111 | },
112 | {
113 | "cell_type": "markdown",
114 | "metadata": {},
115 | "source": [
116 | "- Thoughtful discussion of ethical concerns included\n",
117 | "\n",
118 | " - Ethical concerns may be if the data set includes any intrusive or confidential information regarding persons personal data.\n",
119 | "- Ethical concerns consider the whole data science process (question asked, data collected, data being used, the bias in data, analysis, post-analysis, etc.)\n",
120 | "\n",
121 | " * How the data was collected should also be considered in regards to if the data is allowed to be used. For example, if the subject chose not to use their data for research, it would have been unethical to use their responses. \n",
122 | " * The questions asked in order to attain data should also not be leading or biased in any form (this must be confirmed). \n",
123 | " * The inherent cultural bias also needs to be addressed regarding ramen in certain countries such as Asian countries having a preference for it while Middle Eastern countries may not have a preference for it.\n",
124 | " * Analysis of the data needs to also be interpreted equally among all countries and without any implicit bias from researchers. This potential bias is unavoidable but must be addressed within the post-analysis if relevant to the study.\n",
125 | "\n",
126 | "- How your group handled bias/ethical concerns clearly described\n",
127 | " - How were questions asked to obtain data? Any bias within questions asked? Any questions left out?\n",
128 | " - Is there a need to address ramen’s popularity within other countries? Data and research may imply a direct yes or no to our research question without knowing if ramen is even eaten in a region.\n",
129 | " - Acknowledgement of implicit bias as we, as researchers, are all predominantly 1st or 2nd gen American Asian/American South Asian UC San Diego students of the same age range.\n",
130 | " - Spice level of ramen is also subjective. Some ramen packets may be considered spicy in one region but mild in another. There must be a level of standardization in order to make valid conclusions.\n",
131 | " - The intentions of us researchers must be made completely clear without negative thoughts or actions in mind. This includes us, the researchers, explaining cohesively our motives for this research and why we specifically chose this topic to study and explore. The data must be handled in a fair manner whilst interpreting it respectfully with careful regard for each culture and country’s respect.\n",
132 | "\n",
133 | "\n",
134 | "\n",
135 | "Acknowledge and address any ethics & privacy related issues of your question(s), proposed dataset(s), and/or analyses. Use the information provided in lecture to guide your group discussion and thinking. If you need further guidance, check out [Deon's Ethics Checklist](http://deon.drivendata.org/#data-science-ethics-checklist). In particular:\n",
136 | "\n",
137 | "- Are there any biases/privacy/terms of use issues with the data you propsed?\n",
138 | "\n",
139 | "\n",
140 | " - We believe that there might be overrepresentation/underrepresentation of companies that do not use email as their main method of communication or outside of the U.S. Companies that are outside of the U.S. might have a higher barrier to send their instant ramen samples to the United States compared to companies that have their factory/distributing factory inside the U.S.\n",
141 | " - Since our question deals with a certain characteristic (spiciness) of the instant ramen, we do not care about data privacy because we do not have to anonymize the identity of the noodles from our dataset. In fact, we plan on using all of the available attributes of the dataset to help with our data analysis. \n",
142 | " - In terms of terms of use, we can see that the dataset is publicly available on Kaggle for us to perform EDA .\n",
143 | "\n",
144 | "\n",
145 | "- Are there potential biases in your dataset(s), in terms of who it composes, and how it was collected, that may be problematic in terms of it allowing for equitable analysis? (For example, does your data exclude particular populations, or is it likely to reflect particular human biases in a way that could be a problem?)\n",
146 | "\n",
147 | "\n",
148 | " - According to the original website where the data is collected, the website owner (Hans Lienesch) contacted different instant ramen companies by email and asked for their sample through mail. Because of this way of obtaining the sample noodle, some companies that do not use email as their main method of communication or companies outside of the United States find it more difficult to have their products sent to this reviewer. Besides, he also obtains his instant ramen noodles for review from buying the instant ramen noodles from his local grocery store. Because of this, the instant ramen noodles in the dataset is a biased sample of the noodles that are available in his area.\n",
149 | " - Besides, the review (Hans Lienesch) might also miss instant ramen companies that have a small representation in the United States. Because of this, he might not contact those companies all together.\n",
150 | "\n",
151 | "- How will you set out to detect these specific biases before, during, and after/when communicating your analysis?\n",
152 | "\n",
153 | "\n",
154 | " - Since our problem concerns the data sampling process. I do not think that we can resample the data to overcome this sampling bias. However, we would be very careful with our conclusion statement.\n",
155 | " - We can already establish that the sample of instant ramen noodles is a biased sample.\n",
156 | "\n",
157 | "\n",
158 | "\n",
159 | "- Are there any other issues related to your topic area, data, and/or analyses that are potentially problematic in terms of data privacy and equitable impact?\n",
160 | "\n",
161 | "Beside the identified problem, we do not think that there is any other big problem with our hypothesis question.\n",
162 | "- How will you handle issues you identified?\n",
163 | "\n",
164 | "\n",
165 | " - Because of this biased data sampling method, we cannot generalize the answer to infer any causal relationship.\n",
166 | " - We can only generalize our findings to the instant ramen samples that he received.\n"
167 | ]
168 | },
169 | {
170 | "cell_type": "markdown",
171 | "metadata": {},
172 | "source": [
173 | "# Team Expectations "
174 | ]
175 | },
176 | {
177 | "cell_type": "markdown",
178 | "metadata": {},
179 | "source": [
180 | "\n",
181 | "Read over the [COGS108 Team Policies](https://github.com/COGS108/Projects/blob/master/COGS108_TeamPolicies.md) individually. Then, include your group’s expectations of one another for successful completion of your COGS108 project below. Discuss and agree on what all of your expectations are. Discuss how your team will communicate throughout the quarter and consider how you will communicate respectfully should conflicts arise. By including each member’s name above and by adding their name to the submission, you are indicating that you have read the COGS108 Team Policies, accept your team’s expectations below, and have every intention to fulfill them. These expectations are for your team’s use and benefit — they won’t be graded for their details.\n",
182 | "\n",
183 | "* *Team Expectation 1*\n",
184 | " * Please complete the assigned task. If you believe that the work distribution is unfair, please speak up.\n",
185 | "* *Team Expectation 2*\n",
186 | " * If you are stuck on a certain part of your task, please reach out to other members (or faculties). Just in the case of an emergency, please let other members know ahead of time so that the team can rearrange the work appropriately.\n",
187 | " \n",
188 | "* *Team Expecation 3*\n",
189 | " * Keep active communication with other members of the group. Notify the group of changes into the repo or any contributions towards the project.\n"
190 | ]
191 | },
192 | {
193 | "cell_type": "markdown",
194 | "metadata": {},
195 | "source": [
196 | "# Project Timeline Proposal"
197 | ]
198 | },
199 | {
200 | "cell_type": "markdown",
201 | "metadata": {},
202 | "source": [
203 | "Specify your team's specific project timeline. An example timeline has been provided. Changes the dates, times, names, and details to fit your group's plan.\n",
204 | "\n",
205 | "If you think you will need any special resources or training outside what we have covered in COGS 108 to solve your problem, then your proposal should state these clearly. For example, if you have selected a problem that involves implementing multiple neural networks, please state this so we can make sure you know what you’re doing and so we can point you to resources you will need to implement your project. Note that you are not required to use outside methods.\n",
206 | "\n",
207 | "\n",
208 | "\n",
209 | "| Meeting Date | Meeting Time| Completed Before Meeting | Discuss at Meeting |\n",
210 | "|---|---|---|---|\n",
211 | "| 1/20 | 1 PM | Read & Think about COGS 108 expectations; brainstorm topics/questions | Determine best form of communication; Discuss and decide on final project topic; discuss hypothesis; begin background research | \n",
212 | "| 1/26 | 10 AM | Do background research on topic | Discuss ideal dataset(s) and ethics; draft project proposal | \n",
213 | "| 2/1 | 10 AM | Edit, finalize, and submit proposal; Search for datasets | Discuss Wrangling and possible analytical approaches; Assign group members to lead each specific part |\n",
214 | "| 2/14 | 6 PM | Import & Wrangle Data (Ant Man); EDA (Hulk) | Review/Edit wrangling/EDA; Discuss Analysis Plan |\n",
215 | "| 2/23 | 12 PM | Finalize wrangling/EDA; Begin Analysis (Iron Man; Thor) | Discuss/edit Analysis; Complete project check-in |\n",
216 | "| 3/13 | 12 PM | Complete analysis; Draft results/conclusion/discussion (Wasp)| Discuss/edit full project |\n",
217 | "| 3/19 | Before 11:59 PM | NA | Turn in Final Project & Group Project Surveys |"
218 | ]
219 | },
220 | {
221 | "cell_type": "code",
222 | "execution_count": null,
223 | "metadata": {},
224 | "outputs": [],
225 | "source": []
226 | }
227 | ],
228 | "metadata": {
229 | "kernelspec": {
230 | "display_name": "Python 3",
231 | "language": "python",
232 | "name": "python3"
233 | },
234 | "language_info": {
235 | "codemirror_mode": {
236 | "name": "ipython",
237 | "version": 3
238 | },
239 | "file_extension": ".py",
240 | "mimetype": "text/x-python",
241 | "name": "python",
242 | "nbconvert_exporter": "python",
243 | "pygments_lexer": "ipython3",
244 | "version": "3.8.5"
245 | }
246 | },
247 | "nbformat": 4,
248 | "nbformat_minor": 2
249 | }
250 |
--------------------------------------------------------------------------------
/DataCheckpointGroup_035-Fa22.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "metadata": {},
6 | "source": [
7 | "# COGS 108 - Data Checkpoint"
8 | ]
9 | },
10 | {
11 | "cell_type": "markdown",
12 | "metadata": {},
13 | "source": [
14 | "# Names\n",
15 | "\n",
16 | "- Maxwell Fang\n",
17 | "- Long Le\n",
18 | "- Huy Trinh\n",
19 | "- Hasan Shaikh\n",
20 | "- Mohammed Master"
21 | ]
22 | },
23 | {
24 | "cell_type": "markdown",
25 | "metadata": {},
26 | "source": [
27 | "\n",
28 | "# Research Question"
29 | ]
30 | },
31 | {
32 | "cell_type": "markdown",
33 | "metadata": {},
34 | "source": [
35 | "How does the spice level of instant ramen affect the star rating on the www.theramenrater.com in regards to preference across each country?"
36 | ]
37 | },
38 | {
39 | "cell_type": "markdown",
40 | "metadata": {},
41 | "source": [
42 | "# Dataset(s)"
43 | ]
44 | },
45 | {
46 | "cell_type": "markdown",
47 | "metadata": {},
48 | "source": [
49 | "\n",
50 | "- Dataset Name: Top Ramen Ratings 2022\n",
51 | "- Link to the dataset: https://www.kaggle.com/datasets/ankanhore545/top-ramen-ratings-2022\n",
52 | "- Number of observations: 4120\n",
53 | "\n",
54 | "The Top Ramen Ratings 2022 dataset is found on Kaggle. The dataset contains brand name, variety or ramen package name, style, country and stars. "
55 | ]
56 | },
57 | {
58 | "cell_type": "markdown",
59 | "metadata": {},
60 | "source": [
61 | "# Setup"
62 | ]
63 | },
64 | {
65 | "cell_type": "code",
66 | "execution_count": 1,
67 | "metadata": {},
68 | "outputs": [],
69 | "source": [
70 | "from tqdm.notebook import tqdm\n",
71 | "import pandas as pd\n",
72 | "import numpy as np\n",
73 | "import matplotlib.pyplot as plt\n",
74 | "import seaborn as sns\n",
75 | "from collections import defaultdict\n",
76 | "from category_dict import spiciness_dict\n",
77 | "import re\n",
78 | "\n",
79 | "%matplotlib inline\n",
80 | "%config InlineBackend.figure_format = 'retina' # Higher resolution figures"
81 | ]
82 | },
83 | {
84 | "cell_type": "code",
85 | "execution_count": 2,
86 | "metadata": {},
87 | "outputs": [
88 | {
89 | "data": {
90 | "text/html": [
91 | "
\n",
92 | "\n",
105 | "
\n",
106 | " \n",
107 | " \n",
108 | " | \n",
109 | " Review # | \n",
110 | " Brand | \n",
111 | " Variety | \n",
112 | " Style | \n",
113 | " Country | \n",
114 | " Stars | \n",
115 | " T | \n",
116 | "
\n",
117 | " \n",
118 | " \n",
119 | " \n",
120 | " | 0 | \n",
121 | " 4120 | \n",
122 | " MIT | \n",
123 | " Shallot Sauce Dry Noodle | \n",
124 | " Pack | \n",
125 | " Taiwan | \n",
126 | " 3 | \n",
127 | " NaN | \n",
128 | "
\n",
129 | " \n",
130 | " | 1 | \n",
131 | " 4119 | \n",
132 | " Sapporo Ichiban | \n",
133 | " Tonkotsu Ramen Japanese Style Noodles | \n",
134 | " Bowl | \n",
135 | " United States | \n",
136 | " 4.5 | \n",
137 | " NaN | \n",
138 | "
\n",
139 | " \n",
140 | " | 2 | \n",
141 | " 4118 | \n",
142 | " Binh Tay | \n",
143 | " Mi Kiwi | \n",
144 | " Bowl | \n",
145 | " Vietnam | \n",
146 | " 3 | \n",
147 | " NaN | \n",
148 | "
\n",
149 | " \n",
150 | " | 3 | \n",
151 | " 4117 | \n",
152 | " Charming Couple | \n",
153 | " Biang Biang Scallion Chicken Sauce | \n",
154 | " Pack | \n",
155 | " Taiwan | \n",
156 | " 4.5 | \n",
157 | " NaN | \n",
158 | "
\n",
159 | " \n",
160 | " | 4 | \n",
161 | " 4116 | \n",
162 | " immi | \n",
163 | " Tom Yum Shrimp Flavor Ramen Soup | \n",
164 | " Pack | \n",
165 | " United States | \n",
166 | " 2.75 | \n",
167 | " NaN | \n",
168 | "
\n",
169 | " \n",
170 | "
\n",
171 | "
"
172 | ],
173 | "text/plain": [
174 | " Review # Brand Variety Style \\\n",
175 | "0 4120 MIT Shallot Sauce Dry Noodle Pack \n",
176 | "1 4119 Sapporo Ichiban Tonkotsu Ramen Japanese Style Noodles Bowl \n",
177 | "2 4118 Binh Tay Mi Kiwi Bowl \n",
178 | "3 4117 Charming Couple Biang Biang Scallion Chicken Sauce Pack \n",
179 | "4 4116 immi Tom Yum Shrimp Flavor Ramen Soup Pack \n",
180 | "\n",
181 | " Country Stars T \n",
182 | "0 Taiwan 3 NaN \n",
183 | "1 United States 4.5 NaN \n",
184 | "2 Vietnam 3 NaN \n",
185 | "3 Taiwan 4.5 NaN \n",
186 | "4 United States 2.75 NaN "
187 | ]
188 | },
189 | "execution_count": 2,
190 | "metadata": {},
191 | "output_type": "execute_result"
192 | }
193 | ],
194 | "source": [
195 | "## YOUR CODE HERE\n",
196 | "ramen = pd.read_csv('ramen_rating.csv')\n",
197 | "ramen.head()"
198 | ]
199 | },
200 | {
201 | "cell_type": "markdown",
202 | "metadata": {},
203 | "source": [
204 | "# Data Cleaning"
205 | ]
206 | },
207 | {
208 | "cell_type": "markdown",
209 | "metadata": {},
210 | "source": [
211 | "First, we want to only keep the columns that are necessary for our analysis. Columns such as `Review #` and `T` are not needed for this analysis. Because of that reason, we are removing those columns from our analysis."
212 | ]
213 | },
214 | {
215 | "cell_type": "code",
216 | "execution_count": 3,
217 | "metadata": {},
218 | "outputs": [
219 | {
220 | "data": {
221 | "text/html": [
222 | "\n",
223 | "\n",
236 | "
\n",
237 | " \n",
238 | " \n",
239 | " | \n",
240 | " Brand | \n",
241 | " Variety | \n",
242 | " Style | \n",
243 | " Country | \n",
244 | " Stars | \n",
245 | "
\n",
246 | " \n",
247 | " \n",
248 | " \n",
249 | " | 0 | \n",
250 | " MIT | \n",
251 | " Shallot Sauce Dry Noodle | \n",
252 | " Pack | \n",
253 | " Taiwan | \n",
254 | " 3 | \n",
255 | "
\n",
256 | " \n",
257 | " | 1 | \n",
258 | " Sapporo Ichiban | \n",
259 | " Tonkotsu Ramen Japanese Style Noodles | \n",
260 | " Bowl | \n",
261 | " United States | \n",
262 | " 4.5 | \n",
263 | "
\n",
264 | " \n",
265 | " | 2 | \n",
266 | " Binh Tay | \n",
267 | " Mi Kiwi | \n",
268 | " Bowl | \n",
269 | " Vietnam | \n",
270 | " 3 | \n",
271 | "
\n",
272 | " \n",
273 | " | 3 | \n",
274 | " Charming Couple | \n",
275 | " Biang Biang Scallion Chicken Sauce | \n",
276 | " Pack | \n",
277 | " Taiwan | \n",
278 | " 4.5 | \n",
279 | "
\n",
280 | " \n",
281 | " | 4 | \n",
282 | " immi | \n",
283 | " Tom Yum Shrimp Flavor Ramen Soup | \n",
284 | " Pack | \n",
285 | " United States | \n",
286 | " 2.75 | \n",
287 | "
\n",
288 | " \n",
289 | "
\n",
290 | "
"
291 | ],
292 | "text/plain": [
293 | " Brand Variety Style \\\n",
294 | "0 MIT Shallot Sauce Dry Noodle Pack \n",
295 | "1 Sapporo Ichiban Tonkotsu Ramen Japanese Style Noodles Bowl \n",
296 | "2 Binh Tay Mi Kiwi Bowl \n",
297 | "3 Charming Couple Biang Biang Scallion Chicken Sauce Pack \n",
298 | "4 immi Tom Yum Shrimp Flavor Ramen Soup Pack \n",
299 | "\n",
300 | " Country Stars \n",
301 | "0 Taiwan 3 \n",
302 | "1 United States 4.5 \n",
303 | "2 Vietnam 3 \n",
304 | "3 Taiwan 4.5 \n",
305 | "4 United States 2.75 "
306 | ]
307 | },
308 | "execution_count": 3,
309 | "metadata": {},
310 | "output_type": "execute_result"
311 | }
312 | ],
313 | "source": [
314 | "## YOUR CODE HERE\n",
315 | "## FEEL FREE TO ADD MULTIPLE CELLS PER SECTION\n",
316 | "df = ramen.drop(columns=['T','Review #'])\n",
317 | "df.head()"
318 | ]
319 | },
320 | {
321 | "cell_type": "markdown",
322 | "metadata": {},
323 | "source": [
324 | "## Feature Engineering\n",
325 | "We want to add the spiciness column from extracting the text from `Variety` column. First, we want to see if `mild`, `medium`, `spicy`, `hot`, or `fiery` is in the text itself. If any of those words is in the text, we would use the according spiciness scale to categorize the group."
326 | ]
327 | },
328 | {
329 | "cell_type": "code",
330 | "execution_count": 4,
331 | "metadata": {},
332 | "outputs": [
333 | {
334 | "data": {
335 | "text/html": [
336 | "\n",
337 | "\n",
350 | "
\n",
351 | " \n",
352 | " \n",
353 | " | \n",
354 | " Brand | \n",
355 | " Variety | \n",
356 | " Style | \n",
357 | " Country | \n",
358 | " Stars | \n",
359 | " spiciness | \n",
360 | "
\n",
361 | " \n",
362 | " \n",
363 | " \n",
364 | " | 0 | \n",
365 | " MIT | \n",
366 | " Shallot Sauce Dry Noodle | \n",
367 | " Pack | \n",
368 | " Taiwan | \n",
369 | " 3 | \n",
370 | " pepper free | \n",
371 | "
\n",
372 | " \n",
373 | " | 1 | \n",
374 | " Sapporo Ichiban | \n",
375 | " Tonkotsu Ramen Japanese Style Noodles | \n",
376 | " Bowl | \n",
377 | " United States | \n",
378 | " 4.5 | \n",
379 | " pepper free | \n",
380 | "
\n",
381 | " \n",
382 | " | 2 | \n",
383 | " Binh Tay | \n",
384 | " Mi Kiwi | \n",
385 | " Bowl | \n",
386 | " Vietnam | \n",
387 | " 3 | \n",
388 | " pepper free | \n",
389 | "
\n",
390 | " \n",
391 | " | 3 | \n",
392 | " Charming Couple | \n",
393 | " Biang Biang Scallion Chicken Sauce | \n",
394 | " Pack | \n",
395 | " Taiwan | \n",
396 | " 4.5 | \n",
397 | " pepper free | \n",
398 | "
\n",
399 | " \n",
400 | " | 4 | \n",
401 | " immi | \n",
402 | " Tom Yum Shrimp Flavor Ramen Soup | \n",
403 | " Pack | \n",
404 | " United States | \n",
405 | " 2.75 | \n",
406 | " pepper free | \n",
407 | "
\n",
408 | " \n",
409 | "
\n",
410 | "
"
411 | ],
412 | "text/plain": [
413 | " Brand Variety Style \\\n",
414 | "0 MIT Shallot Sauce Dry Noodle Pack \n",
415 | "1 Sapporo Ichiban Tonkotsu Ramen Japanese Style Noodles Bowl \n",
416 | "2 Binh Tay Mi Kiwi Bowl \n",
417 | "3 Charming Couple Biang Biang Scallion Chicken Sauce Pack \n",
418 | "4 immi Tom Yum Shrimp Flavor Ramen Soup Pack \n",
419 | "\n",
420 | " Country Stars spiciness \n",
421 | "0 Taiwan 3 pepper free \n",
422 | "1 United States 4.5 pepper free \n",
423 | "2 Vietnam 3 pepper free \n",
424 | "3 Taiwan 4.5 pepper free \n",
425 | "4 United States 2.75 pepper free "
426 | ]
427 | },
428 | "execution_count": 4,
429 | "metadata": {},
430 | "output_type": "execute_result"
431 | }
432 | ],
433 | "source": [
434 | "spicy_levels = ['mild','medium','spicy','hot','fiery']\n",
435 | "def spiciness(text):\n",
436 | " text= text.lower()\n",
437 | " for spice in spicy_levels:\n",
438 | " if spice in text:\n",
439 | " return spice\n",
440 | " return 'pepper free'\n",
441 | "\n",
442 | "df['spiciness'] = df['Variety'].apply(spiciness)\n",
443 | "df.head()"
444 | ]
445 | },
446 | {
447 | "cell_type": "code",
448 | "execution_count": 5,
449 | "metadata": {},
450 | "outputs": [
451 | {
452 | "data": {
453 | "text/plain": [
454 | "pepper free 3523\n",
455 | "spicy 446\n",
456 | "hot 128\n",
457 | "mild 21\n",
458 | "fiery 2\n",
459 | "Name: spiciness, dtype: int64"
460 | ]
461 | },
462 | "execution_count": 5,
463 | "metadata": {},
464 | "output_type": "execute_result"
465 | }
466 | ],
467 | "source": [
468 | "df.spiciness.value_counts()"
469 | ]
470 | },
471 | {
472 | "cell_type": "markdown",
473 | "metadata": {},
474 | "source": [
475 | "With this simple text extraction, we are able to correctly categorize almost 600 of these instant ramens. Next, we want to use the popular phrases in the variety text to manually categorize those phrases. Every phrase is made up from splitting up the string in the column `Variety` to pairs of words. Then, we loop through instant noodle name to count the occurence of these phrases."
476 | ]
477 | },
478 | {
479 | "cell_type": "code",
480 | "execution_count": 6,
481 | "metadata": {},
482 | "outputs": [
483 | {
484 | "data": {
485 | "text/plain": [
486 | "[('instant noodles', 298),\n",
487 | " ('noodle soup', 178),\n",
488 | " ('cup noodles', 161),\n",
489 | " ('instant noodle', 134),\n",
490 | " ('tom yum', 117),\n",
491 | " ('chicken flavor', 104)]"
492 | ]
493 | },
494 | "execution_count": 6,
495 | "metadata": {},
496 | "output_type": "execute_result"
497 | }
498 | ],
499 | "source": [
500 | "# extracting spiciness from text\n",
501 | "# cleaning the string from the Variety column\n",
502 | "variety_list = (df['Variety'].str.lower()\n",
503 | " .str.replace('[^a-zA-Z]', ' ', regex=True)\n",
504 | " .str.replace(' +', ' ', regex=True).values)\n",
505 | "\n",
506 | "# finding the most popular phrases to catagorize\n",
507 | "pairs_dict = {}\n",
508 | "for i in variety_list:\n",
509 | " words = i.split()\n",
510 | " for j in range(len(words) - 1):\n",
511 | " pair = words[j] + ' ' + words[j+1]\n",
512 | " if pair in pairs_dict:\n",
513 | " pairs_dict[pair] += 1\n",
514 | " else:\n",
515 | " pairs_dict[pair] = 1\n",
516 | " \n",
517 | "# only take the popular phrase if it appears more than 10 times\n",
518 | "popular_phrases = [(i, pairs_dict[i]) for i in pairs_dict if pairs_dict[i] > 10]\n",
519 | "popular_phrases.sort(key=lambda x: x[1],reverse = True)\n",
520 | "popular_phrases[:6] "
521 | ]
522 | },
523 | {
524 | "cell_type": "markdown",
525 | "metadata": {},
526 | "source": [
527 | "From these most used phrases, phrases such as `'instant noodles'` or `'noodle soup'` or `'cup noodles'` are not informative. Because of that reason, we cannot use these uninformative phrases to categorize the spiciness. On the other hand, we can use phrases like `'tom yum'` and `'chicken flavor'` to categorize their spiciness level. We include our categorization in the `category_dict.py` file in the same directory as this Jupyter Notebook.\n",
528 | "\n",
529 | "Then, we loop through the every row to find if the `Variety` column contains any of the pairs that we previously categorized. If the phrase exists in its packaging name, we categorize it with its appropriate spiciness. \n",
530 | "\n",
531 | "In the end, we decided to merge `mild` and `medium` together since the Scoville units were relatively close to each other. Similarly, we decided to merge `hot` and `fiery` since the value counts for `fiery` were extremely low. "
532 | ]
533 | },
534 | {
535 | "cell_type": "code",
536 | "execution_count": 7,
537 | "metadata": {
538 | "scrolled": true
539 | },
540 | "outputs": [
541 | {
542 | "data": {
543 | "text/plain": [
544 | "pepper free 2834\n",
545 | "mild 526\n",
546 | "spicy 446\n",
547 | "hot 314\n",
548 | "Name: spiciness_2, dtype: int64"
549 | ]
550 | },
551 | "execution_count": 7,
552 | "metadata": {},
553 | "output_type": "execute_result"
554 | }
555 | ],
556 | "source": [
557 | "spicy_levels = ['mild','spicy','hot']\n",
558 | "def spiciness(text):\n",
559 | " # catch if the spciniess is directly in the text\n",
560 | " text = re.sub('r[^a-zA-Z]', ' ', text.lower())\n",
561 | " text = re.sub(' +', ' ', text)\n",
562 | " for spice in spicy_levels:\n",
563 | " if spice in text:\n",
564 | " return spice\n",
565 | " \n",
566 | " # catch if the spiciness is implied in the text \n",
567 | " words = text.split()\n",
568 | " for j in range(len(words) - 1):\n",
569 | " pair = words[j] + ' ' + words[j+1]\n",
570 | " if pair in spiciness_dict:\n",
571 | " return spiciness_dict[pair]\n",
572 | " return 'pepper free'\n",
573 | "\n",
574 | "\n",
575 | "df['spiciness_2'] = df['Variety'].apply(spiciness)\n",
576 | "df['spiciness_2'].value_counts()"
577 | ]
578 | },
579 | {
580 | "cell_type": "markdown",
581 | "metadata": {},
582 | "source": [
583 | "After using engineering the feature using this process, we are able to categorize up to almost 1300 noodles. In this case we followed the levels from the Scoville unit which is a unit of pungency. "
584 | ]
585 | },
586 | {
587 | "cell_type": "code",
588 | "execution_count": 8,
589 | "metadata": {},
590 | "outputs": [
591 | {
592 | "data": {
593 | "text/plain": [
594 | "0 2834\n",
595 | "(100, 2500) 526\n",
596 | "(2500, 10000) 446\n",
597 | "(10000, 50000) 314\n",
598 | "Name: Scoville, dtype: int64"
599 | ]
600 | },
601 | "execution_count": 8,
602 | "metadata": {},
603 | "output_type": "execute_result"
604 | }
605 | ],
606 | "source": [
607 | "#0 is non-pepper, \n",
608 | "#(100, 2500) is mild\n",
609 | "#(2500, 10000) is spicy\n",
610 | "#(10000, 50000) is hot\n",
611 | "#units: Scoville heat units\n",
612 | "scoville_levels = [0, (100, 2500), (2500, 10000), (10000, 50000)]\n",
613 | "\n",
614 | "def calc_scoville_level(level):\n",
615 | " if level == 'pepper free':\n",
616 | " return scoville_levels[0]\n",
617 | " elif level == 'mild':\n",
618 | " return scoville_levels[1]\n",
619 | " elif level == 'spicy':\n",
620 | " return scoville_levels[2]\n",
621 | " else:\n",
622 | " return scoville_levels[3]\n",
623 | " \n",
624 | "df['Scoville'] = df['spiciness_2'].apply(calc_scoville_level)\n",
625 | "df['Scoville'].value_counts()"
626 | ]
627 | },
628 | {
629 | "cell_type": "markdown",
630 | "metadata": {},
631 | "source": [
632 | "# Separate by Region"
633 | ]
634 | },
635 | {
636 | "cell_type": "markdown",
637 | "metadata": {},
638 | "source": [
639 | "'https://worldpopulationreview.com/country-rankings/list-of-countries-by-continent'"
640 | ]
641 | },
642 | {
643 | "cell_type": "markdown",
644 | "metadata": {},
645 | "source": [
646 | "We brought in a new dataset from the url above to respectfully separate each country with its continents. We did this so we can create a new column in the dataframe. Because there is already a country column, we are able to group the countries in a larger region known as \"continents\"."
647 | ]
648 | },
649 | {
650 | "cell_type": "code",
651 | "execution_count": 9,
652 | "metadata": {
653 | "scrolled": true
654 | },
655 | "outputs": [
656 | {
657 | "data": {
658 | "text/html": [
659 | "\n",
660 | "\n",
673 | "
\n",
674 | " \n",
675 | " \n",
676 | " | \n",
677 | " country | \n",
678 | " continent | \n",
679 | "
\n",
680 | " \n",
681 | " \n",
682 | " \n",
683 | " | 0 | \n",
684 | " algeria | \n",
685 | " Africa | \n",
686 | "
\n",
687 | " \n",
688 | " | 1 | \n",
689 | " angola | \n",
690 | " Africa | \n",
691 | "
\n",
692 | " \n",
693 | " | 2 | \n",
694 | " benin | \n",
695 | " Africa | \n",
696 | "
\n",
697 | " \n",
698 | " | 3 | \n",
699 | " botswana | \n",
700 | " Africa | \n",
701 | "
\n",
702 | " \n",
703 | " | 4 | \n",
704 | " burkinafaso | \n",
705 | " Africa | \n",
706 | "
\n",
707 | " \n",
708 | " | ... | \n",
709 | " ... | \n",
710 | " ... | \n",
711 | "
\n",
712 | " \n",
713 | " | 229 | \n",
714 | " paraguay | \n",
715 | " South America | \n",
716 | "
\n",
717 | " \n",
718 | " | 230 | \n",
719 | " peru | \n",
720 | " South America | \n",
721 | "
\n",
722 | " \n",
723 | " | 231 | \n",
724 | " suriname | \n",
725 | " South America | \n",
726 | "
\n",
727 | " \n",
728 | " | 232 | \n",
729 | " uruguay | \n",
730 | " South America | \n",
731 | "
\n",
732 | " \n",
733 | " | 233 | \n",
734 | " venezuela | \n",
735 | " South America | \n",
736 | "
\n",
737 | " \n",
738 | "
\n",
739 | "
234 rows × 2 columns
\n",
740 | "
"
741 | ],
742 | "text/plain": [
743 | " country continent\n",
744 | "0 algeria Africa\n",
745 | "1 angola Africa\n",
746 | "2 benin Africa\n",
747 | "3 botswana Africa\n",
748 | "4 burkinafaso Africa\n",
749 | ".. ... ...\n",
750 | "229 paraguay South America\n",
751 | "230 peru South America\n",
752 | "231 suriname South America\n",
753 | "232 uruguay South America\n",
754 | "233 venezuela South America\n",
755 | "\n",
756 | "[234 rows x 2 columns]"
757 | ]
758 | },
759 | "execution_count": 9,
760 | "metadata": {},
761 | "output_type": "execute_result"
762 | }
763 | ],
764 | "source": [
765 | "continent = pd.read_csv('country_by_continent.csv')\n",
766 | "continent['country'] = continent['country'].str.lower().str.replace(' ','')\n",
767 | "df['country'] = df['Country'].str.lower().str.replace(' ','')\n",
768 | "continent"
769 | ]
770 | },
771 | {
772 | "cell_type": "code",
773 | "execution_count": 11,
774 | "metadata": {},
775 | "outputs": [
776 | {
777 | "data": {
778 | "text/plain": [
779 | "Asia 3192\n",
780 | "North America 658\n",
781 | "Europe 109\n",
782 | "South America 34\n",
783 | "Oceania 30\n",
784 | "Africa 4\n",
785 | "Name: continent, dtype: int64"
786 | ]
787 | },
788 | "execution_count": 11,
789 | "metadata": {},
790 | "output_type": "execute_result"
791 | }
792 | ],
793 | "source": [
794 | "new_df = pd.merge(df, continent, on=\"country\")\n",
795 | "new_df = new_df.drop(columns=['country'])\n",
796 | "new_df.continent.value_counts()"
797 | ]
798 | },
799 | {
800 | "cell_type": "code",
801 | "execution_count": 12,
802 | "metadata": {},
803 | "outputs": [
804 | {
805 | "data": {
806 | "text/html": [
807 | "\n",
808 | "\n",
821 | "
\n",
822 | " \n",
823 | " \n",
824 | " | \n",
825 | " Brand | \n",
826 | " Variety | \n",
827 | " Style | \n",
828 | " Country | \n",
829 | " Stars | \n",
830 | " spiciness | \n",
831 | " spiciness_2 | \n",
832 | " Scoville | \n",
833 | " continent | \n",
834 | "
\n",
835 | " \n",
836 | " \n",
837 | " \n",
838 | " | 0 | \n",
839 | " MIT | \n",
840 | " Shallot Sauce Dry Noodle | \n",
841 | " Pack | \n",
842 | " Taiwan | \n",
843 | " 3 | \n",
844 | " pepper free | \n",
845 | " pepper free | \n",
846 | " 0 | \n",
847 | " Asia | \n",
848 | "
\n",
849 | " \n",
850 | " | 1 | \n",
851 | " Charming Couple | \n",
852 | " Biang Biang Scallion Chicken Sauce | \n",
853 | " Pack | \n",
854 | " Taiwan | \n",
855 | " 4.5 | \n",
856 | " pepper free | \n",
857 | " pepper free | \n",
858 | " 0 | \n",
859 | " Asia | \n",
860 | "
\n",
861 | " \n",
862 | " | 2 | \n",
863 | " Hi Lai Foods | \n",
864 | " Lai Noodle Vegan Sesame Paste Flavor | \n",
865 | " Pack | \n",
866 | " Taiwan | \n",
867 | " 5 | \n",
868 | " pepper free | \n",
869 | " pepper free | \n",
870 | " 0 | \n",
871 | " Asia | \n",
872 | "
\n",
873 | " \n",
874 | " | 3 | \n",
875 | " Ve Wong | \n",
876 | " Artificial Peppered Beef Flavor | \n",
877 | " Pack | \n",
878 | " Taiwan | \n",
879 | " 3.5 | \n",
880 | " pepper free | \n",
881 | " mild | \n",
882 | " (100, 2500) | \n",
883 | " Asia | \n",
884 | "
\n",
885 | " \n",
886 | " | 4 | \n",
887 | " iNoodle | \n",
888 | " Flat Noodle Soy Sauce Flavor | \n",
889 | " Pack | \n",
890 | " Taiwan | \n",
891 | " 3.25 | \n",
892 | " pepper free | \n",
893 | " mild | \n",
894 | " (100, 2500) | \n",
895 | " Asia | \n",
896 | "
\n",
897 | " \n",
898 | " | ... | \n",
899 | " ... | \n",
900 | " ... | \n",
901 | " ... | \n",
902 | " ... | \n",
903 | " ... | \n",
904 | " ... | \n",
905 | " ... | \n",
906 | " ... | \n",
907 | " ... | \n",
908 | "
\n",
909 | " \n",
910 | " | 4022 | \n",
911 | " Nissin | \n",
912 | " Sabor A Pollo Sopa Instantánea Con Fideos | \n",
913 | " Pack | \n",
914 | " Colombia | \n",
915 | " 3.25 | \n",
916 | " pepper free | \n",
917 | " pepper free | \n",
918 | " 0 | \n",
919 | " South America | \n",
920 | "
\n",
921 | " \n",
922 | " | 4023 | \n",
923 | " Nissin | \n",
924 | " Cup Noodles Sabor A Gallina | \n",
925 | " Cup | \n",
926 | " Colombia | \n",
927 | " 3.5 | \n",
928 | " pepper free | \n",
929 | " pepper free | \n",
930 | " 0 | \n",
931 | " South America | \n",
932 | "
\n",
933 | " \n",
934 | " | 4024 | \n",
935 | " Nissin | \n",
936 | " Sabor A Carne Sopa Instantánea Con Fideos | \n",
937 | " Pack | \n",
938 | " Colombia | \n",
939 | " 3.75 | \n",
940 | " pepper free | \n",
941 | " pepper free | \n",
942 | " 0 | \n",
943 | " South America | \n",
944 | "
\n",
945 | " \n",
946 | " | 4025 | \n",
947 | " Baltix | \n",
948 | " Instant Noodles With Chicken Flavour Broth | \n",
949 | " Pack | \n",
950 | " Estonia | \n",
951 | " 3.75 | \n",
952 | " pepper free | \n",
953 | " pepper free | \n",
954 | " 0 | \n",
955 | " Europe | \n",
956 | "
\n",
957 | " \n",
958 | " | 4026 | \n",
959 | " Baltix | \n",
960 | " Instant Noodles With Beef Flavour Broth | \n",
961 | " Pack | \n",
962 | " Estonia | \n",
963 | " 3.25 | \n",
964 | " pepper free | \n",
965 | " pepper free | \n",
966 | " 0 | \n",
967 | " Europe | \n",
968 | "
\n",
969 | " \n",
970 | "
\n",
971 | "
4027 rows × 9 columns
\n",
972 | "
"
973 | ],
974 | "text/plain": [
975 | " Brand Variety Style \\\n",
976 | "0 MIT Shallot Sauce Dry Noodle Pack \n",
977 | "1 Charming Couple Biang Biang Scallion Chicken Sauce Pack \n",
978 | "2 Hi Lai Foods Lai Noodle Vegan Sesame Paste Flavor Pack \n",
979 | "3 Ve Wong Artificial Peppered Beef Flavor Pack \n",
980 | "4 iNoodle Flat Noodle Soy Sauce Flavor Pack \n",
981 | "... ... ... ... \n",
982 | "4022 Nissin Sabor A Pollo Sopa Instantánea Con Fideos Pack \n",
983 | "4023 Nissin Cup Noodles Sabor A Gallina Cup \n",
984 | "4024 Nissin Sabor A Carne Sopa Instantánea Con Fideos Pack \n",
985 | "4025 Baltix Instant Noodles With Chicken Flavour Broth Pack \n",
986 | "4026 Baltix Instant Noodles With Beef Flavour Broth Pack \n",
987 | "\n",
988 | " Country Stars spiciness spiciness_2 Scoville continent \n",
989 | "0 Taiwan 3 pepper free pepper free 0 Asia \n",
990 | "1 Taiwan 4.5 pepper free pepper free 0 Asia \n",
991 | "2 Taiwan 5 pepper free pepper free 0 Asia \n",
992 | "3 Taiwan 3.5 pepper free mild (100, 2500) Asia \n",
993 | "4 Taiwan 3.25 pepper free mild (100, 2500) Asia \n",
994 | "... ... ... ... ... ... ... \n",
995 | "4022 Colombia 3.25 pepper free pepper free 0 South America \n",
996 | "4023 Colombia 3.5 pepper free pepper free 0 South America \n",
997 | "4024 Colombia 3.75 pepper free pepper free 0 South America \n",
998 | "4025 Estonia 3.75 pepper free pepper free 0 Europe \n",
999 | "4026 Estonia 3.25 pepper free pepper free 0 Europe \n",
1000 | "\n",
1001 | "[4027 rows x 9 columns]"
1002 | ]
1003 | },
1004 | "execution_count": 12,
1005 | "metadata": {},
1006 | "output_type": "execute_result"
1007 | }
1008 | ],
1009 | "source": [
1010 | "new_df"
1011 | ]
1012 | },
1013 | {
1014 | "cell_type": "code",
1015 | "execution_count": null,
1016 | "metadata": {},
1017 | "outputs": [],
1018 | "source": []
1019 | }
1020 | ],
1021 | "metadata": {
1022 | "kernelspec": {
1023 | "display_name": "Python 3 (ipykernel)",
1024 | "language": "python",
1025 | "name": "python3"
1026 | },
1027 | "language_info": {
1028 | "codemirror_mode": {
1029 | "name": "ipython",
1030 | "version": 3
1031 | },
1032 | "file_extension": ".py",
1033 | "mimetype": "text/x-python",
1034 | "name": "python",
1035 | "nbconvert_exporter": "python",
1036 | "pygments_lexer": "ipython3",
1037 | "version": "3.9.5"
1038 | }
1039 | },
1040 | "nbformat": 4,
1041 | "nbformat_minor": 2
1042 | }
1043 |
--------------------------------------------------------------------------------
/Previous.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "metadata": {},
6 | "source": [
7 | "# COGS 108 - Data Checkpoint"
8 | ]
9 | },
10 | {
11 | "cell_type": "markdown",
12 | "metadata": {},
13 | "source": [
14 | "# Names\n",
15 | "\n",
16 | "- Maxwell Fang\n",
17 | "- Long Le\n",
18 | "- Huy Trinh\n",
19 | "- Hasan Shaikh\n",
20 | "- Mohammed Master"
21 | ]
22 | },
23 | {
24 | "cell_type": "markdown",
25 | "metadata": {},
26 | "source": [
27 | "\n",
28 | "# Research Question"
29 | ]
30 | },
31 | {
32 | "cell_type": "markdown",
33 | "metadata": {},
34 | "source": [
35 | "How does the spice level of instant ramen affect the star rating on the www.theramenrater.com in regards to preference across each country?"
36 | ]
37 | },
38 | {
39 | "cell_type": "markdown",
40 | "metadata": {},
41 | "source": [
42 | "# Dataset(s)"
43 | ]
44 | },
45 | {
46 | "cell_type": "markdown",
47 | "metadata": {},
48 | "source": [
49 | "*Fill in your dataset information here*\n",
50 | "\n",
51 | "(Copy this information for each dataset)\n",
52 | "- Dataset Name: Top Ramen Ratings 2022\n",
53 | "- Link to the dataset: https://www.kaggle.com/datasets/ankanhore545/top-ramen-ratings-2022\n",
54 | "- Number of observations: 4120\n",
55 | "\n",
56 | "1-2 sentences describing each dataset. \n",
57 | "\n",
58 | "If you plan to use multiple datasets, add 1-2 sentences about how you plan to combine these datasets."
59 | ]
60 | },
61 | {
62 | "cell_type": "markdown",
63 | "metadata": {},
64 | "source": [
65 | "# Setup"
66 | ]
67 | },
68 | {
69 | "cell_type": "code",
70 | "execution_count": 1,
71 | "metadata": {},
72 | "outputs": [],
73 | "source": [
74 | "from tqdm.notebook import tqdm\n",
75 | "import pandas as pd\n",
76 | "import numpy as np\n",
77 | "import matplotlib.pyplot as plt\n",
78 | "import seaborn as sns\n",
79 | "from collections import defaultdict\n",
80 | "import re\n",
81 | "\n",
82 | "%matplotlib inline\n",
83 | "%config InlineBackend.figure_format = 'retina' # Higher resolution figures"
84 | ]
85 | },
86 | {
87 | "cell_type": "code",
88 | "execution_count": 2,
89 | "metadata": {},
90 | "outputs": [
91 | {
92 | "data": {
93 | "text/html": [
94 | "\n",
95 | "\n",
108 | "
\n",
109 | " \n",
110 | " \n",
111 | " | \n",
112 | " Review # | \n",
113 | " Brand | \n",
114 | " Variety | \n",
115 | " Style | \n",
116 | " Country | \n",
117 | " Stars | \n",
118 | " T | \n",
119 | "
\n",
120 | " \n",
121 | " \n",
122 | " \n",
123 | " | 0 | \n",
124 | " 4120 | \n",
125 | " MIT | \n",
126 | " Shallot Sauce Dry Noodle | \n",
127 | " Pack | \n",
128 | " Taiwan | \n",
129 | " 3 | \n",
130 | " NaN | \n",
131 | "
\n",
132 | " \n",
133 | " | 1 | \n",
134 | " 4119 | \n",
135 | " Sapporo Ichiban | \n",
136 | " Tonkotsu Ramen Japanese Style Noodles | \n",
137 | " Bowl | \n",
138 | " United States | \n",
139 | " 4.5 | \n",
140 | " NaN | \n",
141 | "
\n",
142 | " \n",
143 | " | 2 | \n",
144 | " 4118 | \n",
145 | " Binh Tay | \n",
146 | " Mi Kiwi | \n",
147 | " Bowl | \n",
148 | " Vietnam | \n",
149 | " 3 | \n",
150 | " NaN | \n",
151 | "
\n",
152 | " \n",
153 | " | 3 | \n",
154 | " 4117 | \n",
155 | " Charming Couple | \n",
156 | " Biang Biang Scallion Chicken Sauce | \n",
157 | " Pack | \n",
158 | " Taiwan | \n",
159 | " 4.5 | \n",
160 | " NaN | \n",
161 | "
\n",
162 | " \n",
163 | " | 4 | \n",
164 | " 4116 | \n",
165 | " immi | \n",
166 | " Tom Yum Shrimp Flavor Ramen Soup | \n",
167 | " Pack | \n",
168 | " United States | \n",
169 | " 2.75 | \n",
170 | " NaN | \n",
171 | "
\n",
172 | " \n",
173 | " | ... | \n",
174 | " ... | \n",
175 | " ... | \n",
176 | " ... | \n",
177 | " ... | \n",
178 | " ... | \n",
179 | " ... | \n",
180 | " ... | \n",
181 | "
\n",
182 | " \n",
183 | " | 4115 | \n",
184 | " 5 | \n",
185 | " Vifon | \n",
186 | " Hu Tiu Nam Vang [\"Phnom Penh\" style] Asian Sty... | \n",
187 | " Bowl | \n",
188 | " Vietnam | \n",
189 | " 3.5 | \n",
190 | " NaN | \n",
191 | "
\n",
192 | " \n",
193 | " | 4116 | \n",
194 | " 4 | \n",
195 | " Wai Wai | \n",
196 | " Oriental Style Instant Noodles | \n",
197 | " Pack | \n",
198 | " Thailand | \n",
199 | " 1 | \n",
200 | " NaN | \n",
201 | "
\n",
202 | " \n",
203 | " | 4117 | \n",
204 | " 3 | \n",
205 | " Wai Wai | \n",
206 | " Tom Yum Shrimp | \n",
207 | " Pack | \n",
208 | " Thailand | \n",
209 | " 2 | \n",
210 | " NaN | \n",
211 | "
\n",
212 | " \n",
213 | " | 4118 | \n",
214 | " 2 | \n",
215 | " Wai Wai | \n",
216 | " Tom Yum Chili Flavor | \n",
217 | " Pack | \n",
218 | " Thailand | \n",
219 | " 2 | \n",
220 | " NaN | \n",
221 | "
\n",
222 | " \n",
223 | " | 4119 | \n",
224 | " 1 | \n",
225 | " Westbrae | \n",
226 | " Miso Ramen | \n",
227 | " Pack | \n",
228 | " United States | \n",
229 | " 0.5 | \n",
230 | " NaN | \n",
231 | "
\n",
232 | " \n",
233 | "
\n",
234 | "
4120 rows × 7 columns
\n",
235 | "
"
236 | ],
237 | "text/plain": [
238 | " Review # Brand \\\n",
239 | "0 4120 MIT \n",
240 | "1 4119 Sapporo Ichiban \n",
241 | "2 4118 Binh Tay \n",
242 | "3 4117 Charming Couple \n",
243 | "4 4116 immi \n",
244 | "... ... ... \n",
245 | "4115 5 Vifon \n",
246 | "4116 4 Wai Wai \n",
247 | "4117 3 Wai Wai \n",
248 | "4118 2 Wai Wai \n",
249 | "4119 1 Westbrae \n",
250 | "\n",
251 | " Variety Style Country \\\n",
252 | "0 Shallot Sauce Dry Noodle Pack Taiwan \n",
253 | "1 Tonkotsu Ramen Japanese Style Noodles Bowl United States \n",
254 | "2 Mi Kiwi Bowl Vietnam \n",
255 | "3 Biang Biang Scallion Chicken Sauce Pack Taiwan \n",
256 | "4 Tom Yum Shrimp Flavor Ramen Soup Pack United States \n",
257 | "... ... ... ... \n",
258 | "4115 Hu Tiu Nam Vang [\"Phnom Penh\" style] Asian Sty... Bowl Vietnam \n",
259 | "4116 Oriental Style Instant Noodles Pack Thailand \n",
260 | "4117 Tom Yum Shrimp Pack Thailand \n",
261 | "4118 Tom Yum Chili Flavor Pack Thailand \n",
262 | "4119 Miso Ramen Pack United States \n",
263 | "\n",
264 | " Stars T \n",
265 | "0 3 NaN \n",
266 | "1 4.5 NaN \n",
267 | "2 3 NaN \n",
268 | "3 4.5 NaN \n",
269 | "4 2.75 NaN \n",
270 | "... ... .. \n",
271 | "4115 3.5 NaN \n",
272 | "4116 1 NaN \n",
273 | "4117 2 NaN \n",
274 | "4118 2 NaN \n",
275 | "4119 0.5 NaN \n",
276 | "\n",
277 | "[4120 rows x 7 columns]"
278 | ]
279 | },
280 | "execution_count": 2,
281 | "metadata": {},
282 | "output_type": "execute_result"
283 | }
284 | ],
285 | "source": [
286 | "## YOUR CODE HERE\n",
287 | "ramen = pd.read_csv('ramen_rating.csv')\n",
288 | "ramen"
289 | ]
290 | },
291 | {
292 | "cell_type": "markdown",
293 | "metadata": {},
294 | "source": [
295 | "# Data Cleaning"
296 | ]
297 | },
298 | {
299 | "cell_type": "markdown",
300 | "metadata": {},
301 | "source": [
302 | "Describe your data cleaning steps here."
303 | ]
304 | },
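{
 "cell_type": "markdown",
 "metadata": {},
 "source": [
  "As a roadmap, the cleaning below boils down to three steps. A minimal sketch, assuming the `spiciness` function and `continent` table defined in the cells that follow:\n",
  "\n",
  "```python\n",
  "df = ramen.drop(columns=['T', 'Review #'])        # 1. drop empty / redundant columns\n",
  "df['spiciness'] = df['Variety'].apply(spiciness)  # 2. keyword-based spice label\n",
  "df['country'] = df['Country'].str.lower().str.replace(' ', '')\n",
  "new_df = pd.merge(df, continent, on='country')    # 3. attach a continent per country\n",
  "```"
 ]
},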
305 | {
306 | "cell_type": "code",
307 | "execution_count": 3,
308 | "metadata": {},
309 | "outputs": [
310 | {
311 | "data": {
312 | "text/html": [
313 | "\n",
314 | "\n",
327 | "
\n",
328 | " \n",
329 | " \n",
330 | " | \n",
331 | " Brand | \n",
332 | " Variety | \n",
333 | " Style | \n",
334 | " Country | \n",
335 | " Stars | \n",
336 | "
\n",
337 | " \n",
338 | " \n",
339 | " \n",
340 | " | 0 | \n",
341 | " MIT | \n",
342 | " Shallot Sauce Dry Noodle | \n",
343 | " Pack | \n",
344 | " Taiwan | \n",
345 | " 3 | \n",
346 | "
\n",
347 | " \n",
348 | " | 1 | \n",
349 | " Sapporo Ichiban | \n",
350 | " Tonkotsu Ramen Japanese Style Noodles | \n",
351 | " Bowl | \n",
352 | " United States | \n",
353 | " 4.5 | \n",
354 | "
\n",
355 | " \n",
356 | " | 2 | \n",
357 | " Binh Tay | \n",
358 | " Mi Kiwi | \n",
359 | " Bowl | \n",
360 | " Vietnam | \n",
361 | " 3 | \n",
362 | "
\n",
363 | " \n",
364 | " | 3 | \n",
365 | " Charming Couple | \n",
366 | " Biang Biang Scallion Chicken Sauce | \n",
367 | " Pack | \n",
368 | " Taiwan | \n",
369 | " 4.5 | \n",
370 | "
\n",
371 | " \n",
372 | " | 4 | \n",
373 | " immi | \n",
374 | " Tom Yum Shrimp Flavor Ramen Soup | \n",
375 | " Pack | \n",
376 | " United States | \n",
377 | " 2.75 | \n",
378 | "
\n",
379 | " \n",
380 | "
\n",
381 | "
"
382 | ],
383 | "text/plain": [
384 | " Brand Variety Style \\\n",
385 | "0 MIT Shallot Sauce Dry Noodle Pack \n",
386 | "1 Sapporo Ichiban Tonkotsu Ramen Japanese Style Noodles Bowl \n",
387 | "2 Binh Tay Mi Kiwi Bowl \n",
388 | "3 Charming Couple Biang Biang Scallion Chicken Sauce Pack \n",
389 | "4 immi Tom Yum Shrimp Flavor Ramen Soup Pack \n",
390 | "\n",
391 | " Country Stars \n",
392 | "0 Taiwan 3 \n",
393 | "1 United States 4.5 \n",
394 | "2 Vietnam 3 \n",
395 | "3 Taiwan 4.5 \n",
396 | "4 United States 2.75 "
397 | ]
398 | },
399 | "execution_count": 3,
400 | "metadata": {},
401 | "output_type": "execute_result"
402 | }
403 | ],
404 | "source": [
405 | "## YOUR CODE HERE\n",
406 | "## FEEL FREE TO ADD MULTIPLE CELLS PER SECTION\n",
407 | "df = ramen.drop(columns=['T','Review #'])\n",
408 | "df.head()"
409 | ]
410 | },
411 | {
412 | "cell_type": "code",
413 | "execution_count": 4,
414 | "metadata": {},
415 | "outputs": [
416 | {
417 | "data": {
418 | "text/html": [
419 | "\n",
420 | "\n",
433 | "
\n",
434 | " \n",
435 | " \n",
436 | " | \n",
437 | " Brand | \n",
438 | " Variety | \n",
439 | " Style | \n",
440 | " Country | \n",
441 | " Stars | \n",
442 | " spiciness | \n",
443 | "
\n",
444 | " \n",
445 | " \n",
446 | " \n",
447 | " | 0 | \n",
448 | " MIT | \n",
449 | " Shallot Sauce Dry Noodle | \n",
450 | " Pack | \n",
451 | " Taiwan | \n",
452 | " 3 | \n",
453 | " pepper free | \n",
454 | "
\n",
455 | " \n",
456 | " | 1 | \n",
457 | " Sapporo Ichiban | \n",
458 | " Tonkotsu Ramen Japanese Style Noodles | \n",
459 | " Bowl | \n",
460 | " United States | \n",
461 | " 4.5 | \n",
462 | " pepper free | \n",
463 | "
\n",
464 | " \n",
465 | " | 2 | \n",
466 | " Binh Tay | \n",
467 | " Mi Kiwi | \n",
468 | " Bowl | \n",
469 | " Vietnam | \n",
470 | " 3 | \n",
471 | " pepper free | \n",
472 | "
\n",
473 | " \n",
474 | " | 3 | \n",
475 | " Charming Couple | \n",
476 | " Biang Biang Scallion Chicken Sauce | \n",
477 | " Pack | \n",
478 | " Taiwan | \n",
479 | " 4.5 | \n",
480 | " pepper free | \n",
481 | "
\n",
482 | " \n",
483 | " | 4 | \n",
484 | " immi | \n",
485 | " Tom Yum Shrimp Flavor Ramen Soup | \n",
486 | " Pack | \n",
487 | " United States | \n",
488 | " 2.75 | \n",
489 | " pepper free | \n",
490 | "
\n",
491 | " \n",
492 | " | ... | \n",
493 | " ... | \n",
494 | " ... | \n",
495 | " ... | \n",
496 | " ... | \n",
497 | " ... | \n",
498 | " ... | \n",
499 | "
\n",
500 | " \n",
501 | " | 4115 | \n",
502 | " Vifon | \n",
503 | " Hu Tiu Nam Vang [\"Phnom Penh\" style] Asian Sty... | \n",
504 | " Bowl | \n",
505 | " Vietnam | \n",
506 | " 3.5 | \n",
507 | " pepper free | \n",
508 | "
\n",
509 | " \n",
510 | " | 4116 | \n",
511 | " Wai Wai | \n",
512 | " Oriental Style Instant Noodles | \n",
513 | " Pack | \n",
514 | " Thailand | \n",
515 | " 1 | \n",
516 | " pepper free | \n",
517 | "
\n",
518 | " \n",
519 | " | 4117 | \n",
520 | " Wai Wai | \n",
521 | " Tom Yum Shrimp | \n",
522 | " Pack | \n",
523 | " Thailand | \n",
524 | " 2 | \n",
525 | " pepper free | \n",
526 | "
\n",
527 | " \n",
528 | " | 4118 | \n",
529 | " Wai Wai | \n",
530 | " Tom Yum Chili Flavor | \n",
531 | " Pack | \n",
532 | " Thailand | \n",
533 | " 2 | \n",
534 | " pepper free | \n",
535 | "
\n",
536 | " \n",
537 | " | 4119 | \n",
538 | " Westbrae | \n",
539 | " Miso Ramen | \n",
540 | " Pack | \n",
541 | " United States | \n",
542 | " 0.5 | \n",
543 | " pepper free | \n",
544 | "
\n",
545 | " \n",
546 | "
\n",
547 | "
4120 rows × 6 columns
\n",
548 | "
"
549 | ],
550 | "text/plain": [
551 | " Brand Variety \\\n",
552 | "0 MIT Shallot Sauce Dry Noodle \n",
553 | "1 Sapporo Ichiban Tonkotsu Ramen Japanese Style Noodles \n",
554 | "2 Binh Tay Mi Kiwi \n",
555 | "3 Charming Couple Biang Biang Scallion Chicken Sauce \n",
556 | "4 immi Tom Yum Shrimp Flavor Ramen Soup \n",
557 | "... ... ... \n",
558 | "4115 Vifon Hu Tiu Nam Vang [\"Phnom Penh\" style] Asian Sty... \n",
559 | "4116 Wai Wai Oriental Style Instant Noodles \n",
560 | "4117 Wai Wai Tom Yum Shrimp \n",
561 | "4118 Wai Wai Tom Yum Chili Flavor \n",
562 | "4119 Westbrae Miso Ramen \n",
563 | "\n",
564 | " Style Country Stars spiciness \n",
565 | "0 Pack Taiwan 3 pepper free \n",
566 | "1 Bowl United States 4.5 pepper free \n",
567 | "2 Bowl Vietnam 3 pepper free \n",
568 | "3 Pack Taiwan 4.5 pepper free \n",
569 | "4 Pack United States 2.75 pepper free \n",
570 | "... ... ... ... ... \n",
571 | "4115 Bowl Vietnam 3.5 pepper free \n",
572 | "4116 Pack Thailand 1 pepper free \n",
573 | "4117 Pack Thailand 2 pepper free \n",
574 | "4118 Pack Thailand 2 pepper free \n",
575 | "4119 Pack United States 0.5 pepper free \n",
576 | "\n",
577 | "[4120 rows x 6 columns]"
578 | ]
579 | },
580 | "execution_count": 4,
581 | "metadata": {},
582 | "output_type": "execute_result"
583 | }
584 | ],
585 | "source": [
586 | "spicy_levels = ['mild','medium','spicy','hot','fiery']\n",
587 | "def spiciness(text):\n",
588 | " text= text.lower()\n",
589 | " for spice in spicy_levels:\n",
590 | " if spice in text:\n",
591 | " return spice\n",
592 | " return 'pepper free'\n",
593 | "\n",
594 | "df['spiciness'] = df['Variety'].apply(spiciness)\n",
595 | "df"
596 | ]
597 | },
598 | {
599 | "cell_type": "code",
600 | "execution_count": 5,
601 | "metadata": {},
602 | "outputs": [
603 | {
604 | "data": {
605 | "text/plain": [
606 | "pepper free 3523\n",
607 | "spicy 446\n",
608 | "hot 128\n",
609 | "mild 21\n",
610 | "fiery 2\n",
611 | "Name: spiciness, dtype: int64"
612 | ]
613 | },
614 | "execution_count": 5,
615 | "metadata": {},
616 | "output_type": "execute_result"
617 | }
618 | ],
619 | "source": [
620 | "df.spiciness.value_counts()"
621 | ]
622 | },
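{
 "cell_type": "markdown",
 "metadata": {},
 "source": [
  "The first pass only catches varieties whose names contain an explicit level word, so 3523 of the 4120 rows fall through to 'pepper free'. For example, a famously hot variety with no level word in its name (a quick check with the `spiciness` function above):\n",
  "\n",
  "```python\n",
  "spiciness('Tom Yum Shrimp Flavor Ramen Soup')  # -> 'pepper free'\n",
  "```\n",
  "\n",
  "This motivates the phrase-based second pass further below."
 ]
},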
623 | {
624 | "cell_type": "code",
625 | "execution_count": 15,
626 | "metadata": {},
627 | "outputs": [
628 | {
629 | "data": {
630 | "text/html": [
631 | "\n",
632 | "\n",
645 | "
\n",
646 | " \n",
647 | " \n",
648 | " | \n",
649 | " Brand | \n",
650 | " Variety | \n",
651 | " Style | \n",
652 | " Country | \n",
653 | " Stars | \n",
654 | " spiciness | \n",
655 | " spiciness_2 | \n",
656 | "
\n",
657 | " \n",
658 | " \n",
659 | " \n",
660 | " | 219 | \n",
661 | " Maggi | \n",
662 | " Masala Cuppa Noodles | \n",
663 | " Cup | \n",
664 | " India | \n",
665 | " 3.25 | \n",
666 | " pepper free | \n",
667 | " pepper free | \n",
668 | "
\n",
669 | " \n",
670 | " | 424 | \n",
671 | " Nissin | \n",
672 | " Top Ramen Masala Noodles | \n",
673 | " Pack | \n",
674 | " India | \n",
675 | " 4 | \n",
676 | " pepper free | \n",
677 | " pepper free | \n",
678 | "
\n",
679 | " \n",
680 | " | 496 | \n",
681 | " Nissin | \n",
682 | " Top Ramen Curry | \n",
683 | " Pack | \n",
684 | " India | \n",
685 | " 5 | \n",
686 | " pepper free | \n",
687 | " pepper free | \n",
688 | "
\n",
689 | " \n",
690 | " | 504 | \n",
691 | " Nissin | \n",
692 | " Top Ramen Chicken Noodles | \n",
693 | " Pack | \n",
694 | " India | \n",
695 | " 3.5 | \n",
696 | " pepper free | \n",
697 | " pepper free | \n",
698 | "
\n",
699 | " \n",
700 | " | 508 | \n",
701 | " Nissin | \n",
702 | " Top Ramen Fiery Chilli Noodles | \n",
703 | " Pack | \n",
704 | " India | \n",
705 | " 5 | \n",
706 | " fiery | \n",
707 | " pepper free | \n",
708 | "
\n",
709 | " \n",
710 | " | 1408 | \n",
711 | " Maggi | \n",
712 | " 2 Minute Noodles Masala Spicy (Export Version) | \n",
713 | " Pack | \n",
714 | " India | \n",
715 | " 5 | \n",
716 | " spicy | \n",
717 | " spicy | \n",
718 | "
\n",
719 | " \n",
720 | " | 1416 | \n",
721 | " Maggi | \n",
722 | " Cuppa Chilly Chow | \n",
723 | " Cup | \n",
724 | " India | \n",
725 | " 3.5 | \n",
726 | " pepper free | \n",
727 | " pepper free | \n",
728 | "
\n",
729 | " \n",
730 | " | 1435 | \n",
731 | " Nissin | \n",
732 | " Cup Noodles Veggi Manchow | \n",
733 | " Cup | \n",
734 | " India | \n",
735 | " 3.5 | \n",
736 | " pepper free | \n",
737 | " pepper free | \n",
738 | "
\n",
739 | " \n",
740 | " | 1442 | \n",
741 | " Maggi | \n",
742 | " Hot Heads Barbeque Pepper Noodles | \n",
743 | " Pack | \n",
744 | " India | \n",
745 | " 0 | \n",
746 | " hot | \n",
747 | " hot | \n",
748 | "
\n",
749 | " \n",
750 | " | 1474 | \n",
751 | " Ching's Secret | \n",
752 | " Schezwan Instant Noodles | \n",
753 | " Pack | \n",
754 | " India | \n",
755 | " 3.5 | \n",
756 | " pepper free | \n",
757 | " pepper free | \n",
758 | "
\n",
759 | " \n",
760 | " | 1481 | \n",
761 | " Maggi | \n",
762 | " Hot Heads Peri Peri Noodles | \n",
763 | " Pack | \n",
764 | " India | \n",
765 | " 4 | \n",
766 | " hot | \n",
767 | " hot | \n",
768 | "
\n",
769 | " \n",
770 | " | 1494 | \n",
771 | " Nissin | \n",
772 | " Cup Noodles Mazedaar Masala | \n",
773 | " Cup | \n",
774 | " India | \n",
775 | " 3.5 | \n",
776 | " pepper free | \n",
777 | " pepper free | \n",
778 | "
\n",
779 | " \n",
780 | " | 1499 | \n",
781 | " Maggi | \n",
782 | " Pazzta Cheese Macaroni | \n",
783 | " Pack | \n",
784 | " India | \n",
785 | " 3.5 | \n",
786 | " pepper free | \n",
787 | " pepper free | \n",
788 | "
\n",
789 | " \n",
790 | " | 1521 | \n",
791 | " Maggi | \n",
792 | " Cuppa Masala | \n",
793 | " Cup | \n",
794 | " India | \n",
795 | " 3.75 | \n",
796 | " pepper free | \n",
797 | " pepper free | \n",
798 | "
\n",
799 | " \n",
800 | " | 1533 | \n",
801 | " Sunfeast | \n",
802 | " Yippee! Noodles Magic Masala | \n",
803 | " Pack | \n",
804 | " India | \n",
805 | " 4.25 | \n",
806 | " pepper free | \n",
807 | " pepper free | \n",
808 | "
\n",
809 | " \n",
810 | " | 1544 | \n",
811 | " Ching's Secret | \n",
812 | " Singapore Curry | \n",
813 | " Pack | \n",
814 | " India | \n",
815 | " 3.75 | \n",
816 | " pepper free | \n",
817 | " pepper free | \n",
818 | "
\n",
819 | " \n",
820 | " | 1556 | \n",
821 | " TRDP | \n",
822 | " Mario Masala Noodles | \n",
823 | " Pack | \n",
824 | " India | \n",
825 | " 3.75 | \n",
826 | " pepper free | \n",
827 | " pepper free | \n",
828 | "
\n",
829 | " \n",
830 | " | 1564 | \n",
831 | " Ching's Secret | \n",
832 | " Hot Garlic Instant Noodles | \n",
833 | " Pack | \n",
834 | " India | \n",
835 | " 4.25 | \n",
836 | " hot | \n",
837 | " hot | \n",
838 | "
\n",
839 | " \n",
840 | " | 1611 | \n",
841 | " Maggi | \n",
842 | " Hot Heads Green Chilli Noodles | \n",
843 | " Pack | \n",
844 | " India | \n",
845 | " 3.5 | \n",
846 | " hot | \n",
847 | " hot | \n",
848 | "
\n",
849 | " \n",
850 | " | 1718 | \n",
851 | " Maggi | \n",
852 | " Nutri-licious Pazzta Tomato Twist | \n",
853 | " Pack | \n",
854 | " India | \n",
855 | " 3.75 | \n",
856 | " pepper free | \n",
857 | " pepper free | \n",
858 | "
\n",
859 | " \n",
860 | " | 1747 | \n",
861 | " Nissin | \n",
862 | " Top Ramen Atta Nooldes Masala | \n",
863 | " Pack | \n",
864 | " India | \n",
865 | " 4.5 | \n",
866 | " pepper free | \n",
867 | " pepper free | \n",
868 | "
\n",
869 | " \n",
870 | " | 1760 | \n",
871 | " Wai Wai | \n",
872 | " Instant Noodles Artificial Chicken Flavoured | \n",
873 | " Pack | \n",
874 | " India | \n",
875 | " 1.5 | \n",
876 | " pepper free | \n",
877 | " pepper free | \n",
878 | "
\n",
879 | " \n",
880 | " | 1769 | \n",
881 | " 1 To 3 Noodles | \n",
882 | " Chatpat Masala | \n",
883 | " Pack | \n",
884 | " India | \n",
885 | " 4 | \n",
886 | " pepper free | \n",
887 | " pepper free | \n",
888 | "
\n",
889 | " \n",
890 | " | 1791 | \n",
891 | " Patanjali | \n",
892 | " Atta Noodles Jhatpat Banao Befikr Khao | \n",
893 | " Pack | \n",
894 | " India | \n",
895 | " 4.5 | \n",
896 | " pepper free | \n",
897 | " pepper free | \n",
898 | "
\n",
899 | " \n",
900 | " | 1816 | \n",
901 | " Maggi | \n",
902 | " 2 Minute Noodles Masala Spicy | \n",
903 | " Pack | \n",
904 | " India | \n",
905 | " 5 | \n",
906 | " spicy | \n",
907 | " spicy | \n",
908 | "
\n",
909 | " \n",
910 | " | 1851 | \n",
911 | " Wai Wai | \n",
912 | " Instant Noodles Veg Masala Flavour | \n",
913 | " Pack | \n",
914 | " India | \n",
915 | " 3.5 | \n",
916 | " pepper free | \n",
917 | " pepper free | \n",
918 | "
\n",
919 | " \n",
920 | " | 1896 | \n",
921 | " Wai Wai | \n",
922 | " X-Press Flavour Of Pizza In Noodles Proprietar... | \n",
923 | " Pack | \n",
924 | " India | \n",
925 | " 4 | \n",
926 | " pepper free | \n",
927 | " pepper free | \n",
928 | "
\n",
929 | " \n",
930 | " | 2087 | \n",
931 | " Chaudhary's Wai Wai | \n",
932 | " Instant Noodles Artificial Chicken & Shrimp Fl... | \n",
933 | " Pack | \n",
934 | " India | \n",
935 | " 2.5 | \n",
936 | " pepper free | \n",
937 | " pepper free | \n",
938 | "
\n",
939 | " \n",
940 | " | 2521 | \n",
941 | " Nissin | \n",
942 | " Top Ramen Super Noodles Tomato | \n",
943 | " Pack | \n",
944 | " India | \n",
945 | " 3.5 | \n",
946 | " pepper free | \n",
947 | " pepper free | \n",
948 | "
\n",
949 | " \n",
950 | " | 2544 | \n",
951 | " Maggi | \n",
952 | " Authentic Indian Noodles Vegetable Atta Masala | \n",
953 | " Pack | \n",
954 | " India | \n",
955 | " 3.75 | \n",
956 | " pepper free | \n",
957 | " pepper free | \n",
958 | "
\n",
959 | " \n",
960 | " | 2554 | \n",
961 | " Nissin | \n",
962 | " Cup Noodles Tomato Enjoy Noodles With Hot Soup | \n",
963 | " Cup | \n",
964 | " India | \n",
965 | " 3.25 | \n",
966 | " hot | \n",
967 | " hot | \n",
968 | "
\n",
969 | " \n",
970 | " | 2564 | \n",
971 | " Ching's Secret | \n",
972 | " Hot Garlic Instant Noodles | \n",
973 | " Pack | \n",
974 | " India | \n",
975 | " 3.75 | \n",
976 | " hot | \n",
977 | " hot | \n",
978 | "
\n",
979 | " \n",
980 | " | 2577 | \n",
981 | " Maggi | \n",
982 | " 2 Minute Noodles Hungrooo Masala Spicy | \n",
983 | " Pack | \n",
984 | " India | \n",
985 | " 3.75 | \n",
986 | " spicy | \n",
987 | " spicy | \n",
988 | "
\n",
989 | " \n",
990 | " | 2601 | \n",
991 | " Ching's Secret | \n",
992 | " Manchurian Instant Noodles | \n",
993 | " Pack | \n",
994 | " India | \n",
995 | " 3.5 | \n",
996 | " pepper free | \n",
997 | " pepper free | \n",
998 | "
\n",
999 | " \n",
1000 | " | 2614 | \n",
1001 | " Nissin | \n",
1002 | " Cup Noodles Mug Noodles Spicy Vegetable | \n",
1003 | " Pack | \n",
1004 | " India | \n",
1005 | " 3.75 | \n",
1006 | " spicy | \n",
1007 | " spicy | \n",
1008 | "
\n",
1009 | " \n",
1010 | " | 2631 | \n",
1011 | " Maggi | \n",
1012 | " Multigrainz Noodles Spice Remix | \n",
1013 | " Pack | \n",
1014 | " India | \n",
1015 | " 3.25 | \n",
1016 | " pepper free | \n",
1017 | " pepper free | \n",
1018 | "
\n",
1019 | " \n",
1020 | " | 2715 | \n",
1021 | " Maggi | \n",
1022 | " Masala Dumdaar Noodles | \n",
1023 | " Pack | \n",
1024 | " India | \n",
1025 | " 3.75 | \n",
1026 | " pepper free | \n",
1027 | " pepper free | \n",
1028 | "
\n",
1029 | " \n",
1030 | " | 3357 | \n",
1031 | " Maggi | \n",
1032 | " 2 Minute Noodles Tricky Tomato | \n",
1033 | " Pack | \n",
1034 | " India | \n",
1035 | " 3.25 | \n",
1036 | " pepper free | \n",
1037 | " pepper free | \n",
1038 | "
\n",
1039 | " \n",
1040 | " | 3504 | \n",
1041 | " Maggi | \n",
1042 | " 2 Minute Noodles Thrillin' Curry | \n",
1043 | " Pack | \n",
1044 | " India | \n",
1045 | " 2 | \n",
1046 | " pepper free | \n",
1047 | " pepper free | \n",
1048 | "
\n",
1049 | " \n",
1050 | " | 3546 | \n",
1051 | " Maggi | \n",
1052 | " 2 Minute Noodles Tricky Tomato | \n",
1053 | " Pack | \n",
1054 | " India | \n",
1055 | " 3.75 | \n",
1056 | " pepper free | \n",
1057 | " pepper free | \n",
1058 | "
\n",
1059 | " \n",
1060 | " | 3924 | \n",
1061 | " Nissin | \n",
1062 | " Cup Noodles Pani Puri | \n",
1063 | " Cup | \n",
1064 | " India | \n",
1065 | " 1.5 | \n",
1066 | " pepper free | \n",
1067 | " pepper free | \n",
1068 | "
\n",
1069 | " \n",
1070 | " | 3931 | \n",
1071 | " Nissin | \n",
1072 | " Cup Noodles Manchurian | \n",
1073 | " Cup | \n",
1074 | " India | \n",
1075 | " 3 | \n",
1076 | " pepper free | \n",
1077 | " pepper free | \n",
1078 | "
\n",
1079 | " \n",
1080 | " | 4058 | \n",
1081 | " Maggi | \n",
1082 | " Rice Noodle Mania Lemon Masala | \n",
1083 | " Pack | \n",
1084 | " India | \n",
1085 | " 1.5 | \n",
1086 | " pepper free | \n",
1087 | " pepper free | \n",
1088 | "
\n",
1089 | " \n",
1090 | " | 4059 | \n",
1091 | " Maggi | \n",
1092 | " 2 Minute Noodles Curry | \n",
1093 | " Pack | \n",
1094 | " India | \n",
1095 | " 2.5 | \n",
1096 | " pepper free | \n",
1097 | " pepper free | \n",
1098 | "
\n",
1099 | " \n",
1100 | " | 4060 | \n",
1101 | " Maggi | \n",
1102 | " Vegetable Atta Noodles Masala | \n",
1103 | " Pack | \n",
1104 | " India | \n",
1105 | " 2 | \n",
1106 | " pepper free | \n",
1107 | " pepper free | \n",
1108 | "
\n",
1109 | " \n",
1110 | " | 4061 | \n",
1111 | " Maggi | \n",
1112 | " Chinese Noodles Lemon Chicken Flavor | \n",
1113 | " Pack | \n",
1114 | " India | \n",
1115 | " 3.5 | \n",
1116 | " pepper free | \n",
1117 | " mild | \n",
1118 | "
\n",
1119 | " \n",
1120 | "
\n",
1121 | "
"
1122 | ],
1123 | "text/plain": [
1124 | " Brand Variety \\\n",
1125 | "219 Maggi Masala Cuppa Noodles \n",
1126 | "424 Nissin Top Ramen Masala Noodles \n",
1127 | "496 Nissin Top Ramen Curry \n",
1128 | "504 Nissin Top Ramen Chicken Noodles \n",
1129 | "508 Nissin Top Ramen Fiery Chilli Noodles \n",
1130 | "1408 Maggi 2 Minute Noodles Masala Spicy (Export Version) \n",
1131 | "1416 Maggi Cuppa Chilly Chow \n",
1132 | "1435 Nissin Cup Noodles Veggi Manchow \n",
1133 | "1442 Maggi Hot Heads Barbeque Pepper Noodles \n",
1134 | "1474 Ching's Secret Schezwan Instant Noodles \n",
1135 | "1481 Maggi Hot Heads Peri Peri Noodles \n",
1136 | "1494 Nissin Cup Noodles Mazedaar Masala \n",
1137 | "1499 Maggi Pazzta Cheese Macaroni \n",
1138 | "1521 Maggi Cuppa Masala \n",
1139 | "1533 Sunfeast Yippee! Noodles Magic Masala \n",
1140 | "1544 Ching's Secret Singapore Curry \n",
1141 | "1556 TRDP Mario Masala Noodles \n",
1142 | "1564 Ching's Secret Hot Garlic Instant Noodles \n",
1143 | "1611 Maggi Hot Heads Green Chilli Noodles \n",
1144 | "1718 Maggi Nutri-licious Pazzta Tomato Twist \n",
1145 | "1747 Nissin Top Ramen Atta Nooldes Masala \n",
1146 | "1760 Wai Wai Instant Noodles Artificial Chicken Flavoured \n",
1147 | "1769 1 To 3 Noodles Chatpat Masala \n",
1148 | "1791 Patanjali Atta Noodles Jhatpat Banao Befikr Khao \n",
1149 | "1816 Maggi 2 Minute Noodles Masala Spicy \n",
1150 | "1851 Wai Wai Instant Noodles Veg Masala Flavour \n",
1151 | "1896 Wai Wai X-Press Flavour Of Pizza In Noodles Proprietar... \n",
1152 | "2087 Chaudhary's Wai Wai Instant Noodles Artificial Chicken & Shrimp Fl... \n",
1153 | "2521 Nissin Top Ramen Super Noodles Tomato \n",
1154 | "2544 Maggi Authentic Indian Noodles Vegetable Atta Masala \n",
1155 | "2554 Nissin Cup Noodles Tomato Enjoy Noodles With Hot Soup \n",
1156 | "2564 Ching's Secret Hot Garlic Instant Noodles \n",
1157 | "2577 Maggi 2 Minute Noodles Hungrooo Masala Spicy \n",
1158 | "2601 Ching's Secret Manchurian Instant Noodles \n",
1159 | "2614 Nissin Cup Noodles Mug Noodles Spicy Vegetable \n",
1160 | "2631 Maggi Multigrainz Noodles Spice Remix \n",
1161 | "2715 Maggi Masala Dumdaar Noodles \n",
1162 | "3357 Maggi 2 Minute Noodles Tricky Tomato \n",
1163 | "3504 Maggi 2 Minute Noodles Thrillin' Curry \n",
1164 | "3546 Maggi 2 Minute Noodles Tricky Tomato \n",
1165 | "3924 Nissin Cup Noodles Pani Puri \n",
1166 | "3931 Nissin Cup Noodles Manchurian \n",
1167 | "4058 Maggi Rice Noodle Mania Lemon Masala \n",
1168 | "4059 Maggi 2 Minute Noodles Curry \n",
1169 | "4060 Maggi Vegetable Atta Noodles Masala \n",
1170 | "4061 Maggi Chinese Noodles Lemon Chicken Flavor \n",
1171 | "\n",
1172 | " Style Country Stars spiciness spiciness_2 \n",
1173 | "219 Cup India 3.25 pepper free pepper free \n",
1174 | "424 Pack India 4 pepper free pepper free \n",
1175 | "496 Pack India 5 pepper free pepper free \n",
1176 | "504 Pack India 3.5 pepper free pepper free \n",
1177 | "508 Pack India 5 fiery pepper free \n",
1178 | "1408 Pack India 5 spicy spicy \n",
1179 | "1416 Cup India 3.5 pepper free pepper free \n",
1180 | "1435 Cup India 3.5 pepper free pepper free \n",
1181 | "1442 Pack India 0 hot hot \n",
1182 | "1474 Pack India 3.5 pepper free pepper free \n",
1183 | "1481 Pack India 4 hot hot \n",
1184 | "1494 Cup India 3.5 pepper free pepper free \n",
1185 | "1499 Pack India 3.5 pepper free pepper free \n",
1186 | "1521 Cup India 3.75 pepper free pepper free \n",
1187 | "1533 Pack India 4.25 pepper free pepper free \n",
1188 | "1544 Pack India 3.75 pepper free pepper free \n",
1189 | "1556 Pack India 3.75 pepper free pepper free \n",
1190 | "1564 Pack India 4.25 hot hot \n",
1191 | "1611 Pack India 3.5 hot hot \n",
1192 | "1718 Pack India 3.75 pepper free pepper free \n",
1193 | "1747 Pack India 4.5 pepper free pepper free \n",
1194 | "1760 Pack India 1.5 pepper free pepper free \n",
1195 | "1769 Pack India 4 pepper free pepper free \n",
1196 | "1791 Pack India 4.5 pepper free pepper free \n",
1197 | "1816 Pack India 5 spicy spicy \n",
1198 | "1851 Pack India 3.5 pepper free pepper free \n",
1199 | "1896 Pack India 4 pepper free pepper free \n",
1200 | "2087 Pack India 2.5 pepper free pepper free \n",
1201 | "2521 Pack India 3.5 pepper free pepper free \n",
1202 | "2544 Pack India 3.75 pepper free pepper free \n",
1203 | "2554 Cup India 3.25 hot hot \n",
1204 | "2564 Pack India 3.75 hot hot \n",
1205 | "2577 Pack India 3.75 spicy spicy \n",
1206 | "2601 Pack India 3.5 pepper free pepper free \n",
1207 | "2614 Pack India 3.75 spicy spicy \n",
1208 | "2631 Pack India 3.25 pepper free pepper free \n",
1209 | "2715 Pack India 3.75 pepper free pepper free \n",
1210 | "3357 Pack India 3.25 pepper free pepper free \n",
1211 | "3504 Pack India 2 pepper free pepper free \n",
1212 | "3546 Pack India 3.75 pepper free pepper free \n",
1213 | "3924 Cup India 1.5 pepper free pepper free \n",
1214 | "3931 Cup India 3 pepper free pepper free \n",
1215 | "4058 Pack India 1.5 pepper free pepper free \n",
1216 | "4059 Pack India 2.5 pepper free pepper free \n",
1217 | "4060 Pack India 2 pepper free pepper free \n",
1218 | "4061 Pack India 3.5 pepper free mild "
1219 | ]
1220 | },
1221 | "execution_count": 15,
1222 | "metadata": {},
1223 | "output_type": "execute_result"
1224 | }
1225 | ],
1226 | "source": [
1227 | "df[df.Country == 'India']"
1228 | ]
1229 | },
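{
 "cell_type": "markdown",
 "metadata": {},
 "source": [
  "The India subset is a useful spot check: masala and curry varieties rarely carry an explicit level word, so most rows fall through to 'pepper free' in both passes. One way to quantify this (a sketch, assuming the `spiciness_2` column from the second pass below has been computed):\n",
  "\n",
  "```python\n",
  "india = df[df.Country == 'India']\n",
  "(india['spiciness_2'] == 'pepper free').mean()  # share of unlabeled Indian varieties\n",
  "```"
 ]
},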
1230 | {
1231 | "cell_type": "code",
1232 | "execution_count": 11,
1233 | "metadata": {},
1234 | "outputs": [
1235 | {
1236 | "data": {
1237 | "text/plain": [
1238 | "[('instant noodles', 298),\n",
1239 | " ('noodle soup', 178),\n",
1240 | " ('cup noodles', 161),\n",
1241 | " ('instant noodle', 134),\n",
1242 | " ('tom yum', 117),\n",
1243 | " ('chicken flavor', 104),\n",
1244 | " ('noodles with', 93),\n",
1245 | " ('ramen noodle', 84),\n",
1246 | " ('rice noodle', 77),\n",
1247 | " ('beef flavor', 74),\n",
1248 | " ('mi goreng', 73),\n",
1249 | " ('flavour instant', 73),\n",
1250 | " ('hot spicy', 71),\n",
1251 | " ('soy sauce', 68),\n",
1252 | " ('flavor ramen', 65),\n",
1253 | " ('chicken flavour', 63),\n",
1254 | " ('flavor instant', 57),\n",
1255 | " ('spicy beef', 55),\n",
1256 | " ('beef flavour', 55),\n",
1257 | " ('noodle with', 51),\n",
1258 | " ('tonkotsu ramen', 50),\n",
1259 | " ('rice vermicelli', 49),\n",
1260 | " ('miso ramen', 48),\n",
1261 | " ('noodles chicken', 48),\n",
1262 | " ('demae ramen', 48),\n",
1263 | " ('chow mein', 47),\n",
1264 | " ('ramen noodles', 45),\n",
1265 | " ('instant rice', 43),\n",
1266 | " ('bowl noodle', 41),\n",
1267 | " ('cup noodle', 40),\n",
1268 | " ('rice noodles', 40),\n",
1269 | " ('spicy chicken', 39),\n",
1270 | " ('curry flavour', 38),\n",
1271 | " ('shoyu ramen', 37),\n",
1272 | " ('style instant', 36),\n",
1273 | " ('artificial beef', 33),\n",
1274 | " ('fried noodles', 33),\n",
1275 | " ('seafood flavour', 33),\n",
1276 | " ('white curry', 33),\n",
1277 | " ('hot sour', 32),\n",
1278 | " ('pork flavor', 31),\n",
1279 | " ('tom yam', 31),\n",
1280 | " ('shio ramen', 30),\n",
1281 | " ('artificial chicken', 29),\n",
1282 | " ('top ramen', 29),\n",
1283 | " ('sauce flavor', 28),\n",
1284 | " ('sesame oil', 28),\n",
1285 | " ('pork flavour', 26),\n",
1286 | " ('penang white', 26),\n",
1287 | " ('oriental style', 26),\n",
1288 | " ('japanese style', 25),\n",
1289 | " ('noodles spicy', 25),\n",
1290 | " ('spicy seafood', 25),\n",
1291 | " ('kung fu', 25),\n",
1292 | " ('noodles tom', 25),\n",
1293 | " ('yum shrimp', 22),\n",
1294 | " ('with soup', 22),\n",
1295 | " ('artificial pork', 22),\n",
1296 | " ('noodles beef', 22),\n",
1297 | " ('curry noodle', 22),\n",
1298 | " ('soup base', 22),\n",
1299 | " ('shrimp flavor', 21),\n",
1300 | " ('ramen spicy', 21),\n",
1301 | " ('non fried', 21),\n",
1302 | " ('noodle spicy', 21),\n",
1303 | " ('demae iccho', 21),\n",
1304 | " ('seafood flavor', 20),\n",
1305 | " ('black pepper', 20),\n",
1306 | " ('shrimp tom', 20),\n",
1307 | " ('yum flavour', 20),\n",
1308 | " ('buldak bokkeummyun', 20),\n",
1309 | " ('sabor a', 20),\n",
1310 | " ('dried noodles', 19),\n",
1311 | " ('noodles artificial', 19),\n",
1312 | " ('korean style', 19),\n",
1313 | " ('noodles shrimp', 19),\n",
1314 | " ('noodle bowl', 19),\n",
1315 | " ('spicy flavor', 19),\n",
1316 | " ('big bowl', 19),\n",
1317 | " ('xo sauce', 18),\n",
1318 | " ('soup flavour', 18),\n",
1319 | " ('mi segera', 18),\n",
1320 | " ('bowl noodles', 18),\n",
1321 | " ('ramen soup', 17),\n",
1322 | " ('spicy shrimp', 17),\n",
1323 | " ('shrimp flavour', 17),\n",
1324 | " ('crab flavour', 17),\n",
1325 | " ('creamy tom', 17),\n",
1326 | " ('green curry', 17),\n",
1327 | " ('noodles hot', 17),\n",
1328 | " ('oriental noodles', 17),\n",
1329 | " ('sesame sauce', 17),\n",
1330 | " ('minced pork', 16),\n",
1331 | " ('instant lunch', 16),\n",
1332 | " ('noodles vegetable', 16),\n",
1333 | " ('braised beef', 16),\n",
1334 | " ('noodle king', 16),\n",
1335 | " ('rasa ayam', 16),\n",
1336 | " ('super noodles', 16),\n",
1337 | " ('curry udon', 15),\n",
1338 | " ('fried noodle', 15),\n",
1339 | " ('spicy flavour', 15),\n",
1340 | " ('flavour noodle', 15),\n",
1341 | " ('minute noodles', 15),\n",
1342 | " ('south korean', 15),\n",
1343 | " ('dry noodle', 14),\n",
1344 | " ('stir fry', 14),\n",
1345 | " ('curry flavor', 14),\n",
1346 | " ('noodles soup', 14),\n",
1347 | " ('yum goong', 14),\n",
1348 | " ('pad thai', 14),\n",
1349 | " ('laksa flavour', 14),\n",
1350 | " ('spicy hot', 14),\n",
1351 | " ('sopa nissin', 14),\n",
1352 | " ('style noodles', 13),\n",
1353 | " ('hot chicken', 13),\n",
1354 | " ('soup chicken', 13),\n",
1355 | " ('instant ramen', 13),\n",
1356 | " ('beef noodle', 13),\n",
1357 | " ('instant cup', 13),\n",
1358 | " ('hot pot', 13),\n",
1359 | " ('kitsune udon', 13),\n",
1360 | " ('beef with', 13),\n",
1361 | " ('pepper crab', 13),\n",
1362 | " ('artificial spicy', 13),\n",
1363 | " ('dry noodles', 13),\n",
1364 | " ('kuah rasa', 13),\n",
1365 | " ('taste of', 13),\n",
1366 | " ('big cup', 13),\n",
1367 | " ('spicy miso', 12),\n",
1368 | " ('stir fried', 12),\n",
1369 | " ('u f', 12),\n",
1370 | " ('f o', 12),\n",
1371 | " ('kimchi ramen', 12),\n",
1372 | " ('sichuan pepper', 12),\n",
1373 | " ('tonkotsu flavour', 12),\n",
1374 | " ('with spicy', 12),\n",
1375 | " ('la mian', 12),\n",
1376 | " ('vegetable flavour', 12),\n",
1377 | " ('chicken curry', 12),\n",
1378 | " ('noodles curry', 12),\n",
1379 | " ('soybean paste', 12),\n",
1380 | " ('new recipe', 12),\n",
1381 | " ('flavor noodle', 12),\n",
1382 | " ('noodles seafood', 12),\n",
1383 | " ('noodle snack', 12),\n",
1384 | " ('fu artificial', 12),\n",
1385 | " ('style noodle', 11),\n",
1386 | " ('sauce ramen', 11),\n",
1387 | " ('shrimp creamy', 11),\n",
1388 | " ('tempura udon', 11),\n",
1389 | " ('asian style', 11),\n",
1390 | " ('original flavor', 11),\n",
1391 | " ('chicken soup', 11),\n",
1392 | " ('prawn soup', 11),\n",
1393 | " ('penang red', 11),\n",
1394 | " ('instant vermicelli', 11),\n",
1395 | " ('yum kung', 11),\n",
1396 | " ('premium noodle', 11),\n",
1397 | " ('pancit canton', 11),\n",
1398 | " ('sweet potato', 11),\n",
1399 | " ('shin ramyun', 11),\n",
1400 | " ('malaysia penang', 11)]"
1401 | ]
1402 | },
1403 | "execution_count": 11,
1404 | "metadata": {},
1405 | "output_type": "execute_result"
1406 | }
1407 | ],
1408 | "source": [
1409 | "popular_phrases.sort(key=lambda x: x[1],reverse = True)\n",
1410 | "popular_phrases"
1411 | ]
1412 | },
1413 | {
1414 | "cell_type": "code",
1415 | "execution_count": 6,
1416 | "metadata": {},
1417 | "outputs": [
1418 | {
1419 | "data": {
1420 | "text/plain": [
1421 | "[('dry noodle', 14),\n",
1422 | " ('tonkotsu ramen', 50),\n",
1423 | " ('japanese style', 25),\n",
1424 | " ('style noodles', 13),\n",
1425 | " ('tom yum', 117),\n",
1426 | " ('yum shrimp', 22),\n",
1427 | " ('shrimp flavor', 21),\n",
1428 | " ('flavor ramen', 65),\n",
1429 | " ('ramen soup', 17),\n",
1430 | " ('rice noodle', 77),\n",
1431 | " ('chicken flavor', 104),\n",
1432 | " ('seafood flavor', 20),\n",
1433 | " ('dried noodles', 19),\n",
1434 | " ('spicy beef', 55),\n",
1435 | " ('beef flavor', 74),\n",
1436 | " ('mi goreng', 73),\n",
1437 | " ('black pepper', 20),\n",
1438 | " ('hot spicy', 71),\n",
1439 | " ('soy sauce', 68),\n",
1440 | " ('ramen noodle', 84),\n",
1441 | " ('noodle soup', 178),\n",
1442 | " ('instant noodles', 298),\n",
1443 | " ('noodles artificial', 19),\n",
1444 | " ('artificial beef', 33),\n",
1445 | " ('chow mein', 47),\n",
1446 | " ('cup noodle', 40),\n",
1447 | " ('spicy shrimp', 17),\n",
1448 | " ('rice noodles', 40),\n",
1449 | " ('spicy miso', 12),\n",
1450 | " ('miso ramen', 48),\n",
1451 | " ('instant rice', 43),\n",
1452 | " ('rice vermicelli', 49),\n",
1453 | " ('shoyu ramen', 37),\n",
1454 | " ('cup noodles', 161),\n",
1455 | " ('stir fry', 14),\n",
1456 | " ('curry flavor', 14),\n",
1457 | " ('sauce flavor', 28),\n",
1458 | " ('fried noodles', 33),\n",
1459 | " ('stir fried', 12),\n",
1460 | " ('noodles with', 93),\n",
1461 | " ('korean style', 19),\n",
1462 | " ('noodle with', 51),\n",
1463 | " ('ramen spicy', 21),\n",
1464 | " ('style noodle', 11),\n",
1465 | " ('sauce ramen', 11),\n",
1466 | " ('minced pork', 16),\n",
1467 | " ('pork flavor', 31),\n",
1468 | " ('spicy chicken', 39),\n",
1469 | " ('shio ramen', 30),\n",
1470 | " ('noodles chicken', 48),\n",
1471 | " ('chicken flavour', 63),\n",
1472 | " ('shrimp tom', 20),\n",
1473 | " ('instant lunch', 16),\n",
1474 | " ('artificial chicken', 29),\n",
1475 | " ('flavor instant', 57),\n",
1476 | " ('instant noodle', 134),\n",
1477 | " ('shrimp flavour', 17),\n",
1478 | " ('flavour instant', 73),\n",
1479 | " ('crab flavour', 17),\n",
1480 | " ('hot chicken', 13),\n",
1481 | " ('noodles vegetable', 16),\n",
1482 | " ('shrimp creamy', 11),\n",
1483 | " ('creamy tom', 17),\n",
1484 | " ('yum flavour', 20),\n",
1485 | " ('demae ramen', 48),\n",
1486 | " ('xo sauce', 18),\n",
1487 | " ('seafood flavour', 33),\n",
1488 | " ('soup chicken', 13),\n",
1489 | " ('u f', 12),\n",
1490 | " ('f o', 12),\n",
1491 | " ('noodles soup', 14),\n",
1492 | " ('instant ramen', 13),\n",
1493 | " ('beef flavour', 55),\n",
1494 | " ('tempura udon', 11),\n",
1495 | " ('braised beef', 16),\n",
1496 | " ('beef noodle', 13),\n",
1497 | " ('with soup', 22),\n",
1498 | " ('kimchi ramen', 12),\n",
1499 | " ('top ramen', 29),\n",
1500 | " ('asian style', 11),\n",
1501 | " ('sesame oil', 28),\n",
1502 | " ('ramen noodles', 45),\n",
1503 | " ('noodles shrimp', 19),\n",
1504 | " ('noodle bowl', 19),\n",
1505 | " ('noodles spicy', 25),\n",
1506 | " ('artificial pork', 22),\n",
1507 | " ('pork flavour', 26),\n",
1508 | " ('original flavor', 11),\n",
1509 | " ('sichuan pepper', 12),\n",
1510 | " ('instant cup', 13),\n",
1511 | " ('hot pot', 13),\n",
1512 | " ('tonkotsu flavour', 12),\n",
1513 | " ('chicken soup', 11),\n",
1514 | " ('soup flavour', 18),\n",
1515 | " ('with spicy', 12),\n",
1516 | " ('spicy seafood', 25),\n",
1517 | " ('spicy flavor', 19),\n",
1518 | " ('bowl noodle', 41),\n",
1519 | " ('prawn soup', 11),\n",
1520 | " ('la mian', 12),\n",
1521 | " ('kitsune udon', 13),\n",
1522 | " ('penang red', 11),\n",
1523 | " ('yum goong', 14),\n",
1524 | " ('hot sour', 32),\n",
1525 | " ('instant vermicelli', 11),\n",
1526 | " ('non fried', 21),\n",
1527 | " ('vegetable flavour', 12),\n",
1528 | " ('green curry', 17),\n",
1529 | " ('pad thai', 14),\n",
1530 | " ('noodles hot', 17),\n",
1531 | " ('curry flavour', 38),\n",
1532 | " ('noodles beef', 22),\n",
1533 | " ('chicken curry', 12),\n",
1534 | " ('mi segera', 18),\n",
1535 | " ('penang white', 26),\n",
1536 | " ('white curry', 33),\n",
1537 | " ('curry noodle', 22),\n",
1538 | " ('style instant', 36),\n",
1539 | " ('noodles curry', 12),\n",
1540 | " ('curry udon', 15),\n",
1541 | " ('fried noodle', 15),\n",
1542 | " ('beef with', 13),\n",
1543 | " ('yum kung', 11),\n",
1544 | " ('noodle king', 16),\n",
1545 | " ('soybean paste', 12),\n",
1546 | " ('laksa flavour', 14),\n",
1547 | " ('pepper crab', 13),\n",
1548 | " ('spicy flavour', 15),\n",
1549 | " ('artificial spicy', 13),\n",
1550 | " ('noodle spicy', 21),\n",
1551 | " ('oriental noodles', 17),\n",
1552 | " ('premium noodle', 11),\n",
1553 | " ('spicy hot', 14),\n",
1554 | " ('new recipe', 12),\n",
1555 | " ('oriental style', 26),\n",
1556 | " ('dry noodles', 13),\n",
1557 | " ('big bowl', 19),\n",
1558 | " ('flavour noodle', 15),\n",
1559 | " ('kung fu', 25),\n",
1560 | " ('noodles tom', 25),\n",
1561 | " ('pancit canton', 11),\n",
1562 | " ('sesame sauce', 17),\n",
1563 | " ('sweet potato', 11),\n",
1564 | " ('kuah rasa', 13),\n",
1565 | " ('rasa ayam', 16),\n",
1566 | " ('flavor noodle', 12),\n",
1567 | " ('buldak bokkeummyun', 20),\n",
1568 | " ('noodles seafood', 12),\n",
1569 | " ('shin ramyun', 11),\n",
1570 | " ('noodle snack', 12),\n",
1571 | " ('tom yam', 31),\n",
1572 | " ('minute noodles', 15),\n",
1573 | " ('taste of', 13),\n",
1574 | " ('demae iccho', 21),\n",
1575 | " ('soup base', 22),\n",
1576 | " ('sopa nissin', 14),\n",
1577 | " ('sabor a', 20),\n",
1578 | " ('bowl noodles', 18),\n",
1579 | " ('big cup', 13),\n",
1580 | " ('malaysia penang', 11),\n",
1581 | " ('super noodles', 16),\n",
1582 | " ('south korean', 15),\n",
1583 | " ('fu artificial', 12)]"
1584 | ]
1585 | },
1586 | "execution_count": 6,
1587 | "metadata": {},
1588 | "output_type": "execute_result"
1589 | }
1590 | ],
1591 | "source": [
1592 | "# extracting spiciness from text\n",
1593 | "# cleaning th variety string\n",
1594 | "variety_list = (df['Variety'].str.lower()\n",
1595 | " .str.replace('[^a-zA-Z]', ' ', regex=True)\n",
1596 | " .str.replace(' +', ' ', regex=True).values)\n",
1597 | "\n",
1598 | "# finding the most popular phrases to catagorize\n",
1599 | "pairs_dict = {}\n",
1600 | "for i in variety_list:\n",
1601 | " words = i.split()\n",
1602 | " for j in range(len(words) - 1):\n",
1603 | " pair = words[j] + ' ' + words[j+1]\n",
1604 | " if pair in pairs_dict:\n",
1605 | " pairs_dict[pair] += 1\n",
1606 | " else:\n",
1607 | " pairs_dict[pair] = 1\n",
1608 | " \n",
1609 | "popular_phrases = [(i, pairs_dict[i]) for i in pairs_dict if pairs_dict[i] > 10]\n",
1610 | "popular_phrases"
1611 | ]
1612 | },
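{
 "cell_type": "markdown",
 "metadata": {},
 "source": [
  "The pair counting above can be written more compactly with `collections.Counter`; an equivalent sketch, not what the cell above executed:\n",
  "\n",
  "```python\n",
  "from collections import Counter\n",
  "\n",
  "pairs = Counter(\n",
  "    w1 + ' ' + w2\n",
  "    for name in variety_list\n",
  "    for w1, w2 in zip(name.split(), name.split()[1:])\n",
  ")\n",
  "popular_phrases = [(p, n) for p, n in pairs.items() if n > 10]\n",
  "```"
 ]
},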
1613 | {
1614 | "cell_type": "code",
1615 | "execution_count": 7,
1616 | "metadata": {},
1617 | "outputs": [
1618 | {
1619 | "data": {
1620 | "text/plain": [
1621 | "pepper free 2834\n",
1622 | "mild 526\n",
1623 | "spicy 446\n",
1624 | "hot 314\n",
1625 | "Name: spiciness_2, dtype: int64"
1626 | ]
1627 | },
1628 | "execution_count": 7,
1629 | "metadata": {},
1630 | "output_type": "execute_result"
1631 | }
1632 | ],
1633 | "source": [
1634 | "spiciness_dict = {\n",
1635 | " 'tonkotsu ramen': 'mild',\n",
1636 | " 'japanese style': 'mild',\n",
1637 | " 'tom yum': 'hot',\n",
1638 | " 'chicken flavor': 'mild',\n",
1639 | " 'seafood flavor': 'mild',\n",
1640 | " 'beef flavor': 'mild',\n",
1641 | " 'mi goreng': 'mild',\n",
1642 | " 'black pepper': 'hot',\n",
1643 | " 'soy sauce': 'mild',\n",
1644 | " 'chow mein': 'mild', \n",
1645 | " 'shoyu': 'mild',\n",
1646 | " 'korean': 'hot',\n",
1647 | " 'pork flavor': 'mild',\n",
1648 | " 'shio': 'mild', \n",
1649 | " 'shrimp flavour':'mild', \n",
1650 | " 'crab flavour':'mild',\n",
1651 | " 'seafood flavour': 'mild',\n",
1652 | " 'creamy' : 'mild',\n",
1653 | " 'demae': 'mild',\n",
1654 | " 'xo sauce': 'mild',\n",
1655 | " 'seafood flavour': 'mild',\n",
1656 | " 'udon': 'mild',\n",
1657 | " 'tempura': 'mild',\n",
1658 | " 'kimchi': 'hot',\n",
1659 | " 'sesame oil': 'mild',\n",
1660 | " 'artificial pork': 'mild',\n",
1661 | " 'sichuan': 'hot',\n",
1662 | " 'tonkotsu': 'mild',\n",
1663 | " 'chicken soup': 'mild',\n",
1664 | " 'mi segera': 'mild',\n",
1665 | " 'penang white': 'hot',\n",
1666 | " 'curry': 'mild',\n",
1667 | " 'yum kung': 'hot',\n",
1668 | " 'soy paste': 'mild',\n",
1669 | " 'laksa': 'hot',\n",
1670 | " 'artificial': 'mild',\n",
1671 | " 'pancit canton': 'mild',\n",
1672 | " 'sweet potato': 'mild',\n",
1673 | " 'kuah rasa': 'mild',\n",
1674 | " 'rasa ayam': 'mild',\n",
1675 | " 'buldak': 'hot',\n",
1676 | " 'bokkeummyun': 'hot',\n",
1677 | " 'shin ramyun': 'hot',\n",
1678 | " 'tom yam': 'hot',\n",
1679 | " 'demae iccho': 'mild',\n",
1680 | " 'sopa nissin': 'mild',\n",
1681 | " 'malaysia penang': 'hot',\n",
1682 | " 'south korean': 'hot',\n",
1683 | " 'fiery': 'hot'\n",
1684 | "}\n",
1685 | "\n",
1686 | "\n",
1687 | "spicy_levels = ['mild','medium','spicy','hot']\n",
1688 | "def spiciness(text):\n",
1689 | " # catch if the spciniess is directly in the text\n",
1690 | " text = re.sub('r[^a-zA-Z]', ' ', text.lower())\n",
1691 | " text = re.sub(' +', ' ', text)\n",
1692 | " for spice in spicy_levels:\n",
1693 | " if spice in text:\n",
1694 | " return spice\n",
1695 | " \n",
1696 | " # catch if the spiciness is implied in the text \n",
1697 | " words = text.split()\n",
1698 | " for j in range(len(words) - 1):\n",
1699 | " pair = words[j] + ' ' + words[j+1]\n",
1700 | " if pair in spiciness_dict:\n",
1701 | " return spiciness_dict[pair]\n",
1702 | " return 'pepper free'\n",
1703 | "\n",
1704 | "\n",
1705 | "df['spiciness_2'] = df['Variety'].apply(spiciness)\n",
1706 | "df['spiciness_2'].value_counts()"
1707 | ]
1708 | },
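{
 "cell_type": "markdown",
 "metadata": {},
 "source": [
  "Matching precedence in this second pass: an explicit level word in the name wins, then the first dictionary phrase found, then 'pepper free'. A few illustrative calls, assuming the `spiciness` function defined above:\n",
  "\n",
  "```python\n",
  "spiciness('Super Hot Chicken')              # level word       -> 'hot'\n",
  "spiciness('Tom Yum Shrimp')                 # phrase lookup    -> 'hot'\n",
  "spiciness('Tonkotsu Ramen Japanese Style')  # phrase lookup    -> 'mild'\n",
  "spiciness('Mi Kiwi')                        # nothing matches  -> 'pepper free'\n",
  "```"
 ]
},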
1709 | {
1710 | "cell_type": "code",
1711 | "execution_count": 12,
1712 | "metadata": {},
1713 | "outputs": [
1714 | {
1715 | "data": {
1716 | "text/html": [
1717 | "\n",
1718 | "\n",
1731 | "
\n",
1732 | " \n",
1733 | " \n",
1734 | " | \n",
1735 | " Brand | \n",
1736 | " Variety | \n",
1737 | " Style | \n",
1738 | " Country | \n",
1739 | " Stars | \n",
1740 | " spiciness | \n",
1741 | " spiciness_2 | \n",
1742 | "
\n",
1743 | " \n",
1744 | " \n",
1745 | " \n",
1746 | " | 0 | \n",
1747 | " MIT | \n",
1748 | " Shallot Sauce Dry Noodle | \n",
1749 | " Pack | \n",
1750 | " Taiwan | \n",
1751 | " 3 | \n",
1752 | " pepper free | \n",
1753 | " pepper free | \n",
1754 | "
\n",
1755 | " \n",
1756 | " | 1 | \n",
1757 | " Sapporo Ichiban | \n",
1758 | " Tonkotsu Ramen Japanese Style Noodles | \n",
1759 | " Bowl | \n",
1760 | " United States | \n",
1761 | " 4.5 | \n",
1762 | " pepper free | \n",
1763 | " mild | \n",
1764 | "
\n",
1765 | " \n",
1766 | " | 2 | \n",
1767 | " Binh Tay | \n",
1768 | " Mi Kiwi | \n",
1769 | " Bowl | \n",
1770 | " Vietnam | \n",
1771 | " 3 | \n",
1772 | " pepper free | \n",
1773 | " pepper free | \n",
1774 | "
\n",
1775 | " \n",
1776 | " | 3 | \n",
1777 | " Charming Couple | \n",
1778 | " Biang Biang Scallion Chicken Sauce | \n",
1779 | " Pack | \n",
1780 | " Taiwan | \n",
1781 | " 4.5 | \n",
1782 | " pepper free | \n",
1783 | " pepper free | \n",
1784 | "
\n",
1785 | " \n",
1786 | " | 4 | \n",
1787 | " immi | \n",
1788 | " Tom Yum Shrimp Flavor Ramen Soup | \n",
1789 | " Pack | \n",
1790 | " United States | \n",
1791 | " 2.75 | \n",
1792 | " pepper free | \n",
1793 | " hot | \n",
1794 | "
\n",
1795 | " \n",
1796 | " | ... | \n",
1797 | " ... | \n",
1798 | " ... | \n",
1799 | " ... | \n",
1800 | " ... | \n",
1801 | " ... | \n",
1802 | " ... | \n",
1803 | " ... | \n",
1804 | "
\n",
1805 | " \n",
1806 | " | 4115 | \n",
1807 | " Vifon | \n",
1808 | " Hu Tiu Nam Vang [\"Phnom Penh\" style] Asian Sty... | \n",
1809 | " Bowl | \n",
1810 | " Vietnam | \n",
1811 | " 3.5 | \n",
1812 | " pepper free | \n",
1813 | " pepper free | \n",
1814 | "
\n",
1815 | " \n",
1816 | " | 4116 | \n",
1817 | " Wai Wai | \n",
1818 | " Oriental Style Instant Noodles | \n",
1819 | " Pack | \n",
1820 | " Thailand | \n",
1821 | " 1 | \n",
1822 | " pepper free | \n",
1823 | " pepper free | \n",
1824 | "
\n",
1825 | " \n",
1826 | " | 4117 | \n",
1827 | " Wai Wai | \n",
1828 | " Tom Yum Shrimp | \n",
1829 | " Pack | \n",
1830 | " Thailand | \n",
1831 | " 2 | \n",
1832 | " pepper free | \n",
1833 | " hot | \n",
1834 | "
\n",
1835 | " \n",
1836 | " | 4118 | \n",
1837 | " Wai Wai | \n",
1838 | " Tom Yum Chili Flavor | \n",
1839 | " Pack | \n",
1840 | " Thailand | \n",
1841 | " 2 | \n",
1842 | " pepper free | \n",
1843 | " hot | \n",
1844 | "
\n",
1845 | " \n",
1846 | " | 4119 | \n",
1847 | " Westbrae | \n",
1848 | " Miso Ramen | \n",
1849 | " Pack | \n",
1850 | " United States | \n",
1851 | " 0.5 | \n",
1852 | " pepper free | \n",
1853 | " pepper free | \n",
1854 | "
\n",
1855 | " \n",
1856 | "
\n",
1857 | "
4120 rows × 7 columns
\n",
1858 | "
"
1859 | ],
1860 | "text/plain": [
1861 | " Brand Variety \\\n",
1862 | "0 MIT Shallot Sauce Dry Noodle \n",
1863 | "1 Sapporo Ichiban Tonkotsu Ramen Japanese Style Noodles \n",
1864 | "2 Binh Tay Mi Kiwi \n",
1865 | "3 Charming Couple Biang Biang Scallion Chicken Sauce \n",
1866 | "4 immi Tom Yum Shrimp Flavor Ramen Soup \n",
1867 | "... ... ... \n",
1868 | "4115 Vifon Hu Tiu Nam Vang [\"Phnom Penh\" style] Asian Sty... \n",
1869 | "4116 Wai Wai Oriental Style Instant Noodles \n",
1870 | "4117 Wai Wai Tom Yum Shrimp \n",
1871 | "4118 Wai Wai Tom Yum Chili Flavor \n",
1872 | "4119 Westbrae Miso Ramen \n",
1873 | "\n",
1874 | " Style Country Stars spiciness spiciness_2 \n",
1875 | "0 Pack Taiwan 3 pepper free pepper free \n",
1876 | "1 Bowl United States 4.5 pepper free mild \n",
1877 | "2 Bowl Vietnam 3 pepper free pepper free \n",
1878 | "3 Pack Taiwan 4.5 pepper free pepper free \n",
1879 | "4 Pack United States 2.75 pepper free hot \n",
1880 | "... ... ... ... ... ... \n",
1881 | "4115 Bowl Vietnam 3.5 pepper free pepper free \n",
1882 | "4116 Pack Thailand 1 pepper free pepper free \n",
1883 | "4117 Pack Thailand 2 pepper free hot \n",
1884 | "4118 Pack Thailand 2 pepper free hot \n",
1885 | "4119 Pack United States 0.5 pepper free pepper free \n",
1886 | "\n",
1887 | "[4120 rows x 7 columns]"
1888 | ]
1889 | },
1890 | "execution_count": 12,
1891 | "metadata": {},
1892 | "output_type": "execute_result"
1893 | }
1894 | ],
1895 | "source": []
1896 | },
1897 | {
1898 | "cell_type": "markdown",
1899 | "metadata": {},
1900 | "source": [
1901 | "# separate by region"
1902 | ]
1903 | },
1904 | {
1905 | "cell_type": "markdown",
1906 | "metadata": {},
1907 | "source": [
1908 | "'https://worldpopulationreview.com/country-rankings/list-of-countries-by-continent'"
1909 | ]
1910 | },
1911 | {
1912 | "cell_type": "code",
1913 | "execution_count": 38,
1914 | "metadata": {
1915 | "scrolled": true
1916 | },
1917 | "outputs": [
1918 | {
1919 | "data": {
1920 | "text/html": [
1921 | "\n",
1922 | "\n",
1935 | "
\n",
1936 | " \n",
1937 | " \n",
1938 | " | \n",
1939 | " country | \n",
1940 | " continent | \n",
1941 | "
\n",
1942 | " \n",
1943 | " \n",
1944 | " \n",
1945 | " | 0 | \n",
1946 | " algeria | \n",
1947 | " Africa | \n",
1948 | "
\n",
1949 | " \n",
1950 | " | 1 | \n",
1951 | " angola | \n",
1952 | " Africa | \n",
1953 | "
\n",
1954 | " \n",
1955 | " | 2 | \n",
1956 | " benin | \n",
1957 | " Africa | \n",
1958 | "
\n",
1959 | " \n",
1960 | " | 3 | \n",
1961 | " botswana | \n",
1962 | " Africa | \n",
1963 | "
\n",
1964 | " \n",
1965 | " | 4 | \n",
1966 | " burkinafaso | \n",
1967 | " Africa | \n",
1968 | "
\n",
1969 | " \n",
1970 | " | ... | \n",
1971 | " ... | \n",
1972 | " ... | \n",
1973 | "
\n",
1974 | " \n",
1975 | " | 229 | \n",
1976 | " paraguay | \n",
1977 | " South America | \n",
1978 | "
\n",
1979 | " \n",
1980 | " | 230 | \n",
1981 | " peru | \n",
1982 | " South America | \n",
1983 | "
\n",
1984 | " \n",
1985 | " | 231 | \n",
1986 | " suriname | \n",
1987 | " South America | \n",
1988 | "
\n",
1989 | " \n",
1990 | " | 232 | \n",
1991 | " uruguay | \n",
1992 | " South America | \n",
1993 | "
\n",
1994 | " \n",
1995 | " | 233 | \n",
1996 | " venezuela | \n",
1997 | " South America | \n",
1998 | "
\n",
1999 | " \n",
2000 | "
\n",
2001 | "
234 rows × 2 columns
\n",
2002 | "
"
2003 | ],
2004 | "text/plain": [
2005 | " country continent\n",
2006 | "0 algeria Africa\n",
2007 | "1 angola Africa\n",
2008 | "2 benin Africa\n",
2009 | "3 botswana Africa\n",
2010 | "4 burkinafaso Africa\n",
2011 | ".. ... ...\n",
2012 | "229 paraguay South America\n",
2013 | "230 peru South America\n",
2014 | "231 suriname South America\n",
2015 | "232 uruguay South America\n",
2016 | "233 venezuela South America\n",
2017 | "\n",
2018 | "[234 rows x 2 columns]"
2019 | ]
2020 | },
2021 | "execution_count": 38,
2022 | "metadata": {},
2023 | "output_type": "execute_result"
2024 | }
2025 | ],
2026 | "source": [
2027 | "continent = pd.read_csv('country_by_continent.csv')\n",
2028 | "continent['country'] = continent['country'].str.lower().str.replace(' ','')\n",
2029 | "df['country'] = df['Country'].str.lower().str.replace(' ','')\n",
2030 | "continent"
2031 | ]
2032 | },
2033 | {
2034 | "cell_type": "code",
2035 | "execution_count": 36,
2036 | "metadata": {},
2037 | "outputs": [
2038 | {
2039 | "data": {
2040 | "text/html": [
2041 | "\n",
2042 | "\n",
2055 | "
\n",
2056 | " \n",
2057 | " \n",
2058 | " | \n",
2059 | " Brand | \n",
2060 | " Variety | \n",
2061 | " Style | \n",
2062 | " Country | \n",
2063 | " Stars | \n",
2064 | " spiciness | \n",
2065 | " spiciness_2 | \n",
2066 | " continent | \n",
2067 | "
\n",
2068 | " \n",
2069 | " \n",
2070 | " \n",
2071 | " | 0 | \n",
2072 | " MIT | \n",
2073 | " Shallot Sauce Dry Noodle | \n",
2074 | " Pack | \n",
2075 | " Taiwan | \n",
2076 | " 3 | \n",
2077 | " pepper free | \n",
2078 | " pepper free | \n",
2079 | " Asia | \n",
2080 | "
\n",
2081 | " \n",
2082 | " | 1 | \n",
2083 | " Charming Couple | \n",
2084 | " Biang Biang Scallion Chicken Sauce | \n",
2085 | " Pack | \n",
2086 | " Taiwan | \n",
2087 | " 4.5 | \n",
2088 | " pepper free | \n",
2089 | " pepper free | \n",
2090 | " Asia | \n",
2091 | "
\n",
2092 | " \n",
2093 | " | 2 | \n",
2094 | " Hi Lai Foods | \n",
2095 | " Lai Noodle Vegan Sesame Paste Flavor | \n",
2096 | " Pack | \n",
2097 | " Taiwan | \n",
2098 | " 5 | \n",
2099 | " pepper free | \n",
2100 | " pepper free | \n",
2101 | " Asia | \n",
2102 | "
\n",
2103 | " \n",
2104 | " | 3 | \n",
2105 | " Ve Wong | \n",
2106 | " Artificial Peppered Beef Flavor | \n",
2107 | " Pack | \n",
2108 | " Taiwan | \n",
2109 | " 3.5 | \n",
2110 | " pepper free | \n",
2111 | " mild | \n",
2112 | " Asia | \n",
2113 | "
\n",
2114 | " \n",
2115 | " | 4 | \n",
2116 | " iNoodle | \n",
2117 | " Flat Noodle Soy Sauce Flavor | \n",
2118 | " Pack | \n",
2119 | " Taiwan | \n",
2120 | " 3.25 | \n",
2121 | " pepper free | \n",
2122 | " mild | \n",
2123 | " Asia | \n",
2124 | "
\n",
2125 | " \n",
2126 | " | ... | \n",
2127 | " ... | \n",
2128 | " ... | \n",
2129 | " ... | \n",
2130 | " ... | \n",
2131 | " ... | \n",
2132 | " ... | \n",
2133 | " ... | \n",
2134 | " ... | \n",
2135 | "
\n",
2136 | " \n",
2137 | " | 4022 | \n",
2138 | " Nissin | \n",
2139 | " Sabor A Pollo Sopa Instantánea Con Fideos | \n",
2140 | " Pack | \n",
2141 | " Colombia | \n",
2142 | " 3.25 | \n",
2143 | " pepper free | \n",
2144 | " pepper free | \n",
2145 | " South America | \n",
2146 | "
\n",
2147 | " \n",
2148 | " | 4023 | \n",
2149 | " Nissin | \n",
2150 | " Cup Noodles Sabor A Gallina | \n",
2151 | " Cup | \n",
2152 | " Colombia | \n",
2153 | " 3.5 | \n",
2154 | " pepper free | \n",
2155 | " pepper free | \n",
2156 | " South America | \n",
2157 | "
\n",
2158 | " \n",
2159 | " | 4024 | \n",
2160 | " Nissin | \n",
2161 | " Sabor A Carne Sopa Instantánea Con Fideos | \n",
2162 | " Pack | \n",
2163 | " Colombia | \n",
2164 | " 3.75 | \n",
2165 | " pepper free | \n",
2166 | " pepper free | \n",
2167 | " South America | \n",
2168 | "
\n",
2169 | " \n",
2170 | " | 4025 | \n",
2171 | " Baltix | \n",
2172 | " Instant Noodles With Chicken Flavour Broth | \n",
2173 | " Pack | \n",
2174 | " Estonia | \n",
2175 | " 3.75 | \n",
2176 | " pepper free | \n",
2177 | " pepper free | \n",
2178 | " Europe | \n",
2179 | "
\n",
2180 | " \n",
2181 | " | 4026 | \n",
2182 | " Baltix | \n",
2183 | " Instant Noodles With Beef Flavour Broth | \n",
2184 | " Pack | \n",
2185 | " Estonia | \n",
2186 | " 3.25 | \n",
2187 | " pepper free | \n",
2188 | " pepper free | \n",
2189 | " Europe | \n",
2190 | "
\n",
2191 | " \n",
2192 | "
\n",
2193 | "
4027 rows × 8 columns
\n",
2194 | "
"
2195 | ],
2196 | "text/plain": [
2197 | " Brand Variety Style \\\n",
2198 | "0 MIT Shallot Sauce Dry Noodle Pack \n",
2199 | "1 Charming Couple Biang Biang Scallion Chicken Sauce Pack \n",
2200 | "2 Hi Lai Foods Lai Noodle Vegan Sesame Paste Flavor Pack \n",
2201 | "3 Ve Wong Artificial Peppered Beef Flavor Pack \n",
2202 | "4 iNoodle Flat Noodle Soy Sauce Flavor Pack \n",
2203 | "... ... ... ... \n",
2204 | "4022 Nissin Sabor A Pollo Sopa Instantánea Con Fideos Pack \n",
2205 | "4023 Nissin Cup Noodles Sabor A Gallina Cup \n",
2206 | "4024 Nissin Sabor A Carne Sopa Instantánea Con Fideos Pack \n",
2207 | "4025 Baltix Instant Noodles With Chicken Flavour Broth Pack \n",
2208 | "4026 Baltix Instant Noodles With Beef Flavour Broth Pack \n",
2209 | "\n",
2210 | " Country Stars spiciness spiciness_2 continent \n",
2211 | "0 Taiwan 3 pepper free pepper free Asia \n",
2212 | "1 Taiwan 4.5 pepper free pepper free Asia \n",
2213 | "2 Taiwan 5 pepper free pepper free Asia \n",
2214 | "3 Taiwan 3.5 pepper free mild Asia \n",
2215 | "4 Taiwan 3.25 pepper free mild Asia \n",
2216 | "... ... ... ... ... ... \n",
2217 | "4022 Colombia 3.25 pepper free pepper free South America \n",
2218 | "4023 Colombia 3.5 pepper free pepper free South America \n",
2219 | "4024 Colombia 3.75 pepper free pepper free South America \n",
2220 | "4025 Estonia 3.75 pepper free pepper free Europe \n",
2221 | "4026 Estonia 3.25 pepper free pepper free Europe \n",
2222 | "\n",
2223 | "[4027 rows x 8 columns]"
2224 | ]
2225 | },
2226 | "execution_count": 36,
2227 | "metadata": {},
2228 | "output_type": "execute_result"
2229 | }
2230 | ],
2231 | "source": [
2232 | "new_df = pd.merge(df, continent, on=\"country\")\n",
2233 | "new_df = new_df.drop(columns=['country'])\n",
2234 | "new_df"
2235 | ]
2236 | },
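{
 "cell_type": "markdown",
 "metadata": {},
 "source": [
  "`pd.merge` defaults to an inner join, so the 93 reviews whose normalized country has no match in the continent table are silently dropped (4120 rows before the merge, 4027 after). A quick way to inspect what was lost (a sketch using `df` and `continent` as defined above):\n",
  "\n",
  "```python\n",
  "unmatched = df[~df['country'].isin(continent['country'])]\n",
  "unmatched['Country'].value_counts()\n",
  "```"
 ]
}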
2237 | ],
2238 | "metadata": {
2239 | "kernelspec": {
2240 | "display_name": "Python 3 (ipykernel)",
2241 | "language": "python",
2242 | "name": "python3"
2243 | },
2244 | "language_info": {
2245 | "codemirror_mode": {
2246 | "name": "ipython",
2247 | "version": 3
2248 | },
2249 | "file_extension": ".py",
2250 | "mimetype": "text/x-python",
2251 | "name": "python",
2252 | "nbconvert_exporter": "python",
2253 | "pygments_lexer": "ipython3",
2254 | "version": "3.9.5"
2255 | }
2256 | },
2257 | "nbformat": 4,
2258 | "nbformat_minor": 2
2259 | }
2260 |
--------------------------------------------------------------------------------