├── 01_PQ_YT_Notebook.ipynb
├── 01_ValueInvestorHoldings31122021.xlsx
├── 02_DataCleaning_Video.ipynb
├── 05_ValueInvestorHoldings31122022_2.xlsx
├── 06_ValueInvestorHoldings31032023.xlsx
├── 28_Lost_Customers_NW_Video.pbix
├── 28_New_Customers_NW_video.pbix
├── ActualDataset.xlsx
├── BaSensei_webscrape_Bitcoin_YTVideo.ipynb
├── BrokerStatement.xlsx
├── BrokerStatement2.xlsx
├── BrokerStatement3.xlsx
├── CleanderDataScience.csv
├── DAX_Dates.txt
├── DataProfiling_Video.ipynb
├── DataScientist2.csv
├── DataScientist3.html
├── File1.xlsx
├── File3.xlsx
├── Holdings_30062022.xlsx
├── Holdings_30092022.xlsx
├── Holdings_31032022.xlsx
├── Holdings_31122022.xlsx
├── HorizontalDatasheet.xlsx
├── PBI_CrossOverChart_SVB_CS.pbix
├── PBI_Holdings_Source.xlsx
├── PBI_MOM_Video.pbix
├── PBI_Simple_Moving_Average_Video.pbix
├── PQ_Advanced_Grouping_With_Max.xlsx
├── PQ_AllSeasons_Video_2.xlsx
├── PQ_BUFFER_VIDEO.xlsx
├── PQ_BankStatement_Video.xlsx
├── PQ_BlankRow_video.xlsx
├── PQ_Budget_Video.xlsx
├── PQ_ChatGPT_MarketVideo.pbix
├── PQ_ClosestNextHoliday_Video.xlsx
├── PQ_ColumnGroups_BaSensei_Video.xlsx
├── PQ_Consecutivenumbers_Video2.xlsx
├── PQ_Counter_Column_Video.xlsx
├── PQ_DOUBLE_BARREL_2_Video.xlsx
├── PQ_DoubleBarrel_Video.xlsx
├── PQ_Double_Headers_YT_Video.xlsx
├── PQ_Dups_Video.xlsx
├── PQ_DynamicSplit_Video.xlsx
├── PQ_Dynamic_Split_Header_Names.xlsx
├── PQ_Dynamic_T_B_Video.xlsx
├── PQ_EACH_VIDEO.xlsx
├── PQ_EMAIL_VIDEO.xlsx
├── PQ_Expense_Allocations_Video.xlsx
├── PQ_FILTER_PARAM_VIDEO.xlsx
├── PQ_FILTER_TEXT_INPUT_OR_POSITION.xlsx
├── PQ_Filter_Before_Expand_Video.xlsx
├── PQ_Filter_Columns_Once_Video.xlsx
├── PQ_Grouping_Video_2.xlsx
├── PQ_HorzontalStack_Video.xlsx
├── PQ_Jagged_Stacked_Video.xlsx
├── PQ_Jagged_Tables_Source.xlsx
├── PQ_JunkRows_Video.xlsx
├── PQ_LOOPING_VIDEO_2.pbix
├── PQ_LastNTotal_Video.xlsx
├── PQ_ListAccum_NewColumns_Video.xlsx
├── PQ_ListAlernate_Video.xlsx
├── PQ_Listaccumulate_video.xlsx
├── PQ_LsatDate_Video.xlsx
├── PQ_Max_Value_Row_Video.xlsx
├── PQ_Merge_Video.xlsx
├── PQ_OCCURANCE_COUNT_VIDEO.xlsx
├── PQ_OpenAI_Pyhton.xlsx
├── PQ_PadMiddle_Video.xlsx
├── PQ_ParsingDelimitedDataToTable_Video2.xlsx
├── PQ_Pattern_Extraction_Video.xlsx
├── PQ_PercentageOfTotal_Video.xlsx
├── PQ_Portfolio_Comparer_Video.xlsx
├── PQ_PreviousRowsEmployeeTimeLine.xlsx
├── PQ_PreviousRowsEmployeeTimeLine_Video.xlsx
├── PQ_RatingsData.xlsx
├── PQ_ReportheaderIntoReportVideo.xlsx
├── PQ_ReverseFillDown_Video.xlsx
├── PQ_SPRatings_ListAccum_Source_Video2.xlsx
├── PQ_Song_List_Stacked_Video.xlsx
├── PQ_SourceData_Portfolios.xlsx
├── PQ_StepsProcess_Video.xlsx
├── PQ_Stock_Groupings_Video.xlsx
├── PQ_Subtotals_Video.xlsx
├── PQ_TextReverseSorting_Video.xlsx
├── PQ_Unstack_Data.xlsx
├── PQ_Unstack_Data_Uneven_Video.xlsx
├── PQ_VALUE_ALL_COLUMNS_VIDEO.xlsx
├── PQ_Working Days_2_Video.xlsx
├── PQ_Working Hours_Video.xlsx
├── PQ_bulkReplace_YT_Video.xlsx
├── PQ_combineWithdifferntColumnNames.xlsx
├── Portfolios1.xlsx
├── Portfolios2.xlsx
├── Portfolios3.xlsx
├── PowerQuery_Dynamically_Clean_DataSet_Headers.xlsx
├── Pq_Address_Split_Video.xlsx
├── Pq_AttribuetDescription_Video.xlsx
├── Pq_Conditionally_Replace_Video.xlsx
├── Pq_DynamicSortColumns_Video.xlsx
├── Pq_DynamicTRansformColumnNames_Video.xlsx
├── Pq_Name_Changes_Video.xlsx
├── Pq_Portfolio_Video.xlsx
├── Pq_Query_Referencing_Video.xlsx
├── Pq_SplitDynamically_Source.xlsx
├── StockQuotesData.xlsx
├── StockQuotes_SVB_CS.xlsx
├── Trades_video.xlsx
├── pattern2Video.xlsx
├── us_stock_sales_2024.csv
└── us_stock_sales_2024_extended.csv
/01_PQ_YT_Notebook.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "code",
5 | "execution_count": 4,
6 | "metadata": {
7 | "collapsed": true,
8 | "ExecuteTime": {
9 | "end_time": "2023-11-24T12:06:28.192716100Z",
10 | "start_time": "2023-11-24T12:06:28.178737Z"
11 | }
12 | },
13 | "outputs": [],
14 | "source": [
15 | "import openai\n",
16 | "import os"
17 | ]
18 | },
19 | {
20 | "cell_type": "code",
21 | "execution_count": 5,
22 | "outputs": [],
23 | "source": [
24 | "API_Key = \"xxxxx\"\n",
25 | "openai.api_key = API_Key"
26 | ],
27 | "metadata": {
28 | "collapsed": false,
29 | "ExecuteTime": {
30 | "end_time": "2023-11-24T12:06:56.082223300Z",
31 | "start_time": "2023-11-24T12:06:56.079230400Z"
32 | }
33 | }
34 | },
35 | {
36 | "cell_type": "code",
37 | "execution_count": 7,
38 | "outputs": [],
39 | "source": [
40 | "openai.api_key = os.getenv(\"OPENAI_API_KEY\")"
41 | ],
42 | "metadata": {
43 | "collapsed": false,
44 | "ExecuteTime": {
45 | "end_time": "2023-11-24T12:08:05.620389800Z",
46 | "start_time": "2023-11-24T12:08:05.605957900Z"
47 | }
48 | }
49 | },
50 | {
51 | "cell_type": "code",
52 | "execution_count": 9,
53 | "outputs": [],
54 | "source": [
55 | "response = openai.ChatCompletion.create(\n",
56 | " model=\"gpt-3.5-turbo\",\n",
57 | " messages = [{\"role\":\"user\",\"content\": \"Hello World!\"}],\n",
58 | " temperature=1,\n",
59 | " max_tokens=600,\n",
60 | " top_p=1,\n",
61 | " frequency_penalty=0,\n",
62 | " presence_penalty=0\n",
63 | ")"
64 | ],
65 | "metadata": {
66 | "collapsed": false,
67 | "ExecuteTime": {
68 | "end_time": "2023-11-24T12:09:45.928571500Z",
69 | "start_time": "2023-11-24T12:09:45.245885700Z"
70 | }
71 | }
72 | },
73 | {
74 | "cell_type": "code",
75 | "execution_count": 10,
76 | "outputs": [
77 | {
78 | "name": "stdout",
79 | "output_type": "stream",
80 | "text": [
81 | "{\n",
82 | " \"id\": \"chatcmpl-8OPI8QjpySYFAf9U2zxOKKbZC5Bg0\",\n",
83 | " \"object\": \"chat.completion\",\n",
84 | " \"created\": 1700827784,\n",
85 | " \"model\": \"gpt-3.5-turbo-0613\",\n",
86 | " \"choices\": [\n",
87 | " {\n",
88 | " \"index\": 0,\n",
89 | " \"message\": {\n",
90 | " \"role\": \"assistant\",\n",
91 | " \"content\": \"Hello! How can I assist you today?\"\n",
92 | " },\n",
93 | " \"finish_reason\": \"stop\"\n",
94 | " }\n",
95 | " ],\n",
96 | " \"usage\": {\n",
97 | " \"prompt_tokens\": 10,\n",
98 | " \"completion_tokens\": 9,\n",
99 | " \"total_tokens\": 19\n",
100 | " }\n",
101 | "}\n"
102 | ]
103 | }
104 | ],
105 | "source": [
106 | "print(response)"
107 | ],
108 | "metadata": {
109 | "collapsed": false,
110 | "ExecuteTime": {
111 | "end_time": "2023-11-24T12:09:55.968293500Z",
112 | "start_time": "2023-11-24T12:09:55.964800800Z"
113 | }
114 | }
115 | },
116 | {
117 | "cell_type": "code",
118 | "execution_count": 12,
119 | "outputs": [
120 | {
121 | "data": {
122 | "text/plain": "'Hello! How can I assist you today?'"
123 | },
124 | "execution_count": 12,
125 | "metadata": {},
126 | "output_type": "execute_result"
127 | }
128 | ],
129 | "source": [
130 | "response[\"choices\"][0][\"message\"][\"content\"]"
131 | ],
132 | "metadata": {
133 | "collapsed": false,
134 | "ExecuteTime": {
135 | "end_time": "2023-11-24T12:10:56.926079200Z",
136 | "start_time": "2023-11-24T12:10:56.917035700Z"
137 | }
138 | }
139 | },
140 | {
141 | "cell_type": "code",
142 | "execution_count": 24,
143 | "outputs": [],
144 | "source": [
145 | "def ask_chatgpt(messages):\n",
146 | " model_response = openai.ChatCompletion.create(\n",
147 | " model=\"gpt-3.5-turbo\",\n",
148 | " messages=messages,\n",
149 | " temperature=1,\n",
150 | " max_tokens=600,\n",
151 | " top_p=1,\n",
152 | " frequency_penalty=0,\n",
153 | " presence_penalty=0\n",
154 | " )\n",
155 | " return model_response[\"choices\"][0][\"message\"][\"content\"]"
156 | ],
157 | "metadata": {
158 | "collapsed": false,
159 | "ExecuteTime": {
160 | "end_time": "2023-11-24T12:25:18.049776100Z",
161 | "start_time": "2023-11-24T12:25:18.034961700Z"
162 | }
163 | }
164 | },
165 | {
166 | "cell_type": "code",
167 | "execution_count": 25,
168 | "outputs": [],
169 | "source": [
170 | "prompt_role='You are a Financial Analyst who needs to provide Market updates for your clients. Context : the daily market information is provided to you daily. Task : Summarize in 140 words the following Market news for a twitter post. The Market news to summarize:'"
171 | ],
172 | "metadata": {
173 | "collapsed": false,
174 | "ExecuteTime": {
175 | "end_time": "2023-11-24T12:25:35.608621800Z",
176 | "start_time": "2023-11-24T12:25:35.599095Z"
177 | }
178 | }
179 | },
180 | {
181 | "cell_type": "code",
182 | "execution_count": 26,
183 | "outputs": [],
184 | "source": [
185 | "Commentary = 'The JSE ended weaker on Tuesday, reversing some of the gains from the previous day. The market was pulled down by resource and industrial stocks. On the currency front, the rand traded firmer against the dollar, mainly supported by recent hawkish remarks from the South African Reserve Bank ahead of a crucial budget statement. The JS E All Share and blue-chip Top 40 Index decreased by 0.3% and 0.6%. European markets closed broadly stronger as investors reacted to a slew of earnings updates and economic data from the region. The UK’s FTSE 100 declined by 0.1%, while Germany’s DAX and France’s CAC 40 advanced by 0.6% and 0.9% respectively. US stocks moved mostly higher over the course of the trading session following some early weakness. Selling pressure faded shortly after the start of trading, as investors seemed reluctant to make significant moves ahead of the Federal Reserves monetary policy announcement on Wednesday. The S&P 500 and the Nasdaq Composite jumped by 0.6% and 0.5%, while the Dow Jones Industrial Average improved by 0.4%. Asian markets are up this morning. The Nikkei, the Hang Seng and the Shanghai Composite are 2.1%, 0.1% and 0.3% in the green.'"
186 | ],
187 | "metadata": {
188 | "collapsed": false,
189 | "ExecuteTime": {
190 | "end_time": "2023-11-24T12:25:56.068731600Z",
191 | "start_time": "2023-11-24T12:25:56.055501700Z"
192 | }
193 | }
194 | },
195 | {
196 | "cell_type": "code",
197 | "execution_count": 27,
198 | "outputs": [
199 | {
200 | "data": {
201 | "text/plain": "196"
202 | },
203 | "execution_count": 27,
204 | "metadata": {},
205 | "output_type": "execute_result"
206 | }
207 | ],
208 | "source": [
209 | "len(Commentary.split())"
210 | ],
211 | "metadata": {
212 | "collapsed": false,
213 | "ExecuteTime": {
214 | "end_time": "2023-11-24T12:26:11.427217800Z",
215 | "start_time": "2023-11-24T12:26:11.411246500Z"
216 | }
217 | }
218 | },
219 | {
220 | "cell_type": "code",
221 | "execution_count": 28,
222 | "outputs": [],
223 | "source": [
224 | "def assist_analyst(prole, MainComment):\n",
225 | " prompt = f'{prole}{MainComment}'\n",
226 | " return ask_chatgpt([{\"role\": \"user\", \"content\": prompt}])\n"
227 | ],
228 | "metadata": {
229 | "collapsed": false,
230 | "ExecuteTime": {
231 | "end_time": "2023-11-24T12:28:37.191711800Z",
232 | "start_time": "2023-11-24T12:28:37.186727100Z"
233 | }
234 | }
235 | },
236 | {
237 | "cell_type": "code",
238 | "execution_count": 29,
239 | "outputs": [
240 | {
241 | "data": {
242 | "text/plain": "\"Market Update: JSE weakened, dragged by resource and industrial stocks. Rand traded firmer supported by hawkish statements from SA Reserve Bank ahead of budget statement. European markets closed stronger on earnings and economic data. UK's FTSE 100 declined, Germany's DAX and France's CAC 40 advanced. US stocks mostly higher, investors cautious before Fed's policy announcement. S&P 500 and Nasdaq Composite jumped, Dow Jones Industrial Average improved. Asian markets up this morning, Nikkei, Hang Seng, and Shanghai Composite in the green. #marketnews #JSE #rand #Europeanmarkets #USstocks #Asianmarkets\""
243 | },
244 | "execution_count": 29,
245 | "metadata": {},
246 | "output_type": "execute_result"
247 | }
248 | ],
249 | "source": [
250 | "assist_analyst(prompt_role, Commentary)"
251 | ],
252 | "metadata": {
253 | "collapsed": false,
254 | "ExecuteTime": {
255 | "end_time": "2023-11-24T12:28:52.661715400Z",
256 | "start_time": "2023-11-24T12:28:48.236489400Z"
257 | }
258 | }
259 | },
260 | {
261 | "cell_type": "code",
262 | "execution_count": 30,
263 | "outputs": [
264 | {
265 | "data": {
266 | "text/plain": "90"
267 | },
268 | "execution_count": 30,
269 | "metadata": {},
270 | "output_type": "execute_result"
271 | }
272 | ],
273 | "source": [
274 | "len(assist_analyst(prompt_role, Commentary).split())"
275 | ],
276 | "metadata": {
277 | "collapsed": false,
278 | "ExecuteTime": {
279 | "end_time": "2023-11-24T12:29:15.726871Z",
280 | "start_time": "2023-11-24T12:29:11.926260500Z"
281 | }
282 | }
283 | },
284 | {
285 | "cell_type": "code",
286 | "execution_count": null,
287 | "outputs": [],
288 | "source": [],
289 | "metadata": {
290 | "collapsed": false
291 | }
292 | }
293 | ],
294 | "metadata": {
295 | "kernelspec": {
296 | "display_name": "Python 3",
297 | "language": "python",
298 | "name": "python3"
299 | },
300 | "language_info": {
301 | "codemirror_mode": {
302 | "name": "ipython",
303 | "version": 3
304 | },
305 | "file_extension": ".py",
306 | "mimetype": "text/x-python",
307 | "name": "python",
308 | "nbconvert_exporter": "python",
309 | "pygments_lexer": "ipython3",
310 | "version": "2.7.6"
311 | }
312 | },
313 | "nbformat": 4,
314 | "nbformat_minor": 0
315 | }
316 |
--------------------------------------------------------------------------------
/01_ValueInvestorHoldings31122021.xlsx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jbotes/powerbiTutorials/13ec6bab783b77e7b6b904274c140e0efdf32cb1/01_ValueInvestorHoldings31122021.xlsx
--------------------------------------------------------------------------------
/02_DataCleaning_Video.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "source": [
6 | "# Import Libraries"
7 | ],
8 | "metadata": {
9 | "collapsed": false
10 | }
11 | },
12 | {
13 | "cell_type": "code",
14 | "execution_count": 193,
15 | "outputs": [],
16 | "source": [
17 | "import pandas as pd"
18 | ],
19 | "metadata": {
20 | "collapsed": false,
21 | "ExecuteTime": {
22 | "end_time": "2023-08-01T18:50:16.909365600Z",
23 | "start_time": "2023-08-01T18:50:16.894906300Z"
24 | }
25 | }
26 | },
27 | {
28 | "cell_type": "markdown",
29 | "source": [
30 | "# Import DataFrame from CSV"
31 | ],
32 | "metadata": {
33 | "collapsed": false
34 | }
35 | },
36 | {
37 | "cell_type": "code",
38 | "execution_count": 194,
39 | "outputs": [],
40 | "source": [
41 | "df = pd.read_csv('DataScientist2.csv')"
42 | ],
43 | "metadata": {
44 | "collapsed": false,
45 | "ExecuteTime": {
46 | "end_time": "2023-08-01T18:50:17.354097500Z",
47 | "start_time": "2023-08-01T18:50:17.246945900Z"
48 | }
49 | }
50 | },
51 | {
52 | "cell_type": "code",
53 | "execution_count": 195,
54 | "outputs": [
55 | {
56 | "data": {
57 | "text/plain": " Unnamed: 0 index Job Title \\\n0 0 0 Senior Data Scientist \n1 1 1 Data Scientist, Product Analytics \n2 2 2 Data Science Manager \n3 3 3 Data Analyst \n4 4 4 Director, Data Science \n\n Salary Estimate \\\n0 $111K-$181K (Glassdoor est.) \n1 $111K-$181K (Glassdoor est.) \n2 $111K-$181K (Glassdoor est.) \n3 $111K-$181K (Glassdoor est.) \n4 $111K-$181K (Glassdoor est.) \n\n Job Description Rating \\\n0 ABOUT HOPPER\\n\\nAt Hopper, we’re on a mission ... 3.5 \n1 At Noom, we use scientifically proven methods ... 4.5 \n2 Decode_M\\n\\nhttps://www.decode-m.com/\\n\\nData ... -1.0 \n3 NaN 3.4 \n4 Director, Data Science - (200537)\\nDescription... 3.4 \n\n Company Name Location Headquarters \\\n0 Hopper\\n3.5 New York, NY Montreal, Canada \n1 Noom US\\n4.5 New York, NY New York, NY \n2 Decode_M New York, NY New York, NY \n3 Sapphire Digital\\n3.4 Lyndhurst, NJ NaN \n4 United Entertainment Group\\n3.4 New York, NY New York, NY \n\n Size Founded Type of ownership \\\n0 501 to 1000 employees 2007 Company - Private \n1 1001 to 5000 employees 2008 Company - Private \n2 1 to 50 employees -1 Unknown \n3 201 to 500 employees 2019 Company - Private \n4 51 to 200 employees 2007 Company - Private \n\n Industry Sector \\\n0 Travel Agencies Travel & Tourism \n1 Health, Beauty, & Fitness Consumer Services \n2 -1 -1 \n3 NaN Information Technology \n4 NaN Business Services \n\n Revenue Competitors Easy Apply \n0 Unknown / Non-Applicable -1 -1 \n1 Unknown / Non-Applicable -1 -1 \n2 Unknown / Non-Applicable -1 TRUE \n3 Unknown / Non-Applicable Zocdoc, Healthgrades -1 \n4 Unknown / Non-Applicable BBDO, Grey Group, Droga5 -1 ",
58 | "text/html": "
\n\n
\n \n \n | \n Unnamed: 0 | \n index | \n Job Title | \n Salary Estimate | \n Job Description | \n Rating | \n Company Name | \n Location | \n Headquarters | \n Size | \n Founded | \n Type of ownership | \n Industry | \n Sector | \n Revenue | \n Competitors | \n Easy Apply | \n
\n \n \n \n 0 | \n 0 | \n 0 | \n Senior Data Scientist | \n $111K-$181K (Glassdoor est.) | \n ABOUT HOPPER\\n\\nAt Hopper, we’re on a mission ... | \n 3.5 | \n Hopper\\n3.5 | \n New York, NY | \n Montreal, Canada | \n 501 to 1000 employees | \n 2007 | \n Company - Private | \n Travel Agencies | \n Travel & Tourism | \n Unknown / Non-Applicable | \n -1 | \n -1 | \n
\n \n 1 | \n 1 | \n 1 | \n Data Scientist, Product Analytics | \n $111K-$181K (Glassdoor est.) | \n At Noom, we use scientifically proven methods ... | \n 4.5 | \n Noom US\\n4.5 | \n New York, NY | \n New York, NY | \n 1001 to 5000 employees | \n 2008 | \n Company - Private | \n Health, Beauty, & Fitness | \n Consumer Services | \n Unknown / Non-Applicable | \n -1 | \n -1 | \n
\n \n 2 | \n 2 | \n 2 | \n Data Science Manager | \n $111K-$181K (Glassdoor est.) | \n Decode_M\\n\\nhttps://www.decode-m.com/\\n\\nData ... | \n -1.0 | \n Decode_M | \n New York, NY | \n New York, NY | \n 1 to 50 employees | \n -1 | \n Unknown | \n -1 | \n -1 | \n Unknown / Non-Applicable | \n -1 | \n TRUE | \n
\n \n 3 | \n 3 | \n 3 | \n Data Analyst | \n $111K-$181K (Glassdoor est.) | \n NaN | \n 3.4 | \n Sapphire Digital\\n3.4 | \n Lyndhurst, NJ | \n NaN | \n 201 to 500 employees | \n 2019 | \n Company - Private | \n NaN | \n Information Technology | \n Unknown / Non-Applicable | \n Zocdoc, Healthgrades | \n -1 | \n
\n \n 4 | \n 4 | \n 4 | \n Director, Data Science | \n $111K-$181K (Glassdoor est.) | \n Director, Data Science - (200537)\\nDescription... | \n 3.4 | \n United Entertainment Group\\n3.4 | \n New York, NY | \n New York, NY | \n 51 to 200 employees | \n 2007 | \n Company - Private | \n NaN | \n Business Services | \n Unknown / Non-Applicable | \n BBDO, Grey Group, Droga5 | \n -1 | \n
\n \n
\n
"
59 | },
60 | "execution_count": 195,
61 | "metadata": {},
62 | "output_type": "execute_result"
63 | }
64 | ],
65 | "source": [
66 | "df.head()"
67 | ],
68 | "metadata": {
69 | "collapsed": false,
70 | "ExecuteTime": {
71 | "end_time": "2023-08-01T18:50:17.463406200Z",
72 | "start_time": "2023-08-01T18:50:17.454269600Z"
73 | }
74 | }
75 | },
76 | {
77 | "cell_type": "code",
78 | "execution_count": 196,
79 | "outputs": [
80 | {
81 | "data": {
82 | "text/plain": "(3912, 17)"
83 | },
84 | "execution_count": 196,
85 | "metadata": {},
86 | "output_type": "execute_result"
87 | }
88 | ],
89 | "source": [
90 | "df.shape"
91 | ],
92 | "metadata": {
93 | "collapsed": false,
94 | "ExecuteTime": {
95 | "end_time": "2023-08-01T18:50:17.619805700Z",
96 | "start_time": "2023-08-01T18:50:17.604856100Z"
97 | }
98 | }
99 | },
100 | {
101 | "cell_type": "markdown",
102 | "source": [
103 | "# 1 - Dropping Columns"
104 | ],
105 | "metadata": {
106 | "collapsed": false
107 | }
108 | },
109 | {
110 | "cell_type": "code",
111 | "execution_count": 197,
112 | "outputs": [],
113 | "source": [
114 | "to_drop = ['index','Rating','Headquarters','Size','Founded','Type of ownership'\n",
115 | " ,'Revenue','Sector','Easy Apply','Competitors','Unnamed: 0']"
116 | ],
117 | "metadata": {
118 | "collapsed": false,
119 | "ExecuteTime": {
120 | "end_time": "2023-08-01T18:50:18.007820200Z",
121 | "start_time": "2023-08-01T18:50:17.998343700Z"
122 | }
123 | }
124 | },
125 | {
126 | "cell_type": "code",
127 | "execution_count": 198,
128 | "outputs": [],
129 | "source": [
130 | "df.drop(to_drop, inplace=True, axis=1)"
131 | ],
132 | "metadata": {
133 | "collapsed": false,
134 | "ExecuteTime": {
135 | "end_time": "2023-08-01T18:50:18.209356300Z",
136 | "start_time": "2023-08-01T18:50:18.195895800Z"
137 | }
138 | }
139 | },
140 | {
141 | "cell_type": "code",
142 | "execution_count": 199,
143 | "outputs": [
144 | {
145 | "data": {
146 | "text/plain": "(3912, 6)"
147 | },
148 | "execution_count": 199,
149 | "metadata": {},
150 | "output_type": "execute_result"
151 | }
152 | ],
153 | "source": [
154 | "df.shape"
155 | ],
156 | "metadata": {
157 | "collapsed": false,
158 | "ExecuteTime": {
159 | "end_time": "2023-08-01T18:50:18.410704200Z",
160 | "start_time": "2023-08-01T18:50:18.385899500Z"
161 | }
162 | }
163 | },
164 | {
165 | "cell_type": "code",
166 | "execution_count": 200,
167 | "outputs": [
168 | {
169 | "data": {
170 | "text/plain": " Job Title Salary Estimate \\\n0 Senior Data Scientist $111K-$181K (Glassdoor est.) \n1 Data Scientist, Product Analytics $111K-$181K (Glassdoor est.) \n2 Data Science Manager $111K-$181K (Glassdoor est.) \n3 Data Analyst $111K-$181K (Glassdoor est.) \n4 Director, Data Science $111K-$181K (Glassdoor est.) \n\n Job Description \\\n0 ABOUT HOPPER\\n\\nAt Hopper, we’re on a mission ... \n1 At Noom, we use scientifically proven methods ... \n2 Decode_M\\n\\nhttps://www.decode-m.com/\\n\\nData ... \n3 NaN \n4 Director, Data Science - (200537)\\nDescription... \n\n Company Name Location Industry \n0 Hopper\\n3.5 New York, NY Travel Agencies \n1 Noom US\\n4.5 New York, NY Health, Beauty, & Fitness \n2 Decode_M New York, NY -1 \n3 Sapphire Digital\\n3.4 Lyndhurst, NJ NaN \n4 United Entertainment Group\\n3.4 New York, NY NaN ",
171 | "text/html": "\n\n
\n \n \n | \n Job Title | \n Salary Estimate | \n Job Description | \n Company Name | \n Location | \n Industry | \n
\n \n \n \n 0 | \n Senior Data Scientist | \n $111K-$181K (Glassdoor est.) | \n ABOUT HOPPER\\n\\nAt Hopper, we’re on a mission ... | \n Hopper\\n3.5 | \n New York, NY | \n Travel Agencies | \n
\n \n 1 | \n Data Scientist, Product Analytics | \n $111K-$181K (Glassdoor est.) | \n At Noom, we use scientifically proven methods ... | \n Noom US\\n4.5 | \n New York, NY | \n Health, Beauty, & Fitness | \n
\n \n 2 | \n Data Science Manager | \n $111K-$181K (Glassdoor est.) | \n Decode_M\\n\\nhttps://www.decode-m.com/\\n\\nData ... | \n Decode_M | \n New York, NY | \n -1 | \n
\n \n 3 | \n Data Analyst | \n $111K-$181K (Glassdoor est.) | \n NaN | \n Sapphire Digital\\n3.4 | \n Lyndhurst, NJ | \n NaN | \n
\n \n 4 | \n Director, Data Science | \n $111K-$181K (Glassdoor est.) | \n Director, Data Science - (200537)\\nDescription... | \n United Entertainment Group\\n3.4 | \n New York, NY | \n NaN | \n
\n \n
\n
"
172 | },
173 | "execution_count": 200,
174 | "metadata": {},
175 | "output_type": "execute_result"
176 | }
177 | ],
178 | "source": [
179 | "df.head()"
180 | ],
181 | "metadata": {
182 | "collapsed": false,
183 | "ExecuteTime": {
184 | "end_time": "2023-08-01T18:50:18.595738200Z",
185 | "start_time": "2023-08-01T18:50:18.589758100Z"
186 | }
187 | }
188 | },
189 | {
190 | "cell_type": "markdown",
191 | "source": [
192 | "# 2 - Removing Duplicates"
193 | ],
194 | "metadata": {
195 | "collapsed": false
196 | }
197 | },
198 | {
199 | "cell_type": "code",
200 | "execution_count": 201,
201 | "outputs": [
202 | {
203 | "data": {
204 | "text/plain": " Job Title \\\n3906 Security Analytics Data Engineer \n3907 Security Analytics Data Engineer \n3908 Patient Safety Physician or Safety Scientist -... \n3909 Security Analytics Data Engineer \n3910 Security Analytics Data Engineer \n3911 Patient Safety Physician or Safety Scientist -... \n\n Salary Estimate \\\n3906 $55K-$112K (Glassdoor est.) \n3907 $55K-$112K (Glassdoor est.) \n3908 $55K-$112K (Glassdoor est.) \n3909 $55K-$112K (Glassdoor est.) \n3910 $55K-$112K (Glassdoor est.) \n3911 $55K-$112K (Glassdoor est.) \n\n Job Description \\\n3906 Job DescriptionThe Security Analytics Data Eng... \n3907 The Security Analytics Data Engineer will inte... \n3908 Help us transform patients' lives.\\nAt UCB, we... \n3909 Job DescriptionThe Security Analytics Data Eng... \n3910 The Security Analytics Data Engineer will inte... \n3911 Help us transform patients' lives.\\nAt UCB, we... \n\n Company Name Location Industry \n3906 PDS Tech, Inc.\\n3.8 Dublin, OH Staffing & Outsourcing \n3907 Data Resource Technologies\\n4.0 Dublin, OH Accounting \n3908 UCB\\n3.7 Slough, OH Biotech & Pharmaceuticals \n3909 PDS Tech, Inc.\\n3.8 Dublin, OH Staffing & Outsourcing \n3910 Data Resource Technologies\\n4.0 Dublin, OH Accounting \n3911 UCB\\n3.7 Slough, OH Biotech & Pharmaceuticals ",
205 | "text/html": "\n\n
\n \n \n | \n Job Title | \n Salary Estimate | \n Job Description | \n Company Name | \n Location | \n Industry | \n
\n \n \n \n 3906 | \n Security Analytics Data Engineer | \n $55K-$112K (Glassdoor est.) | \n Job DescriptionThe Security Analytics Data Eng... | \n PDS Tech, Inc.\\n3.8 | \n Dublin, OH | \n Staffing & Outsourcing | \n
\n \n 3907 | \n Security Analytics Data Engineer | \n $55K-$112K (Glassdoor est.) | \n The Security Analytics Data Engineer will inte... | \n Data Resource Technologies\\n4.0 | \n Dublin, OH | \n Accounting | \n
\n \n 3908 | \n Patient Safety Physician or Safety Scientist -... | \n $55K-$112K (Glassdoor est.) | \n Help us transform patients' lives.\\nAt UCB, we... | \n UCB\\n3.7 | \n Slough, OH | \n Biotech & Pharmaceuticals | \n
\n \n 3909 | \n Security Analytics Data Engineer | \n $55K-$112K (Glassdoor est.) | \n Job DescriptionThe Security Analytics Data Eng... | \n PDS Tech, Inc.\\n3.8 | \n Dublin, OH | \n Staffing & Outsourcing | \n
\n \n 3910 | \n Security Analytics Data Engineer | \n $55K-$112K (Glassdoor est.) | \n The Security Analytics Data Engineer will inte... | \n Data Resource Technologies\\n4.0 | \n Dublin, OH | \n Accounting | \n
\n \n 3911 | \n Patient Safety Physician or Safety Scientist -... | \n $55K-$112K (Glassdoor est.) | \n Help us transform patients' lives.\\nAt UCB, we... | \n UCB\\n3.7 | \n Slough, OH | \n Biotech & Pharmaceuticals | \n
\n \n
\n
"
206 | },
207 | "execution_count": 201,
208 | "metadata": {},
209 | "output_type": "execute_result"
210 | }
211 | ],
212 | "source": [
213 | "df[df.duplicated(keep = False)]"
214 | ],
215 | "metadata": {
216 | "collapsed": false,
217 | "ExecuteTime": {
218 | "end_time": "2023-08-01T18:50:18.950391100Z",
219 | "start_time": "2023-08-01T18:50:18.934443800Z"
220 | }
221 | }
222 | },
223 | {
224 | "cell_type": "code",
225 | "execution_count": 202,
226 | "outputs": [],
227 | "source": [
228 | "df.drop_duplicates(keep = 'first', inplace=True)"
229 | ],
230 | "metadata": {
231 | "collapsed": false,
232 | "ExecuteTime": {
233 | "end_time": "2023-08-01T18:50:19.180643Z",
234 | "start_time": "2023-08-01T18:50:19.165920500Z"
235 | }
236 | }
237 | },
238 | {
239 | "cell_type": "code",
240 | "execution_count": 203,
241 | "outputs": [
242 | {
243 | "data": {
244 | "text/plain": "Empty DataFrame\nColumns: [Job Title, Salary Estimate, Job Description, Company Name, Location, Industry]\nIndex: []",
245 | "text/html": "\n\n
\n \n \n | \n Job Title | \n Salary Estimate | \n Job Description | \n Company Name | \n Location | \n Industry | \n
\n \n \n \n
\n
"
246 | },
247 | "execution_count": 203,
248 | "metadata": {},
249 | "output_type": "execute_result"
250 | }
251 | ],
252 | "source": [
253 | "df[df.duplicated(keep = False)]"
254 | ],
255 | "metadata": {
256 | "collapsed": false,
257 | "ExecuteTime": {
258 | "end_time": "2023-08-01T18:50:19.396364100Z",
259 | "start_time": "2023-08-01T18:50:19.363076100Z"
260 | }
261 | }
262 | },
263 | {
264 | "cell_type": "markdown",
265 | "source": [
266 | "# 3 - Removing Irrelevant Rows"
267 | ],
268 | "metadata": {
269 | "collapsed": false
270 | }
271 | },
272 | {
273 | "cell_type": "code",
274 | "execution_count": 204,
275 | "outputs": [
276 | {
277 | "data": {
278 | "text/plain": "21"
279 | },
280 | "execution_count": 204,
281 | "metadata": {},
282 | "output_type": "execute_result"
283 | }
284 | ],
285 | "source": [
286 | "len(df[df['Salary Estimate'].str.contains('Per Hour', case = False)])"
287 | ],
288 | "metadata": {
289 | "collapsed": false,
290 | "ExecuteTime": {
291 | "end_time": "2023-08-01T18:50:19.764144400Z",
292 | "start_time": "2023-08-01T18:50:19.750184200Z"
293 | }
294 | }
295 | },
296 | {
297 | "cell_type": "code",
298 | "execution_count": 205,
299 | "outputs": [
300 | {
301 | "data": {
302 | "text/plain": "(3909, 6)"
303 | },
304 | "execution_count": 205,
305 | "metadata": {},
306 | "output_type": "execute_result"
307 | }
308 | ],
309 | "source": [
310 | "df.shape"
311 | ],
312 | "metadata": {
313 | "collapsed": false,
314 | "ExecuteTime": {
315 | "end_time": "2023-08-01T18:50:19.999297900Z",
316 | "start_time": "2023-08-01T18:50:19.988335Z"
317 | }
318 | }
319 | },
320 | {
321 | "cell_type": "code",
322 | "execution_count": 206,
323 | "outputs": [],
324 | "source": [
325 | "df = df.drop(df[df['Salary Estimate'].str.contains('Per Hour', case = False) == True].index)"
326 | ],
327 | "metadata": {
328 | "collapsed": false,
329 | "ExecuteTime": {
330 | "end_time": "2023-08-01T18:50:20.225073100Z",
331 | "start_time": "2023-08-01T18:50:20.211120100Z"
332 | }
333 | }
334 | },
335 | {
336 | "cell_type": "code",
337 | "execution_count": 207,
338 | "outputs": [
339 | {
340 | "data": {
341 | "text/plain": "(3888, 6)"
342 | },
343 | "execution_count": 207,
344 | "metadata": {},
345 | "output_type": "execute_result"
346 | }
347 | ],
348 | "source": [
349 | "df.shape"
350 | ],
351 | "metadata": {
352 | "collapsed": false,
353 | "ExecuteTime": {
354 | "end_time": "2023-08-01T18:50:20.413462400Z",
355 | "start_time": "2023-08-01T18:50:20.400996Z"
356 | }
357 | }
358 | },
359 | {
360 | "cell_type": "code",
361 | "execution_count": 208,
362 | "outputs": [
363 | {
364 | "data": {
365 | "text/plain": "0"
366 | },
367 | "execution_count": 208,
368 | "metadata": {},
369 | "output_type": "execute_result"
370 | }
371 | ],
372 | "source": [
373 | "len(df[df['Salary Estimate'].str.contains('Per Hour', case = False)])"
374 | ],
375 | "metadata": {
376 | "collapsed": false,
377 | "ExecuteTime": {
378 | "end_time": "2023-08-01T18:50:20.626157200Z",
379 | "start_time": "2023-08-01T18:50:20.594753Z"
380 | }
381 | }
382 | },
383 | {
384 | "cell_type": "markdown",
385 | "source": [
386 | "# 4 - Removing Nulls and Blanks"
387 | ],
388 | "metadata": {
389 | "collapsed": false
390 | }
391 | },
392 | {
393 | "cell_type": "code",
394 | "execution_count": 209,
395 | "outputs": [
396 | {
397 | "data": {
398 | "text/plain": "Job Title 1\nSalary Estimate 0\nJob Description 1\nCompany Name 0\nLocation 0\nIndustry 2\ndtype: int64"
399 | },
400 | "execution_count": 209,
401 | "metadata": {},
402 | "output_type": "execute_result"
403 | }
404 | ],
405 | "source": [
406 | "df.isna().sum()"
407 | ],
408 | "metadata": {
409 | "collapsed": false,
410 | "ExecuteTime": {
411 | "end_time": "2023-08-01T18:50:21.006062200Z",
412 | "start_time": "2023-08-01T18:50:20.980636600Z"
413 | }
414 | }
415 | },
416 | {
417 | "cell_type": "code",
418 | "execution_count": 210,
419 | "outputs": [],
420 | "source": [
421 | "df = df.dropna(subset=['Job Description'])"
422 | ],
423 | "metadata": {
424 | "collapsed": false,
425 | "ExecuteTime": {
426 | "end_time": "2023-08-01T18:50:21.162088200Z",
427 | "start_time": "2023-08-01T18:50:21.141158300Z"
428 | }
429 | }
430 | },
431 | {
432 | "cell_type": "code",
433 | "execution_count": 211,
434 | "outputs": [
435 | {
436 | "data": {
437 | "text/plain": "Job Title 1\nSalary Estimate 0\nJob Description 0\nCompany Name 0\nLocation 0\nIndustry 1\ndtype: int64"
438 | },
439 | "execution_count": 211,
440 | "metadata": {},
441 | "output_type": "execute_result"
442 | }
443 | ],
444 | "source": [
445 | "df.isna().sum()"
446 | ],
447 | "metadata": {
448 | "collapsed": false,
449 | "ExecuteTime": {
450 | "end_time": "2023-08-01T18:50:21.362437800Z",
451 | "start_time": "2023-08-01T18:50:21.344498100Z"
452 | }
453 | }
454 | },
455 | {
456 | "cell_type": "code",
457 | "execution_count": 212,
458 | "outputs": [],
459 | "source": [
460 | "df = df.dropna()"
461 | ],
462 | "metadata": {
463 | "collapsed": false,
464 | "ExecuteTime": {
465 | "end_time": "2023-08-01T18:50:21.549832600Z",
466 | "start_time": "2023-08-01T18:50:21.531892700Z"
467 | }
468 | }
469 | },
470 | {
471 | "cell_type": "code",
472 | "execution_count": 213,
473 | "outputs": [
474 | {
475 | "data": {
476 | "text/plain": "Job Title 0\nSalary Estimate 0\nJob Description 0\nCompany Name 0\nLocation 0\nIndustry 0\ndtype: int64"
477 | },
478 | "execution_count": 213,
479 | "metadata": {},
480 | "output_type": "execute_result"
481 | }
482 | ],
483 | "source": [
484 | "df.isna().sum()"
485 | ],
486 | "metadata": {
487 | "collapsed": false,
488 | "ExecuteTime": {
489 | "end_time": "2023-08-01T18:50:21.738086500Z",
490 | "start_time": "2023-08-01T18:50:21.727123400Z"
491 | }
492 | }
493 | },
494 | {
495 | "cell_type": "code",
496 | "execution_count": 214,
497 | "outputs": [
498 | {
499 | "data": {
500 | "text/plain": " Job Title \\\n0 Senior Data Scientist \n1 Data Scientist, Product Analytics \n2 Data Science Manager \n5 Data Scientist \n7 Quantitative Research Associate \n.. ... \n98 Data Scientist \n99 Machine Learning Engineer/Scientist \n100 Point72 Healthcare Data Scientist \n101 Senior Data Scientist - Automated Marketing \n102 Data Scientist \n\n Salary Estimate \\\n0 $111K-$181K (Glassdoor est.) \n1 $111K-$181K (Glassdoor est.) \n2 $111K-$181K (Glassdoor est.) \n5 $111K-$181K (Glassdoor est.) \n7 $111K-$181K (Glassdoor est.) \n.. ... \n98 $74K-$124K (Glassdoor est.) \n99 $74K-$124K (Glassdoor est.) \n100 $74K-$124K (Glassdoor est.) \n101 $74K-$124K (Glassdoor est.) \n102 $74K-$124K (Glassdoor est.) \n\n Job Description \\\n0 ABOUT HOPPER\\n\\nAt Hopper, we’re on a mission ... \n1 At Noom, we use scientifically proven methods ... \n2 Decode_M\\n\\nhttps://www.decode-m.com/\\n\\nData ... \n5 Job Brief\\n\\nThe ideal candidate will have pre... \n7 Seeking a quant to work with senior researcher... \n.. ... \n98 Data Scientist\\n\\n\\nNew York, NY | Full Time\\n... \n99 About the Position\\n\\n\\nJane Street is seeking... \n100 About Point72\\nPoint72 Asset Management is a g... \n101 We are looking for a Senior Data Scientist wit... \n102 We are the most sought after hedge fund in New... \n\n Company Name Location \\\n0 Hopper\\n3.5 New York, NY \n1 Noom US\\n4.5 New York, NY \n2 Decode_M New York, NY \n5 IFG Companies\\n2.9 New York, NY \n7 Enlightenment Research New York, NY \n.. ... ... \n98 CKM Advisors\\n2.9 New York, NY \n99 Jane Street\\n4.8 New York, NY \n100 Point72\\n3.9 New York, NY \n101 Spotify\\n3.8 New York, NY \n102 Averity\\n5.0 New York, NY \n\n Industry \n0 Travel Agencies \n1 Health, Beauty, & Fitness \n2 -1 \n5 Insurance Carriers \n7 -1 \n.. ... \n98 Consulting \n99 Investment Banking & Asset Management \n100 Investment Banking & Asset Management \n101 Internet \n102 Staffing & Outsourcing \n\n[100 rows x 6 columns]",
501 | "text/html": "\n\n
\n \n \n | \n Job Title | \n Salary Estimate | \n Job Description | \n Company Name | \n Location | \n Industry | \n
\n \n \n \n 0 | \n Senior Data Scientist | \n $111K-$181K (Glassdoor est.) | \n ABOUT HOPPER\\n\\nAt Hopper, we’re on a mission ... | \n Hopper\\n3.5 | \n New York, NY | \n Travel Agencies | \n
\n \n 1 | \n Data Scientist, Product Analytics | \n $111K-$181K (Glassdoor est.) | \n At Noom, we use scientifically proven methods ... | \n Noom US\\n4.5 | \n New York, NY | \n Health, Beauty, & Fitness | \n
\n \n 2 | \n Data Science Manager | \n $111K-$181K (Glassdoor est.) | \n Decode_M\\n\\nhttps://www.decode-m.com/\\n\\nData ... | \n Decode_M | \n New York, NY | \n -1 | \n
\n \n 5 | \n Data Scientist | \n $111K-$181K (Glassdoor est.) | \n Job Brief\\n\\nThe ideal candidate will have pre... | \n IFG Companies\\n2.9 | \n New York, NY | \n Insurance Carriers | \n
\n \n 7 | \n Quantitative Research Associate | \n $111K-$181K (Glassdoor est.) | \n Seeking a quant to work with senior researcher... | \n Enlightenment Research | \n New York, NY | \n -1 | \n
\n \n ... | \n ... | \n ... | \n ... | \n ... | \n ... | \n ... | \n
\n \n 98 | \n Data Scientist | \n $74K-$124K (Glassdoor est.) | \n Data Scientist\\n\\n\\nNew York, NY | Full Time\\n... | \n CKM Advisors\\n2.9 | \n New York, NY | \n Consulting | \n
\n \n 99 | \n Machine Learning Engineer/Scientist | \n $74K-$124K (Glassdoor est.) | \n About the Position\\n\\n\\nJane Street is seeking... | \n Jane Street\\n4.8 | \n New York, NY | \n Investment Banking & Asset Management | \n
\n \n 100 | \n Point72 Healthcare Data Scientist | \n $74K-$124K (Glassdoor est.) | \n About Point72\\nPoint72 Asset Management is a g... | \n Point72\\n3.9 | \n New York, NY | \n Investment Banking & Asset Management | \n
\n \n 101 | \n Senior Data Scientist - Automated Marketing | \n $74K-$124K (Glassdoor est.) | \n We are looking for a Senior Data Scientist wit... | \n Spotify\\n3.8 | \n New York, NY | \n Internet | \n
\n \n 102 | \n Data Scientist | \n $74K-$124K (Glassdoor est.) | \n We are the most sought after hedge fund in New... | \n Averity\\n5.0 | \n New York, NY | \n Staffing & Outsourcing | \n
\n \n
\n
100 rows × 6 columns
\n
"
502 | },
503 | "execution_count": 214,
504 | "metadata": {},
505 | "output_type": "execute_result"
506 | }
507 | ],
508 | "source": [
509 | "df.head(100)"
510 | ],
511 | "metadata": {
512 | "collapsed": false,
513 | "ExecuteTime": {
514 | "end_time": "2023-08-01T18:50:21.942425300Z",
515 | "start_time": "2023-08-01T18:50:21.923488700Z"
516 | }
517 | }
518 | },
519 | {
520 | "cell_type": "code",
521 | "execution_count": 215,
522 | "outputs": [
523 | {
524 | "data": {
525 | "text/plain": " Job Title \\\n2 Data Science Manager \n7 Quantitative Research Associate \n40 Data Scientist \n44 Data Science Analyst \n55 Data Scientist, Analytics & Inference \n... ... \n3880 Data Science Technical Lead / Architect \n3882 Big Data Engineer \n3883 Columbus Opportunities: Finance, Accounting, D... \n3886 Big Data Engineer \n3897 Senior Data Engineer \n\n Salary Estimate \\\n2 $111K-$181K (Glassdoor est.) \n7 $111K-$181K (Glassdoor est.) \n40 $120K-$140K (Glassdoor est.) \n44 $120K-$140K (Glassdoor est.) \n55 $120K-$140K (Glassdoor est.) \n... ... \n3880 $39K-$86K (Glassdoor est.) \n3882 $55K-$112K (Glassdoor est.) \n3883 $55K-$112K (Glassdoor est.) \n3886 $55K-$112K (Glassdoor est.) \n3897 $55K-$112K (Glassdoor est.) \n\n Job Description \\\n2 Decode_M\\n\\nhttps://www.decode-m.com/\\n\\nData ... \n7 Seeking a quant to work with senior researcher... \n40 We make small businesses more successful throu... \n44 Job Description\\nOur client, a music streaming... \n55 Hello, World! Codecademy has helped over 45 mi... \n... ... \n3880 Urgent need for DATASCIENCE TECHNICAL LEAD ARC... \n3882 RESPONSIBILITIES Kforce has a client in search... \n3883 Job Description\\nWe are experts at conducting ... \n3886 Job Description:\\nAct as a strong developer us... \n3897 Job Responsibility:\\nBased on business strateg... \n\n Company Name Location Industry \n2 Decode_M New York, NY -1 \n7 Enlightenment Research New York, NY -1 \n40 NorthOne\\n4.3 New York, NY -1 \n44 MUSIC & Entertainment New York, NY -1 \n55 Codeacademy New York, NY -1 \n... ... ... ... \n3880 DATAECONOMY\\n5.0 Columbus, OH -1 \n3882 Kforce Technology Staffing Columbus, OH -1 \n3883 Rainmaker Resources, LLC Columbus, OH -1 \n3886 Zllius Inc. Columbus, OH -1 \n3897 Kognetics\\n3.6 Gahanna, OH -1 \n\n[548 rows x 6 columns]",
526 | "text/html": "\n\n
\n \n \n | \n Job Title | \n Salary Estimate | \n Job Description | \n Company Name | \n Location | \n Industry | \n
\n \n \n \n 2 | \n Data Science Manager | \n $111K-$181K (Glassdoor est.) | \n Decode_M\\n\\nhttps://www.decode-m.com/\\n\\nData ... | \n Decode_M | \n New York, NY | \n -1 | \n
\n \n 7 | \n Quantitative Research Associate | \n $111K-$181K (Glassdoor est.) | \n Seeking a quant to work with senior researcher... | \n Enlightenment Research | \n New York, NY | \n -1 | \n
\n \n 40 | \n Data Scientist | \n $120K-$140K (Glassdoor est.) | \n We make small businesses more successful throu... | \n NorthOne\\n4.3 | \n New York, NY | \n -1 | \n
\n \n 44 | \n Data Science Analyst | \n $120K-$140K (Glassdoor est.) | \n Job Description\\nOur client, a music streaming... | \n MUSIC & Entertainment | \n New York, NY | \n -1 | \n
\n \n 55 | \n Data Scientist, Analytics & Inference | \n $120K-$140K (Glassdoor est.) | \n Hello, World! Codecademy has helped over 45 mi... | \n Codeacademy | \n New York, NY | \n -1 | \n
\n \n ... | \n ... | \n ... | \n ... | \n ... | \n ... | \n ... | \n
\n \n 3880 | \n Data Science Technical Lead / Architect | \n $39K-$86K (Glassdoor est.) | \n Urgent need for DATASCIENCE TECHNICAL LEAD ARC... | \n DATAECONOMY\\n5.0 | \n Columbus, OH | \n -1 | \n
\n \n 3882 | \n Big Data Engineer | \n $55K-$112K (Glassdoor est.) | \n RESPONSIBILITIES Kforce has a client in search... | \n Kforce Technology Staffing | \n Columbus, OH | \n -1 | \n
\n \n 3883 | \n Columbus Opportunities: Finance, Accounting, D... | \n $55K-$112K (Glassdoor est.) | \n Job Description\\nWe are experts at conducting ... | \n Rainmaker Resources, LLC | \n Columbus, OH | \n -1 | \n
\n \n 3886 | \n Big Data Engineer | \n $55K-$112K (Glassdoor est.) | \n Job Description:\\nAct as a strong developer us... | \n Zllius Inc. | \n Columbus, OH | \n -1 | \n
\n \n 3897 | \n Senior Data Engineer | \n $55K-$112K (Glassdoor est.) | \n Job Responsibility:\\nBased on business strateg... | \n Kognetics\\n3.6 | \n Gahanna, OH | \n -1 | \n
\n \n
\n
548 rows × 6 columns
\n
"
527 | },
528 | "execution_count": 215,
529 | "metadata": {},
530 | "output_type": "execute_result"
531 | }
532 | ],
533 | "source": [
534 | "df[df['Industry'].str.contains('-1')]"
535 | ],
536 | "metadata": {
537 | "collapsed": false,
538 | "ExecuteTime": {
539 | "end_time": "2023-08-01T18:50:22.128714100Z",
540 | "start_time": "2023-08-01T18:50:22.117750800Z"
541 | }
542 | }
543 | },
544 | {
545 | "cell_type": "code",
546 | "execution_count": 216,
547 | "outputs": [],
548 | "source": [
549 | "df['Industry'] = df['Industry'].replace('-1', 'Unknown')"
550 | ],
551 | "metadata": {
552 | "collapsed": false,
553 | "ExecuteTime": {
554 | "end_time": "2023-08-01T18:50:22.319097600Z",
555 | "start_time": "2023-08-01T18:50:22.308134400Z"
556 | }
557 | }
558 | },
559 | {
560 | "cell_type": "code",
561 | "execution_count": 217,
562 | "outputs": [
563 | {
564 | "data": {
565 | "text/plain": " Job Title Salary Estimate \\\n684 Data Engineer $136K-$164K (Glassdoor est.) \n838 Data Analyst $39K-$81K (Glassdoor est.) \n874 CSI Data Analyst $76K-$147K (Glassdoor est.) \n1051 Senior Data Analyst $47K-$82K (Glassdoor est.) \n1544 Senior Data Engineer $97K-$111K (Glassdoor est.) \n\n Job Description \\\n684 Company Overview:\\nAge of Learning is a leadin... \n838 Chicago Public Schools (CPS) is the third larg... \n874 Chicago Public Schools (CPS) is the third larg... \n1051 Chicago Public Schools (CPS) is the third larg... \n1544 If you’re bright, highly motivated and want to... \n\n Company Name Location Industry \n684 Age of Learning\\n3.3 Glendale, CA K-12 Education \n838 Chicago Public Schools\\n3.5 Chicago, IL K-12 Education \n874 Chicago Public Schools\\n3.5 Chicago, IL K-12 Education \n1051 Chicago Public Schools\\n3.5 Chicago, IL K-12 Education \n1544 StrongMind\\n3.8 Chandler, AZ K-12 Education ",
566 | "text/html": "\n\n
\n \n \n | \n Job Title | \n Salary Estimate | \n Job Description | \n Company Name | \n Location | \n Industry | \n
\n \n \n \n 684 | \n Data Engineer | \n $136K-$164K (Glassdoor est.) | \n Company Overview:\\nAge of Learning is a leadin... | \n Age of Learning\\n3.3 | \n Glendale, CA | \n K-12 Education | \n
\n \n 838 | \n Data Analyst | \n $39K-$81K (Glassdoor est.) | \n Chicago Public Schools (CPS) is the third larg... | \n Chicago Public Schools\\n3.5 | \n Chicago, IL | \n K-12 Education | \n
\n \n 874 | \n CSI Data Analyst | \n $76K-$147K (Glassdoor est.) | \n Chicago Public Schools (CPS) is the third larg... | \n Chicago Public Schools\\n3.5 | \n Chicago, IL | \n K-12 Education | \n
\n \n 1051 | \n Senior Data Analyst | \n $47K-$82K (Glassdoor est.) | \n Chicago Public Schools (CPS) is the third larg... | \n Chicago Public Schools\\n3.5 | \n Chicago, IL | \n K-12 Education | \n
\n \n 1544 | \n Senior Data Engineer | \n $97K-$111K (Glassdoor est.) | \n If you’re bright, highly motivated and want to... | \n StrongMind\\n3.8 | \n Chandler, AZ | \n K-12 Education | \n
\n \n
\n
"
567 | },
568 | "execution_count": 217,
569 | "metadata": {},
570 | "output_type": "execute_result"
571 | }
572 | ],
573 | "source": [
574 | "df[df['Industry'].str.contains('-1')]"
575 | ],
576 | "metadata": {
577 | "collapsed": false,
578 | "ExecuteTime": {
579 | "end_time": "2023-08-01T18:50:22.667213700Z",
580 | "start_time": "2023-08-01T18:50:22.647262800Z"
581 | }
582 | }
583 | },
584 | {
585 | "cell_type": "code",
586 | "execution_count": 218,
587 | "outputs": [
588 | {
589 | "data": {
590 | "text/plain": "Unknown 543\nIT Services 471\nStaffing & Outsourcing 313\nBiotech & Pharmaceuticals 291\nComputer Hardware & Software 263\n ... \nCommercial Equipment Rental 1\nMetals Brokers 1\nTruck Rental & Leasing 1\nBeauty & Personal Accessories Stores 1\nAuto Repair & Maintenance 1\nName: Industry, Length: 96, dtype: int64"
591 | },
592 | "execution_count": 218,
593 | "metadata": {},
594 | "output_type": "execute_result"
595 | }
596 | ],
597 | "source": [
598 | "df['Industry'].value_counts()"
599 | ],
600 | "metadata": {
601 | "collapsed": false,
602 | "ExecuteTime": {
603 | "end_time": "2023-08-01T18:50:22.864332100Z",
604 | "start_time": "2023-08-01T18:50:22.830444800Z"
605 | }
606 | }
607 | },
608 | {
609 | "cell_type": "markdown",
610 | "source": [
611 | "# 5 - Standardise Values"
612 | ],
613 | "metadata": {
614 | "collapsed": false
615 | }
616 | },
617 | {
618 | "cell_type": "code",
619 | "execution_count": 219,
620 | "outputs": [
621 | {
622 | "data": {
623 | "text/plain": " Job Title \\\n0 Senior Data Scientist \n1 Data Scientist, Product Analytics \n5 Data Scientist \n8 AI Scientist \n10 Data Scientist \n... ... \n3872 IGM - Post Doctoral Scientist - Chaudhari Lab \n3884 Senior Research Scientist - RI IPP Cooper \n3893 Biotransformation Scientist and DMPK Design Lead \n3895 Senior/Principal Scientist - Display Technolog... \n3908 Patient Safety Physician or Safety Scientist -... \n\n Salary Estimate \\\n0 $111K-$181K (Glassdoor est.) \n1 $111K-$181K (Glassdoor est.) \n5 $111K-$181K (Glassdoor est.) \n8 $111K-$181K (Glassdoor est.) \n10 $111K-$181K (Glassdoor est.) \n... ... \n3872 $39K-$86K (Glassdoor est.) \n3884 $55K-$112K (Glassdoor est.) \n3893 $55K-$112K (Glassdoor est.) \n3895 $55K-$112K (Glassdoor est.) \n3908 $55K-$112K (Glassdoor est.) \n\n Job Description \\\n0 ABOUT HOPPER\\n\\nAt Hopper, we’re on a mission ... \n1 At Noom, we use scientifically proven methods ... \n5 Job Brief\\n\\nThe ideal candidate will have pre... \n8 Paige is a software company helping pathologis... \n10 Company Description:\\n\\nQuartet is a pioneerin... \n... ... \n3872 JOB POSTING - Post Doctoral Scientist IGM\\nFul... \n3884 JOB POSTING – Senior Research Scientist – RI I... \n3893 Help us transform patients' lives.\\n\\nAt UCB, ... \n3895 Help us transform patients' lives.\\n\\nAt UCB, ... \n3908 Help us transform patients' lives.\\nAt UCB, we... \n\n Company Name Location \\\n0 Hopper\\n3.5 New York, NY \n1 Noom US\\n4.5 New York, NY \n5 IFG Companies\\n2.9 New York, NY \n8 Paige\\n5.0 New York, NY \n10 Quartet Health\\n3.9 New York, NY \n... ... ... \n3872 Nationwide Children's Hospital\\n3.7 Columbus, OH \n3884 Nationwide Children's Hospital\\n3.7 Columbus, OH \n3893 UCB\\n3.7 Slough, OH \n3895 UCB\\n3.7 Slough, OH \n3908 UCB\\n3.7 Slough, OH \n\n Industry \n0 Travel Agencies \n1 Health, Beauty, & Fitness \n5 Insurance Carriers \n8 Enterprise Software & Network Solutions \n10 Enterprise Software & Network Solutions \n... 
... \n3872 Health Care Services & Hospitals \n3884 Health Care Services & Hospitals \n3893 Biotech & Pharmaceuticals \n3895 Biotech & Pharmaceuticals \n3908 Biotech & Pharmaceuticals \n\n[1746 rows x 6 columns]",
624 | "text/html": "\n\n
\n \n \n | \n Job Title | \n Salary Estimate | \n Job Description | \n Company Name | \n Location | \n Industry | \n
\n \n \n \n 0 | \n Senior Data Scientist | \n $111K-$181K (Glassdoor est.) | \n ABOUT HOPPER\\n\\nAt Hopper, we’re on a mission ... | \n Hopper\\n3.5 | \n New York, NY | \n Travel Agencies | \n
\n \n 1 | \n Data Scientist, Product Analytics | \n $111K-$181K (Glassdoor est.) | \n At Noom, we use scientifically proven methods ... | \n Noom US\\n4.5 | \n New York, NY | \n Health, Beauty, & Fitness | \n
\n \n 5 | \n Data Scientist | \n $111K-$181K (Glassdoor est.) | \n Job Brief\\n\\nThe ideal candidate will have pre... | \n IFG Companies\\n2.9 | \n New York, NY | \n Insurance Carriers | \n
\n \n 8 | \n AI Scientist | \n $111K-$181K (Glassdoor est.) | \n Paige is a software company helping pathologis... | \n Paige\\n5.0 | \n New York, NY | \n Enterprise Software & Network Solutions | \n
\n \n 10 | \n Data Scientist | \n $111K-$181K (Glassdoor est.) | \n Company Description:\\n\\nQuartet is a pioneerin... | \n Quartet Health\\n3.9 | \n New York, NY | \n Enterprise Software & Network Solutions | \n
\n \n ... | \n ... | \n ... | \n ... | \n ... | \n ... | \n ... | \n
\n \n 3872 | \n IGM - Post Doctoral Scientist - Chaudhari Lab | \n $39K-$86K (Glassdoor est.) | \n JOB POSTING - Post Doctoral Scientist IGM\\nFul... | \n Nationwide Children's Hospital\\n3.7 | \n Columbus, OH | \n Health Care Services & Hospitals | \n
\n \n 3884 | \n Senior Research Scientist - RI IPP Cooper | \n $55K-$112K (Glassdoor est.) | \n JOB POSTING – Senior Research Scientist – RI I... | \n Nationwide Children's Hospital\\n3.7 | \n Columbus, OH | \n Health Care Services & Hospitals | \n
\n \n 3893 | \n Biotransformation Scientist and DMPK Design Lead | \n $55K-$112K (Glassdoor est.) | \n Help us transform patients' lives.\\n\\nAt UCB, ... | \n UCB\\n3.7 | \n Slough, OH | \n Biotech & Pharmaceuticals | \n
\n \n 3895 | \n Senior/Principal Scientist - Display Technolog... | \n $55K-$112K (Glassdoor est.) | \n Help us transform patients' lives.\\n\\nAt UCB, ... | \n UCB\\n3.7 | \n Slough, OH | \n Biotech & Pharmaceuticals | \n
\n \n 3908 | \n Patient Safety Physician or Safety Scientist -... | \n $55K-$112K (Glassdoor est.) | \n Help us transform patients' lives.\\nAt UCB, we... | \n UCB\\n3.7 | \n Slough, OH | \n Biotech & Pharmaceuticals | \n
\n \n
\n
1746 rows × 6 columns
\n
"
625 | },
626 | "execution_count": 219,
627 | "metadata": {},
628 | "output_type": "execute_result"
629 | }
630 | ],
631 | "source": [
632 | "df[df['Job Title'].str.contains('scientist', case = False)]"
633 | ],
634 | "metadata": {
635 | "collapsed": false,
636 | "ExecuteTime": {
637 | "end_time": "2023-08-01T18:50:23.608187100Z",
638 | "start_time": "2023-08-01T18:50:23.580219500Z"
639 | }
640 | }
641 | },
642 | {
643 | "cell_type": "code",
644 | "execution_count": 220,
645 | "outputs": [],
646 | "source": [
647 | "df.loc[df['Job Title'].str.contains('scientist', case = False), 'Job Title'] = 'Data Scientist'"
648 | ],
649 | "metadata": {
650 | "collapsed": false,
651 | "ExecuteTime": {
652 | "end_time": "2023-08-01T18:50:23.794077400Z",
653 | "start_time": "2023-08-01T18:50:23.774142500Z"
654 | }
655 | }
656 | },
657 | {
658 | "cell_type": "code",
659 | "execution_count": 221,
660 | "outputs": [
661 | {
662 | "data": {
663 | "text/plain": " Job Title Salary Estimate \\\n0 Data Scientist $111K-$181K (Glassdoor est.) \n1 Data Scientist $111K-$181K (Glassdoor est.) \n5 Data Scientist $111K-$181K (Glassdoor est.) \n8 Data Scientist $111K-$181K (Glassdoor est.) \n10 Data Scientist $111K-$181K (Glassdoor est.) \n... ... ... \n3872 Data Scientist $39K-$86K (Glassdoor est.) \n3884 Data Scientist $55K-$112K (Glassdoor est.) \n3893 Data Scientist $55K-$112K (Glassdoor est.) \n3895 Data Scientist $55K-$112K (Glassdoor est.) \n3908 Data Scientist $55K-$112K (Glassdoor est.) \n\n Job Description \\\n0 ABOUT HOPPER\\n\\nAt Hopper, we’re on a mission ... \n1 At Noom, we use scientifically proven methods ... \n5 Job Brief\\n\\nThe ideal candidate will have pre... \n8 Paige is a software company helping pathologis... \n10 Company Description:\\n\\nQuartet is a pioneerin... \n... ... \n3872 JOB POSTING - Post Doctoral Scientist IGM\\nFul... \n3884 JOB POSTING – Senior Research Scientist – RI I... \n3893 Help us transform patients' lives.\\n\\nAt UCB, ... \n3895 Help us transform patients' lives.\\n\\nAt UCB, ... \n3908 Help us transform patients' lives.\\nAt UCB, we... \n\n Company Name Location \\\n0 Hopper\\n3.5 New York, NY \n1 Noom US\\n4.5 New York, NY \n5 IFG Companies\\n2.9 New York, NY \n8 Paige\\n5.0 New York, NY \n10 Quartet Health\\n3.9 New York, NY \n... ... ... \n3872 Nationwide Children's Hospital\\n3.7 Columbus, OH \n3884 Nationwide Children's Hospital\\n3.7 Columbus, OH \n3893 UCB\\n3.7 Slough, OH \n3895 UCB\\n3.7 Slough, OH \n3908 UCB\\n3.7 Slough, OH \n\n Industry \n0 Travel Agencies \n1 Health, Beauty, & Fitness \n5 Insurance Carriers \n8 Enterprise Software & Network Solutions \n10 Enterprise Software & Network Solutions \n... ... \n3872 Health Care Services & Hospitals \n3884 Health Care Services & Hospitals \n3893 Biotech & Pharmaceuticals \n3895 Biotech & Pharmaceuticals \n3908 Biotech & Pharmaceuticals \n\n[1746 rows x 6 columns]",
664 | "text/html": "\n\n
\n \n \n | \n Job Title | \n Salary Estimate | \n Job Description | \n Company Name | \n Location | \n Industry | \n
\n \n \n \n 0 | \n Data Scientist | \n $111K-$181K (Glassdoor est.) | \n ABOUT HOPPER\\n\\nAt Hopper, we’re on a mission ... | \n Hopper\\n3.5 | \n New York, NY | \n Travel Agencies | \n
\n \n 1 | \n Data Scientist | \n $111K-$181K (Glassdoor est.) | \n At Noom, we use scientifically proven methods ... | \n Noom US\\n4.5 | \n New York, NY | \n Health, Beauty, & Fitness | \n
\n \n 5 | \n Data Scientist | \n $111K-$181K (Glassdoor est.) | \n Job Brief\\n\\nThe ideal candidate will have pre... | \n IFG Companies\\n2.9 | \n New York, NY | \n Insurance Carriers | \n
\n \n 8 | \n Data Scientist | \n $111K-$181K (Glassdoor est.) | \n Paige is a software company helping pathologis... | \n Paige\\n5.0 | \n New York, NY | \n Enterprise Software & Network Solutions | \n
\n \n 10 | \n Data Scientist | \n $111K-$181K (Glassdoor est.) | \n Company Description:\\n\\nQuartet is a pioneerin... | \n Quartet Health\\n3.9 | \n New York, NY | \n Enterprise Software & Network Solutions | \n
\n \n ... | \n ... | \n ... | \n ... | \n ... | \n ... | \n ... | \n
\n \n 3872 | \n Data Scientist | \n $39K-$86K (Glassdoor est.) | \n JOB POSTING - Post Doctoral Scientist IGM\\nFul... | \n Nationwide Children's Hospital\\n3.7 | \n Columbus, OH | \n Health Care Services & Hospitals | \n
\n \n 3884 | \n Data Scientist | \n $55K-$112K (Glassdoor est.) | \n JOB POSTING – Senior Research Scientist – RI I... | \n Nationwide Children's Hospital\\n3.7 | \n Columbus, OH | \n Health Care Services & Hospitals | \n
\n \n 3893 | \n Data Scientist | \n $55K-$112K (Glassdoor est.) | \n Help us transform patients' lives.\\n\\nAt UCB, ... | \n UCB\\n3.7 | \n Slough, OH | \n Biotech & Pharmaceuticals | \n
\n \n 3895 | \n Data Scientist | \n $55K-$112K (Glassdoor est.) | \n Help us transform patients' lives.\\n\\nAt UCB, ... | \n UCB\\n3.7 | \n Slough, OH | \n Biotech & Pharmaceuticals | \n
\n \n 3908 | \n Data Scientist | \n $55K-$112K (Glassdoor est.) | \n Help us transform patients' lives.\\nAt UCB, we... | \n UCB\\n3.7 | \n Slough, OH | \n Biotech & Pharmaceuticals | \n
\n \n
\n
1746 rows × 6 columns
\n
"
665 | },
666 | "execution_count": 221,
667 | "metadata": {},
668 | "output_type": "execute_result"
669 | }
670 | ],
671 | "source": [
672 | "df[df['Job Title'].str.contains('scientist', case = False)]"
673 | ],
674 | "metadata": {
675 | "collapsed": false,
676 | "ExecuteTime": {
677 | "end_time": "2023-08-01T18:50:23.995303400Z",
678 | "start_time": "2023-08-01T18:50:23.968512700Z"
679 | }
680 | }
681 | },
682 | {
683 | "cell_type": "code",
684 | "execution_count": 222,
685 | "outputs": [],
686 | "source": [
687 | "df.loc[df['Job Title'].str.contains('analyst', case = False), 'Job Title'] = 'Data Analyst'"
688 | ],
689 | "metadata": {
690 | "collapsed": false,
691 | "ExecuteTime": {
692 | "end_time": "2023-08-01T18:50:24.528641500Z",
693 | "start_time": "2023-08-01T18:50:24.517293800Z"
694 | }
695 | }
696 | },
697 | {
698 | "cell_type": "code",
699 | "execution_count": 223,
700 | "outputs": [],
701 | "source": [
702 | "df.loc[df['Job Title'].str.contains('engineer', case = False), 'Job Title'] = 'Data Engineer'"
703 | ],
704 | "metadata": {
705 | "collapsed": false,
706 | "ExecuteTime": {
707 | "end_time": "2023-08-01T18:50:24.747497200Z",
708 | "start_time": "2023-08-01T18:50:24.721583900Z"
709 | }
710 | }
711 | },
712 | {
713 | "cell_type": "code",
714 | "execution_count": 224,
715 | "outputs": [
716 | {
717 | "data": {
718 | "text/plain": "Data Scientist 1746\nData Engineer 878\nData Analyst 874\nData Modeler 17\nMicrosoft Analytics Consultant 9\n ... \nManaging Consultant - Data Science 1\nRevenue Management and Strategic Analytics Manager 1\nHead of Data Science, Americas 1\nData Modeler Architect 1\nColumbus Data Science Tutor Jobs 1\nName: Job Title, Length: 279, dtype: int64"
719 | },
720 | "execution_count": 224,
721 | "metadata": {},
722 | "output_type": "execute_result"
723 | }
724 | ],
725 | "source": [
726 | "df['Job Title'].value_counts()"
727 | ],
728 | "metadata": {
729 | "collapsed": false,
730 | "ExecuteTime": {
731 | "end_time": "2023-08-01T18:50:24.932897300Z",
732 | "start_time": "2023-08-01T18:50:24.913960800Z"
733 | }
734 | }
735 | },
736 | {
737 | "cell_type": "code",
738 | "execution_count": 225,
739 | "outputs": [],
740 | "source": [
741 | "threshold = 800\n",
742 | "df_counts = df['Job Title'].value_counts()"
743 | ],
744 | "metadata": {
745 | "collapsed": false,
746 | "ExecuteTime": {
747 | "end_time": "2023-08-01T18:50:25.159079500Z",
748 | "start_time": "2023-08-01T18:50:25.140142700Z"
749 | }
750 | }
751 | },
752 | {
753 | "cell_type": "code",
754 | "execution_count": 226,
755 | "outputs": [],
756 | "source": [
757 | "df_thresdrop = df['Job Title'].isin(df_counts.index[df_counts < threshold])"
758 | ],
759 | "metadata": {
760 | "collapsed": false,
761 | "ExecuteTime": {
762 | "end_time": "2023-08-01T18:50:25.764778Z",
763 | "start_time": "2023-08-01T18:50:25.740859600Z"
764 | }
765 | }
766 | },
767 | {
768 | "cell_type": "code",
769 | "execution_count": 227,
770 | "outputs": [],
771 | "source": [
772 | "df = df[~df_thresdrop]"
773 | ],
774 | "metadata": {
775 | "collapsed": false,
776 | "ExecuteTime": {
777 | "end_time": "2023-08-01T18:50:26.036380100Z",
778 | "start_time": "2023-08-01T18:50:26.024420300Z"
779 | }
780 | }
781 | },
782 | {
783 | "cell_type": "code",
784 | "execution_count": 228,
785 | "outputs": [
786 | {
787 | "data": {
788 | "text/plain": "Data Scientist 1746\nData Engineer 878\nData Analyst 874\nName: Job Title, dtype: int64"
789 | },
790 | "execution_count": 228,
791 | "metadata": {},
792 | "output_type": "execute_result"
793 | }
794 | ],
795 | "source": [
796 | "df['Job Title'].value_counts()"
797 | ],
798 | "metadata": {
799 | "collapsed": false,
800 | "ExecuteTime": {
801 | "end_time": "2023-08-01T18:50:26.270848600Z",
802 | "start_time": "2023-08-01T18:50:26.252908600Z"
803 | }
804 | }
805 | },
806 | {
807 | "cell_type": "markdown",
808 | "source": [
809 | "# 6 - Remove a Substring"
810 | ],
811 | "metadata": {
812 | "collapsed": false
813 | }
814 | },
815 | {
816 | "cell_type": "code",
817 | "execution_count": 229,
818 | "outputs": [],
819 | "source": [
820 | "df[['Est Min Salary', 'Est Max Salary']] = df['Salary Estimate'].str.split('-', expand = True)"
821 | ],
822 | "metadata": {
823 | "collapsed": false,
824 | "ExecuteTime": {
825 | "end_time": "2023-08-01T18:50:26.935673200Z",
826 | "start_time": "2023-08-01T18:50:26.924709900Z"
827 | }
828 | }
829 | },
830 | {
831 | "cell_type": "code",
832 | "execution_count": 230,
833 | "outputs": [
834 | {
835 | "data": {
836 | "text/plain": " Job Title Salary Estimate \\\n0 Data Scientist $111K-$181K (Glassdoor est.) \n1 Data Scientist $111K-$181K (Glassdoor est.) \n5 Data Scientist $111K-$181K (Glassdoor est.) \n8 Data Scientist $111K-$181K (Glassdoor est.) \n10 Data Scientist $111K-$181K (Glassdoor est.) \n\n Job Description Company Name \\\n0 ABOUT HOPPER\\n\\nAt Hopper, we’re on a mission ... Hopper\\n3.5 \n1 At Noom, we use scientifically proven methods ... Noom US\\n4.5 \n5 Job Brief\\n\\nThe ideal candidate will have pre... IFG Companies\\n2.9 \n8 Paige is a software company helping pathologis... Paige\\n5.0 \n10 Company Description:\\n\\nQuartet is a pioneerin... Quartet Health\\n3.9 \n\n Location Industry Est Min Salary \\\n0 New York, NY Travel Agencies $111K \n1 New York, NY Health, Beauty, & Fitness $111K \n5 New York, NY Insurance Carriers $111K \n8 New York, NY Enterprise Software & Network Solutions $111K \n10 New York, NY Enterprise Software & Network Solutions $111K \n\n Est Max Salary \n0 $181K (Glassdoor est.) \n1 $181K (Glassdoor est.) \n5 $181K (Glassdoor est.) \n8 $181K (Glassdoor est.) \n10 $181K (Glassdoor est.) ",
837 | "text/html": "\n\n
\n \n \n | \n Job Title | \n Salary Estimate | \n Job Description | \n Company Name | \n Location | \n Industry | \n Est Min Salary | \n Est Max Salary | \n
\n \n \n \n 0 | \n Data Scientist | \n $111K-$181K (Glassdoor est.) | \n ABOUT HOPPER\\n\\nAt Hopper, we’re on a mission ... | \n Hopper\\n3.5 | \n New York, NY | \n Travel Agencies | \n $111K | \n $181K (Glassdoor est.) | \n
\n \n 1 | \n Data Scientist | \n $111K-$181K (Glassdoor est.) | \n At Noom, we use scientifically proven methods ... | \n Noom US\\n4.5 | \n New York, NY | \n Health, Beauty, & Fitness | \n $111K | \n $181K (Glassdoor est.) | \n
\n \n 5 | \n Data Scientist | \n $111K-$181K (Glassdoor est.) | \n Job Brief\\n\\nThe ideal candidate will have pre... | \n IFG Companies\\n2.9 | \n New York, NY | \n Insurance Carriers | \n $111K | \n $181K (Glassdoor est.) | \n
\n \n 8 | \n Data Scientist | \n $111K-$181K (Glassdoor est.) | \n Paige is a software company helping pathologis... | \n Paige\\n5.0 | \n New York, NY | \n Enterprise Software & Network Solutions | \n $111K | \n $181K (Glassdoor est.) | \n
\n \n 10 | \n Data Scientist | \n $111K-$181K (Glassdoor est.) | \n Company Description:\\n\\nQuartet is a pioneerin... | \n Quartet Health\\n3.9 | \n New York, NY | \n Enterprise Software & Network Solutions | \n $111K | \n $181K (Glassdoor est.) | \n
\n \n
\n
"
838 | },
839 | "execution_count": 230,
840 | "metadata": {},
841 | "output_type": "execute_result"
842 | }
843 | ],
844 | "source": [
845 | "df.head()"
846 | ],
847 | "metadata": {
848 | "collapsed": false,
849 | "ExecuteTime": {
850 | "end_time": "2023-08-01T18:50:27.274672500Z",
851 | "start_time": "2023-08-01T18:50:27.249755900Z"
852 | }
853 | }
854 | },
855 | {
856 | "cell_type": "code",
857 | "execution_count": 231,
858 | "outputs": [],
859 | "source": [
860 | "df[['Est Max Salary', 'Source']] = df['Est Max Salary'].str.split('(', expand = True)"
861 | ],
862 | "metadata": {
863 | "collapsed": false,
864 | "ExecuteTime": {
865 | "end_time": "2023-08-01T18:50:27.455083200Z",
866 | "start_time": "2023-08-01T18:50:27.447110100Z"
867 | }
868 | }
869 | },
870 | {
871 | "cell_type": "code",
872 | "execution_count": 232,
873 | "outputs": [
874 | {
875 | "data": {
876 | "text/plain": " Job Title Salary Estimate \\\n0 Data Scientist $111K-$181K (Glassdoor est.) \n1 Data Scientist $111K-$181K (Glassdoor est.) \n5 Data Scientist $111K-$181K (Glassdoor est.) \n8 Data Scientist $111K-$181K (Glassdoor est.) \n10 Data Scientist $111K-$181K (Glassdoor est.) \n\n Job Description Company Name \\\n0 ABOUT HOPPER\\n\\nAt Hopper, we’re on a mission ... Hopper\\n3.5 \n1 At Noom, we use scientifically proven methods ... Noom US\\n4.5 \n5 Job Brief\\n\\nThe ideal candidate will have pre... IFG Companies\\n2.9 \n8 Paige is a software company helping pathologis... Paige\\n5.0 \n10 Company Description:\\n\\nQuartet is a pioneerin... Quartet Health\\n3.9 \n\n Location Industry Est Min Salary \\\n0 New York, NY Travel Agencies $111K \n1 New York, NY Health, Beauty, & Fitness $111K \n5 New York, NY Insurance Carriers $111K \n8 New York, NY Enterprise Software & Network Solutions $111K \n10 New York, NY Enterprise Software & Network Solutions $111K \n\n Est Max Salary Source \n0 $181K Glassdoor est.) \n1 $181K Glassdoor est.) \n5 $181K Glassdoor est.) \n8 $181K Glassdoor est.) \n10 $181K Glassdoor est.) ",
877 | "text/html": "\n\n
\n \n \n | \n Job Title | \n Salary Estimate | \n Job Description | \n Company Name | \n Location | \n Industry | \n Est Min Salary | \n Est Max Salary | \n Source | \n
\n \n \n \n 0 | \n Data Scientist | \n $111K-$181K (Glassdoor est.) | \n ABOUT HOPPER\\n\\nAt Hopper, we’re on a mission ... | \n Hopper\\n3.5 | \n New York, NY | \n Travel Agencies | \n $111K | \n $181K | \n Glassdoor est.) | \n
\n \n 1 | \n Data Scientist | \n $111K-$181K (Glassdoor est.) | \n At Noom, we use scientifically proven methods ... | \n Noom US\\n4.5 | \n New York, NY | \n Health, Beauty, & Fitness | \n $111K | \n $181K | \n Glassdoor est.) | \n
\n \n 5 | \n Data Scientist | \n $111K-$181K (Glassdoor est.) | \n Job Brief\\n\\nThe ideal candidate will have pre... | \n IFG Companies\\n2.9 | \n New York, NY | \n Insurance Carriers | \n $111K | \n $181K | \n Glassdoor est.) | \n
\n \n 8 | \n Data Scientist | \n $111K-$181K (Glassdoor est.) | \n Paige is a software company helping pathologis... | \n Paige\\n5.0 | \n New York, NY | \n Enterprise Software & Network Solutions | \n $111K | \n $181K | \n Glassdoor est.) | \n
\n \n 10 | \n Data Scientist | \n $111K-$181K (Glassdoor est.) | \n Company Description:\\n\\nQuartet is a pioneerin... | \n Quartet Health\\n3.9 | \n New York, NY | \n Enterprise Software & Network Solutions | \n $111K | \n $181K | \n Glassdoor est.) | \n
\n \n
\n
"
878 | },
879 | "execution_count": 232,
880 | "metadata": {},
881 | "output_type": "execute_result"
882 | }
883 | ],
884 | "source": [
885 | "df.head()"
886 | ],
887 | "metadata": {
888 | "collapsed": false,
889 | "ExecuteTime": {
890 | "end_time": "2023-08-01T18:50:27.813243300Z",
891 | "start_time": "2023-08-01T18:50:27.805935300Z"
892 | }
893 | }
894 | },
895 | {
896 | "cell_type": "code",
897 | "execution_count": 233,
898 | "outputs": [],
899 | "source": [
900 | "replace_dict = {'\\$' : '', 'K':'000'}"
901 | ],
902 | "metadata": {
903 | "collapsed": false,
904 | "ExecuteTime": {
905 | "end_time": "2023-08-01T18:50:28.310579700Z",
906 | "start_time": "2023-08-01T18:50:28.296617600Z"
907 | }
908 | }
909 | },
910 | {
911 | "cell_type": "code",
912 | "execution_count": 234,
913 | "outputs": [],
914 | "source": [
915 | "df['Est Min Salary'] = df['Est Min Salary'].replace(replace_dict, regex = True)"
916 | ],
917 | "metadata": {
918 | "collapsed": false,
919 | "ExecuteTime": {
920 | "end_time": "2023-08-01T18:50:28.575615700Z",
921 | "start_time": "2023-08-01T18:50:28.552692100Z"
922 | }
923 | }
924 | },
925 | {
926 | "cell_type": "code",
927 | "execution_count": 235,
928 | "outputs": [
929 | {
930 | "data": {
931 | "text/plain": " Job Title Salary Estimate \\\n0 Data Scientist $111K-$181K (Glassdoor est.) \n1 Data Scientist $111K-$181K (Glassdoor est.) \n5 Data Scientist $111K-$181K (Glassdoor est.) \n8 Data Scientist $111K-$181K (Glassdoor est.) \n10 Data Scientist $111K-$181K (Glassdoor est.) \n\n Job Description Company Name \\\n0 ABOUT HOPPER\\n\\nAt Hopper, we’re on a mission ... Hopper\\n3.5 \n1 At Noom, we use scientifically proven methods ... Noom US\\n4.5 \n5 Job Brief\\n\\nThe ideal candidate will have pre... IFG Companies\\n2.9 \n8 Paige is a software company helping pathologis... Paige\\n5.0 \n10 Company Description:\\n\\nQuartet is a pioneerin... Quartet Health\\n3.9 \n\n Location Industry Est Min Salary \\\n0 New York, NY Travel Agencies 111000 \n1 New York, NY Health, Beauty, & Fitness 111000 \n5 New York, NY Insurance Carriers 111000 \n8 New York, NY Enterprise Software & Network Solutions 111000 \n10 New York, NY Enterprise Software & Network Solutions 111000 \n\n Est Max Salary Source \n0 $181K Glassdoor est.) \n1 $181K Glassdoor est.) \n5 $181K Glassdoor est.) \n8 $181K Glassdoor est.) \n10 $181K Glassdoor est.) ",
932 | "text/html": "\n\n
\n \n \n | \n Job Title | \n Salary Estimate | \n Job Description | \n Company Name | \n Location | \n Industry | \n Est Min Salary | \n Est Max Salary | \n Source | \n
\n \n \n \n 0 | \n Data Scientist | \n $111K-$181K (Glassdoor est.) | \n ABOUT HOPPER\\n\\nAt Hopper, we’re on a mission ... | \n Hopper\\n3.5 | \n New York, NY | \n Travel Agencies | \n 111000 | \n $181K | \n Glassdoor est.) | \n
\n \n 1 | \n Data Scientist | \n $111K-$181K (Glassdoor est.) | \n At Noom, we use scientifically proven methods ... | \n Noom US\\n4.5 | \n New York, NY | \n Health, Beauty, & Fitness | \n 111000 | \n $181K | \n Glassdoor est.) | \n
\n \n 5 | \n Data Scientist | \n $111K-$181K (Glassdoor est.) | \n Job Brief\\n\\nThe ideal candidate will have pre... | \n IFG Companies\\n2.9 | \n New York, NY | \n Insurance Carriers | \n 111000 | \n $181K | \n Glassdoor est.) | \n
\n \n 8 | \n Data Scientist | \n $111K-$181K (Glassdoor est.) | \n Paige is a software company helping pathologis... | \n Paige\\n5.0 | \n New York, NY | \n Enterprise Software & Network Solutions | \n 111000 | \n $181K | \n Glassdoor est.) | \n
\n \n 10 | \n Data Scientist | \n $111K-$181K (Glassdoor est.) | \n Company Description:\\n\\nQuartet is a pioneerin... | \n Quartet Health\\n3.9 | \n New York, NY | \n Enterprise Software & Network Solutions | \n 111000 | \n $181K | \n Glassdoor est.) | \n
\n \n
\n
"
933 | },
934 | "execution_count": 235,
935 | "metadata": {},
936 | "output_type": "execute_result"
937 | }
938 | ],
939 | "source": [
940 | "df.head()"
941 | ],
942 | "metadata": {
943 | "collapsed": false,
944 | "ExecuteTime": {
945 | "end_time": "2023-08-01T18:50:28.793899600Z",
946 | "start_time": "2023-08-01T18:50:28.779947700Z"
947 | }
948 | }
949 | },
950 | {
951 | "cell_type": "code",
952 | "execution_count": 236,
953 | "outputs": [],
954 | "source": [
955 | "df['Est Max Salary'] = df['Est Max Salary'].replace(replace_dict, regex = True)"
956 | ],
957 | "metadata": {
958 | "collapsed": false,
959 | "ExecuteTime": {
960 | "end_time": "2023-08-01T18:50:29.063058600Z",
961 | "start_time": "2023-08-01T18:50:29.036148700Z"
962 | }
963 | }
964 | },
965 | {
966 | "cell_type": "code",
967 | "execution_count": 237,
968 | "outputs": [
969 | {
970 | "data": {
971 | "text/plain": " Job Title Salary Estimate \\\n0 Data Scientist $111K-$181K (Glassdoor est.) \n1 Data Scientist $111K-$181K (Glassdoor est.) \n5 Data Scientist $111K-$181K (Glassdoor est.) \n8 Data Scientist $111K-$181K (Glassdoor est.) \n10 Data Scientist $111K-$181K (Glassdoor est.) \n\n Job Description Company Name \\\n0 ABOUT HOPPER\\n\\nAt Hopper, we’re on a mission ... Hopper\\n3.5 \n1 At Noom, we use scientifically proven methods ... Noom US\\n4.5 \n5 Job Brief\\n\\nThe ideal candidate will have pre... IFG Companies\\n2.9 \n8 Paige is a software company helping pathologis... Paige\\n5.0 \n10 Company Description:\\n\\nQuartet is a pioneerin... Quartet Health\\n3.9 \n\n Location Industry Est Min Salary \\\n0 New York, NY Travel Agencies 111000 \n1 New York, NY Health, Beauty, & Fitness 111000 \n5 New York, NY Insurance Carriers 111000 \n8 New York, NY Enterprise Software & Network Solutions 111000 \n10 New York, NY Enterprise Software & Network Solutions 111000 \n\n Est Max Salary Source \n0 181000 Glassdoor est.) \n1 181000 Glassdoor est.) \n5 181000 Glassdoor est.) \n8 181000 Glassdoor est.) \n10 181000 Glassdoor est.) ",
972 | "text/html": "\n\n
\n \n \n | \n Job Title | \n Salary Estimate | \n Job Description | \n Company Name | \n Location | \n Industry | \n Est Min Salary | \n Est Max Salary | \n Source | \n
\n \n \n \n 0 | \n Data Scientist | \n $111K-$181K (Glassdoor est.) | \n ABOUT HOPPER\\n\\nAt Hopper, we’re on a mission ... | \n Hopper\\n3.5 | \n New York, NY | \n Travel Agencies | \n 111000 | \n 181000 | \n Glassdoor est.) | \n
\n \n 1 | \n Data Scientist | \n $111K-$181K (Glassdoor est.) | \n At Noom, we use scientifically proven methods ... | \n Noom US\\n4.5 | \n New York, NY | \n Health, Beauty, & Fitness | \n 111000 | \n 181000 | \n Glassdoor est.) | \n
\n \n 5 | \n Data Scientist | \n $111K-$181K (Glassdoor est.) | \n Job Brief\\n\\nThe ideal candidate will have pre... | \n IFG Companies\\n2.9 | \n New York, NY | \n Insurance Carriers | \n 111000 | \n 181000 | \n Glassdoor est.) | \n
\n \n 8 | \n Data Scientist | \n $111K-$181K (Glassdoor est.) | \n Paige is a software company helping pathologis... | \n Paige\\n5.0 | \n New York, NY | \n Enterprise Software & Network Solutions | \n 111000 | \n 181000 | \n Glassdoor est.) | \n
\n \n 10 | \n Data Scientist | \n $111K-$181K (Glassdoor est.) | \n Company Description:\\n\\nQuartet is a pioneerin... | \n Quartet Health\\n3.9 | \n New York, NY | \n Enterprise Software & Network Solutions | \n 111000 | \n 181000 | \n Glassdoor est.) | \n
\n \n
\n
"
973 | },
974 | "execution_count": 237,
975 | "metadata": {},
976 | "output_type": "execute_result"
977 | }
978 | ],
979 | "source": [
980 | "df.head()"
981 | ],
982 | "metadata": {
983 | "collapsed": false,
984 | "ExecuteTime": {
985 | "end_time": "2023-08-01T18:50:29.451701400Z",
986 | "start_time": "2023-08-01T18:50:29.424791Z"
987 | }
988 | }
989 | },
990 | {
991 | "cell_type": "code",
992 | "execution_count": 238,
993 | "outputs": [],
994 | "source": [
995 | "df.drop('Source', inplace=True, axis=1)"
996 | ],
997 | "metadata": {
998 | "collapsed": false,
999 | "ExecuteTime": {
1000 | "end_time": "2023-08-01T18:50:30.158161500Z",
1001 | "start_time": "2023-08-01T18:50:30.140222100Z"
1002 | }
1003 | }
1004 | },
1005 | {
1006 | "cell_type": "markdown",
1007 | "source": [
1008 | "# 7 - Convert Datatypes"
1009 | ],
1010 | "metadata": {
1011 | "collapsed": false
1012 | }
1013 | },
1014 | {
1015 | "cell_type": "code",
1016 | "execution_count": 239,
1017 | "outputs": [
1018 | {
1019 | "data": {
1020 | "text/plain": "Job Title object\nSalary Estimate object\nJob Description object\nCompany Name object\nLocation object\nIndustry object\nEst Min Salary object\nEst Max Salary object\ndtype: object"
1021 | },
1022 | "execution_count": 239,
1023 | "metadata": {},
1024 | "output_type": "execute_result"
1025 | }
1026 | ],
1027 | "source": [
1028 | "df.dtypes"
1029 | ],
1030 | "metadata": {
1031 | "collapsed": false,
1032 | "ExecuteTime": {
1033 | "end_time": "2023-08-01T18:50:30.799392800Z",
1034 | "start_time": "2023-08-01T18:50:30.788007300Z"
1035 | }
1036 | }
1037 | },
1038 | {
1039 | "cell_type": "code",
1040 | "execution_count": 240,
1041 | "outputs": [],
1042 | "source": [
1043 | "df['Est Min Salary'] = df['Est Min Salary'].astype(int)"
1044 | ],
1045 | "metadata": {
1046 | "collapsed": false,
1047 | "ExecuteTime": {
1048 | "end_time": "2023-08-01T18:50:31.227812200Z",
1049 | "start_time": "2023-08-01T18:50:31.205247Z"
1050 | }
1051 | }
1052 | },
1053 | {
1054 | "cell_type": "code",
1055 | "execution_count": 241,
1056 | "outputs": [],
1057 | "source": [
1058 | "df['Est Max Salary'] = df['Est Max Salary'].astype(int)"
1059 | ],
1060 | "metadata": {
1061 | "collapsed": false,
1062 | "ExecuteTime": {
1063 | "end_time": "2023-08-01T18:50:32.576341100Z",
1064 | "start_time": "2023-08-01T18:50:32.553417400Z"
1065 | }
1066 | }
1067 | },
1068 | {
1069 | "cell_type": "code",
1070 | "execution_count": 242,
1071 | "outputs": [
1072 | {
1073 | "data": {
1074 | "text/plain": "Job Title object\nSalary Estimate object\nJob Description object\nCompany Name object\nLocation object\nIndustry object\nEst Min Salary int32\nEst Max Salary int32\ndtype: object"
1075 | },
1076 | "execution_count": 242,
1077 | "metadata": {},
1078 | "output_type": "execute_result"
1079 | }
1080 | ],
1081 | "source": [
1082 | "df.dtypes"
1083 | ],
1084 | "metadata": {
1085 | "collapsed": false,
1086 | "ExecuteTime": {
1087 | "end_time": "2023-08-01T18:50:33.184123400Z",
1088 | "start_time": "2023-08-01T18:50:33.163193800Z"
1089 | }
1090 | }
1091 | },
1092 | {
1093 | "cell_type": "code",
1094 | "execution_count": 243,
1095 | "outputs": [],
1096 | "source": [
1097 | "df = df.drop('Salary Estimate', axis=1)"
1098 | ],
1099 | "metadata": {
1100 | "collapsed": false,
1101 | "ExecuteTime": {
1102 | "end_time": "2023-08-01T18:50:33.820676300Z",
1103 | "start_time": "2023-08-01T18:50:33.804221900Z"
1104 | }
1105 | }
1106 | },
1107 | {
1108 | "cell_type": "markdown",
1109 | "source": [
1110 | "# 8 - Stripping Trailing Characters from Strings"
1111 | ],
1112 | "metadata": {
1113 | "collapsed": false
1114 | }
1115 | },
1116 | {
1117 | "cell_type": "code",
1118 | "execution_count": 244,
1119 | "outputs": [],
1120 | "source": [
1121 | "df['Company Name'] = df['Company Name'].str[:-4]"
1122 | ],
1123 | "metadata": {
1124 | "collapsed": false,
1125 | "ExecuteTime": {
1126 | "end_time": "2023-08-01T18:51:37.723942300Z",
1127 | "start_time": "2023-08-01T18:51:37.717313200Z"
1128 | }
1129 | }
1130 | },
1131 | {
1132 | "cell_type": "code",
1133 | "execution_count": 245,
1134 | "outputs": [
1135 | {
1136 | "data": {
1137 | "text/plain": " Job Title Job Description \\\n0 Data Scientist ABOUT HOPPER\\n\\nAt Hopper, we’re on a mission ... \n1 Data Scientist At Noom, we use scientifically proven methods ... \n5 Data Scientist Job Brief\\n\\nThe ideal candidate will have pre... \n8 Data Scientist Paige is a software company helping pathologis... \n10 Data Scientist Company Description:\\n\\nQuartet is a pioneerin... \n11 Data Scientist PulsePoint™, a global programmatic advertising... \n12 Data Scientist Medidata: Conquering Diseases Together\\n\\nMedi... \n13 Data Scientist A Career with Point72’s MI Data team\\nAt Point... \n14 Data Scientist Two Sigma is a different kind of investment ma... \n15 Data Scientist Data Scientist\\nAffinity Solutions / Marketing... \n\n Company Name Location Industry \\\n0 Hopper New York, NY Travel Agencies \n1 Noom US New York, NY Health, Beauty, & Fitness \n5 IFG Companies New York, NY Insurance Carriers \n8 Paige New York, NY Enterprise Software & Network Solutions \n10 Quartet Health New York, NY Enterprise Software & Network Solutions \n11 PulsePoint New York, NY Internet \n12 Medidata Solutions New York, NY Enterprise Software & Network Solutions \n13 Point72 New York, NY Investment Banking & Asset Management \n14 Two Sigma New York, NY Investment Banking & Asset Management \n15 Affinity Solutions New York, NY Advertising & Marketing \n\n Est Min Salary Est Max Salary \n0 111000 181000 \n1 111000 181000 \n5 111000 181000 \n8 111000 181000 \n10 111000 181000 \n11 111000 181000 \n12 111000 181000 \n13 111000 181000 \n14 111000 181000 \n15 111000 181000 ",
1138 | "text/html": "\n\n
\n \n \n | \n Job Title | \n Job Description | \n Company Name | \n Location | \n Industry | \n Est Min Salary | \n Est Max Salary | \n
\n \n \n \n 0 | \n Data Scientist | \n ABOUT HOPPER\\n\\nAt Hopper, we’re on a mission ... | \n Hopper | \n New York, NY | \n Travel Agencies | \n 111000 | \n 181000 | \n
\n \n 1 | \n Data Scientist | \n At Noom, we use scientifically proven methods ... | \n Noom US | \n New York, NY | \n Health, Beauty, & Fitness | \n 111000 | \n 181000 | \n
\n \n 5 | \n Data Scientist | \n Job Brief\\n\\nThe ideal candidate will have pre... | \n IFG Companies | \n New York, NY | \n Insurance Carriers | \n 111000 | \n 181000 | \n
\n \n 8 | \n Data Scientist | \n Paige is a software company helping pathologis... | \n Paige | \n New York, NY | \n Enterprise Software & Network Solutions | \n 111000 | \n 181000 | \n
\n \n 10 | \n Data Scientist | \n Company Description:\\n\\nQuartet is a pioneerin... | \n Quartet Health | \n New York, NY | \n Enterprise Software & Network Solutions | \n 111000 | \n 181000 | \n
\n \n 11 | \n Data Scientist | \n PulsePoint™, a global programmatic advertising... | \n PulsePoint | \n New York, NY | \n Internet | \n 111000 | \n 181000 | \n
\n \n 12 | \n Data Scientist | \n Medidata: Conquering Diseases Together\\n\\nMedi... | \n Medidata Solutions | \n New York, NY | \n Enterprise Software & Network Solutions | \n 111000 | \n 181000 | \n
\n \n 13 | \n Data Scientist | \n A Career with Point72’s MI Data team\\nAt Point... | \n Point72 | \n New York, NY | \n Investment Banking & Asset Management | \n 111000 | \n 181000 | \n
\n \n 14 | \n Data Scientist | \n Two Sigma is a different kind of investment ma... | \n Two Sigma | \n New York, NY | \n Investment Banking & Asset Management | \n 111000 | \n 181000 | \n
\n \n 15 | \n Data Scientist | \n Data Scientist\\nAffinity Solutions / Marketing... | \n Affinity Solutions | \n New York, NY | \n Advertising & Marketing | \n 111000 | \n 181000 | \n
\n \n
\n
"
1139 | },
1140 | "execution_count": 245,
1141 | "metadata": {},
1142 | "output_type": "execute_result"
1143 | }
1144 | ],
1145 | "source": [
1146 | "df.head(10)"
1147 | ],
1148 | "metadata": {
1149 | "collapsed": false,
1150 | "ExecuteTime": {
1151 | "end_time": "2023-08-01T18:51:41.190987500Z",
1152 | "start_time": "2023-08-01T18:51:41.170378300Z"
1153 | }
1154 | }
1155 | },
1156 | {
1157 | "cell_type": "markdown",
1158 | "source": [
1159 | "# Mine Job Description"
1160 | ],
1161 | "metadata": {
1162 | "collapsed": false
1163 | }
1164 | },
1165 | {
1166 | "cell_type": "code",
1167 | "execution_count": 246,
1168 | "outputs": [],
1169 | "source": [
1170 | "skill_set = ['sql','python', 'power bi', 'tableau', 'excel', ' r ']"
1171 | ],
1172 | "metadata": {
1173 | "collapsed": false,
1174 | "ExecuteTime": {
1175 | "end_time": "2023-08-01T18:54:41.775406400Z",
1176 | "start_time": "2023-08-01T18:54:41.757410500Z"
1177 | }
1178 | }
1179 | },
1180 | {
1181 | "cell_type": "code",
1182 | "execution_count": 247,
1183 | "outputs": [
1184 | {
1185 | "name": "stdout",
1186 | "output_type": "stream",
1187 | "text": [
1188 | "Number of jobs that require sql : 581\n",
1189 | "Number of jobs that require python : 266\n",
1190 | "Number of jobs that require power bi : 98\n",
1191 | "Number of jobs that require tableau : 275\n",
1192 | "Number of jobs that require excel : 533\n",
1193 | "Number of jobs that require r : 63\n"
1194 | ]
1195 | }
1196 | ],
1197 | "source": [
1198 | "for skills in skill_set:\n",
1199 | " print('Number of jobs that require', skills, ' : ' , len(df[(df['Job Description'].str.contains(skills, case = False)) & (df['Job Title']=='Data Analyst')]))"
1200 | ],
1201 | "metadata": {
1202 | "collapsed": false,
1203 | "ExecuteTime": {
1204 | "end_time": "2023-08-01T18:56:50.084458600Z",
1205 | "start_time": "2023-08-01T18:56:49.766296500Z"
1206 | }
1207 | }
1208 | },
1209 | {
1210 | "cell_type": "code",
1211 | "execution_count": 248,
1212 | "outputs": [
1213 | {
1214 | "name": "stdout",
1215 | "output_type": "stream",
1216 | "text": [
1217 | "Number of jobs that require sql : 641\n",
1218 | "Number of jobs that require python : 980\n",
1219 | "Number of jobs that require power bi : 40\n",
1220 | "Number of jobs that require tableau : 192\n",
1221 | "Number of jobs that require excel : 829\n",
1222 | "Number of jobs that require r : 168\n"
1223 | ]
1224 | }
1225 | ],
1226 | "source": [
1227 | "for skills in skill_set:\n",
1228 | " print('Number of jobs that require', skills, ' : ' , len(df[(df['Job Description'].str.contains(skills, case = False)) & (df['Job Title']=='Data Scientist')]))"
1229 | ],
1230 | "metadata": {
1231 | "collapsed": false,
1232 | "ExecuteTime": {
1233 | "end_time": "2023-08-01T18:57:05.003171100Z",
1234 | "start_time": "2023-08-01T18:57:04.613465100Z"
1235 | }
1236 | }
1237 | },
1238 | {
1239 | "cell_type": "code",
1240 | "execution_count": 249,
1241 | "outputs": [
1242 | {
1243 | "name": "stdout",
1244 | "output_type": "stream",
1245 | "text": [
1246 | "Number of jobs that require sql : 591\n",
1247 | "Number of jobs that require python : 635\n",
1248 | "Number of jobs that require power bi : 47\n",
1249 | "Number of jobs that require tableau : 133\n",
1250 | "Number of jobs that require excel : 294\n",
1251 | "Number of jobs that require r : 44\n"
1252 | ]
1253 | }
1254 | ],
1255 | "source": [
1256 | "for skills in skill_set:\n",
1257 | " print('Number of jobs that require', skills, ' : ' , len(df[(df['Job Description'].str.contains(skills, case = False)) & (df['Job Title']=='Data Engineer')]))"
1258 | ],
1259 | "metadata": {
1260 | "collapsed": false,
1261 | "ExecuteTime": {
1262 | "end_time": "2023-08-01T18:57:17.670664200Z",
1263 | "start_time": "2023-08-01T18:57:17.339594300Z"
1264 | }
1265 | }
1266 | },
1267 | {
1268 | "cell_type": "code",
1269 | "execution_count": null,
1270 | "outputs": [],
1271 | "source": [],
1272 | "metadata": {
1273 | "collapsed": false
1274 | }
1275 | }
1276 | ],
1277 | "metadata": {
1278 | "kernelspec": {
1279 | "display_name": "Python 3",
1280 | "language": "python",
1281 | "name": "python3"
1282 | },
1283 | "language_info": {
1284 | "codemirror_mode": {
1285 | "name": "ipython",
1286 | "version": 2
1287 | },
1288 | "file_extension": ".py",
1289 | "mimetype": "text/x-python",
1290 | "name": "python",
1291 | "nbconvert_exporter": "python",
1292 | "pygments_lexer": "ipython2",
1293 | "version": "2.7.6"
1294 | }
1295 | },
1296 | "nbformat": 4,
1297 | "nbformat_minor": 0
1298 | }
1299 |
--------------------------------------------------------------------------------
/05_ValueInvestorHoldings31122022_2.xlsx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jbotes/powerbiTutorials/13ec6bab783b77e7b6b904274c140e0efdf32cb1/05_ValueInvestorHoldings31122022_2.xlsx
--------------------------------------------------------------------------------
/06_ValueInvestorHoldings31032023.xlsx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jbotes/powerbiTutorials/13ec6bab783b77e7b6b904274c140e0efdf32cb1/06_ValueInvestorHoldings31032023.xlsx
--------------------------------------------------------------------------------
/28_Lost_Customers_NW_Video.pbix:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jbotes/powerbiTutorials/13ec6bab783b77e7b6b904274c140e0efdf32cb1/28_Lost_Customers_NW_Video.pbix
--------------------------------------------------------------------------------
/28_New_Customers_NW_video.pbix:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jbotes/powerbiTutorials/13ec6bab783b77e7b6b904274c140e0efdf32cb1/28_New_Customers_NW_video.pbix
--------------------------------------------------------------------------------
/ActualDataset.xlsx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jbotes/powerbiTutorials/13ec6bab783b77e7b6b904274c140e0efdf32cb1/ActualDataset.xlsx
--------------------------------------------------------------------------------
/BaSensei_webscrape_Bitcoin_YTVideo.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "code",
5 | "execution_count": 1,
6 | "metadata": {
7 | "collapsed": true,
8 | "ExecuteTime": {
9 | "end_time": "2023-08-13T11:42:32.862157700Z",
10 | "start_time": "2023-08-13T11:42:31.597143300Z"
11 | }
12 | },
13 | "outputs": [],
14 | "source": [
15 | "from bs4 import BeautifulSoup\n",
16 | "import pandas as pd\n",
17 | "import requests\n",
18 | "import csv\n",
19 | "#from pyspark.sql import dataframe\n",
20 | "from datetime import datetime"
21 | ]
22 | },
23 | {
24 | "cell_type": "code",
25 | "execution_count": 2,
26 | "outputs": [],
27 | "source": [
28 | "headers = {\"User-Agent\":\"Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:66.0) Gecko/20100101 Firefox/66.0\", \"Accept-Encoding\":\"gzip, deflate\", \"Accept\":\"text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8\", \"DNT\":\"1\",\"Connection\":\"close\", \"Upgrade-Insecure-Requests\":\"1\"}"
29 | ],
30 | "metadata": {
31 | "collapsed": false,
32 | "ExecuteTime": {
33 | "end_time": "2023-08-13T11:42:48.740041700Z",
34 | "start_time": "2023-08-13T11:42:48.728055600Z"
35 | }
36 | }
37 | },
38 | {
39 | "cell_type": "code",
40 | "execution_count": 3,
41 | "outputs": [],
42 | "source": [
43 | "url = 'https://bitinfocharts.com/top-100-richest-bitcoin-addresses.html'\n",
44 | "response = requests.get(url,headers=headers).text\n",
45 | "soup = BeautifulSoup(response, \"html.parser\")"
46 | ],
47 | "metadata": {
48 | "collapsed": false,
49 | "ExecuteTime": {
50 | "end_time": "2023-08-13T11:43:14.690684900Z",
51 | "start_time": "2023-08-13T11:43:13.906464400Z"
52 | }
53 | }
54 | },
55 | {
56 | "cell_type": "markdown",
57 | "source": [
58 | "# Paste ChatGPT Code"
59 | ],
60 | "metadata": {
61 | "collapsed": false
62 | }
63 | },
64 | {
65 | "cell_type": "code",
66 | "execution_count": 5,
67 | "outputs": [],
68 | "source": [
69 | "# Find the table with the specified attributes\n",
70 | "table = soup.find('table', {'class': 'table table-condensed bb', 'style': 'max-width:1000px;text-align:center;width: inherit;margin-bottom: 0;'})\n"
71 | ],
72 | "metadata": {
73 | "collapsed": false,
74 | "ExecuteTime": {
75 | "end_time": "2023-08-13T11:43:53.567113Z",
76 | "start_time": "2023-08-13T11:43:53.552132Z"
77 | }
78 | }
79 | },
80 | {
81 | "cell_type": "code",
82 | "execution_count": 6,
83 | "outputs": [],
84 | "source": [
85 | "# Extract the headers\n",
86 | "headers = [th.text for th in table.find('thead').find_all('th')]"
87 | ],
88 | "metadata": {
89 | "collapsed": false,
90 | "ExecuteTime": {
91 | "end_time": "2023-08-13T11:43:59.555082500Z",
92 | "start_time": "2023-08-13T11:43:59.546076Z"
93 | }
94 | }
95 | },
96 | {
97 | "cell_type": "code",
98 | "execution_count": 7,
99 | "outputs": [],
100 | "source": [
101 | "# Extract the rows\n",
102 | "rows = []\n",
103 | "for tr in table.find('tbody').find_all('tr'):\n",
104 | " rows.append([td.text.strip() for td in tr.find_all('td')])"
105 | ],
106 | "metadata": {
107 | "collapsed": false,
108 | "ExecuteTime": {
109 | "end_time": "2023-08-13T11:44:05.380681100Z",
110 | "start_time": "2023-08-13T11:44:05.371079600Z"
111 | }
112 | }
113 | },
114 | {
115 | "cell_type": "code",
116 | "execution_count": 8,
117 | "outputs": [],
118 | "source": [
119 | "# Create a DataFrame\n",
120 | "df = pd.DataFrame(rows, columns=headers)"
121 | ],
122 | "metadata": {
123 | "collapsed": false,
124 | "ExecuteTime": {
125 | "end_time": "2023-08-13T11:44:16.715243300Z",
126 | "start_time": "2023-08-13T11:44:16.697255800Z"
127 | }
128 | }
129 | },
130 | {
131 | "cell_type": "code",
132 | "execution_count": 9,
133 | "outputs": [],
134 | "source": [
135 | "# Add the current date and time column\n",
136 | "df['Date and Time'] = pd.Timestamp.now().strftime('%d/%m/%Y %H:%M')"
137 | ],
138 | "metadata": {
139 | "collapsed": false,
140 | "ExecuteTime": {
141 | "end_time": "2023-08-13T11:44:22.655041400Z",
142 | "start_time": "2023-08-13T11:44:22.643056Z"
143 | }
144 | }
145 | },
146 | {
147 | "cell_type": "code",
148 | "execution_count": 10,
149 | "outputs": [
150 | {
151 | "data": {
152 | "text/plain": " Balance, BTC Addresses % Addresses (Total) Coins \\\n0 (0 - 0.00001) 3648493 7.48% (100%) 18.62 BTC \n1 [0.00001 - 0.0001) 9578291 19.64% (92.52%) 411.72 BTC \n2 [0.0001 - 0.001) 12084944 24.78% (72.88%) 4,694 BTC \n3 [0.001 - 0.01) 11236056 23.04% (48.1%) 41,775 BTC \n4 [0.01 - 0.1) 7784483 15.96% (25.06%) 261,374 BTC \n\n USD % Coins (Total) Date and Time \n0 $547,352 0% (100%) 13/08/2023 13:44 \n1 $12,104,875 0% (100%) 13/08/2023 13:44 \n2 $138,013,773 0.02% (100%) 13/08/2023 13:44 \n3 $1,228,237,337 0.21% (99.97%) 13/08/2023 13:44 \n4 $7,684,675,479 1.34% (99.76%) 13/08/2023 13:44 ",
153 | "text/html": "\n\n
\n \n \n | \n Balance, BTC | \n Addresses | \n % Addresses (Total) | \n Coins | \n USD | \n % Coins (Total) | \n Date and Time | \n
\n \n \n \n 0 | \n (0 - 0.00001) | \n 3648493 | \n 7.48% (100%) | \n 18.62 BTC | \n $547,352 | \n 0% (100%) | \n 13/08/2023 13:44 | \n
\n \n 1 | \n [0.00001 - 0.0001) | \n 9578291 | \n 19.64% (92.52%) | \n 411.72 BTC | \n $12,104,875 | \n 0% (100%) | \n 13/08/2023 13:44 | \n
\n \n 2 | \n [0.0001 - 0.001) | \n 12084944 | \n 24.78% (72.88%) | \n 4,694 BTC | \n $138,013,773 | \n 0.02% (100%) | \n 13/08/2023 13:44 | \n
\n \n 3 | \n [0.001 - 0.01) | \n 11236056 | \n 23.04% (48.1%) | \n 41,775 BTC | \n $1,228,237,337 | \n 0.21% (99.97%) | \n 13/08/2023 13:44 | \n
\n \n 4 | \n [0.01 - 0.1) | \n 7784483 | \n 15.96% (25.06%) | \n 261,374 BTC | \n $7,684,675,479 | \n 1.34% (99.76%) | \n 13/08/2023 13:44 | \n
\n \n
\n
"
154 | },
155 | "execution_count": 10,
156 | "metadata": {},
157 | "output_type": "execute_result"
158 | }
159 | ],
160 | "source": [
161 | "# Display the first few rows of the DataFrame\n",
162 | "df.head()"
163 | ],
164 | "metadata": {
165 | "collapsed": false,
166 | "ExecuteTime": {
167 | "end_time": "2023-08-13T11:44:27.161515300Z",
168 | "start_time": "2023-08-13T11:44:27.146054300Z"
169 | }
170 | }
171 | },
172 | {
173 | "cell_type": "code",
174 | "execution_count": 11,
175 | "outputs": [
176 | {
177 | "data": {
178 | "text/plain": " Balance, BTC Addresses % Addresses (Total) Coins \\\n0 (0 - 0.00001) 3648493 0.0748 18.62 BTC \n1 [0.00001 - 0.0001) 9578291 0.1964 411.72 BTC \n2 [0.0001 - 0.001) 12084944 0.2478 4,694 BTC \n3 [0.001 - 0.01) 11236056 0.2304 41,775 BTC \n4 [0.01 - 0.1) 7784483 0.1596 261,374 BTC \n\n USD % Coins (Total) Date and Time \n0 $547,352 0% (100%) 13/08/2023 13:44 \n1 $12,104,875 0% (100%) 13/08/2023 13:44 \n2 $138,013,773 0.02% (100%) 13/08/2023 13:44 \n3 $1,228,237,337 0.21% (99.97%) 13/08/2023 13:44 \n4 $7,684,675,479 1.34% (99.76%) 13/08/2023 13:44 ",
179 | "text/html": "\n\n
\n \n \n | \n Balance, BTC | \n Addresses | \n % Addresses (Total) | \n Coins | \n USD | \n % Coins (Total) | \n Date and Time | \n
\n \n \n \n 0 | \n (0 - 0.00001) | \n 3648493 | \n 0.0748 | \n 18.62 BTC | \n $547,352 | \n 0% (100%) | \n 13/08/2023 13:44 | \n
\n \n 1 | \n [0.00001 - 0.0001) | \n 9578291 | \n 0.1964 | \n 411.72 BTC | \n $12,104,875 | \n 0% (100%) | \n 13/08/2023 13:44 | \n
\n \n 2 | \n [0.0001 - 0.001) | \n 12084944 | \n 0.2478 | \n 4,694 BTC | \n $138,013,773 | \n 0.02% (100%) | \n 13/08/2023 13:44 | \n
\n \n 3 | \n [0.001 - 0.01) | \n 11236056 | \n 0.2304 | \n 41,775 BTC | \n $1,228,237,337 | \n 0.21% (99.97%) | \n 13/08/2023 13:44 | \n
\n \n 4 | \n [0.01 - 0.1) | \n 7784483 | \n 0.1596 | \n 261,374 BTC | \n $7,684,675,479 | \n 1.34% (99.76%) | \n 13/08/2023 13:44 | \n
\n \n
\n
"
180 | },
181 | "execution_count": 11,
182 | "metadata": {},
183 | "output_type": "execute_result"
184 | }
185 | ],
186 | "source": [
187 | "# Split the \"% Addresses (Total)\" column by the \"(\" delimiter and keep only the first part\n",
188 | "df['% Addresses (Total)'] = df['% Addresses (Total)'].apply(lambda x: x.split('(')[0].strip())\n",
189 | "\n",
190 | "# Strip the trailing percent sign and convert the values to decimal fractions (e.g. 7.48% -> 0.0748)\n",
191 | "df['% Addresses (Total)'] = df['% Addresses (Total)'].str.rstrip('%').astype('float') / 100\n",
192 | "\n",
193 | "# Display the updated DataFrame\n",
194 | "df.head()"
195 | ],
196 | "metadata": {
197 | "collapsed": false,
198 | "ExecuteTime": {
199 | "end_time": "2023-08-13T11:45:06.909086800Z",
200 | "start_time": "2023-08-13T11:45:06.898549400Z"
201 | }
202 | }
203 | },
204 | {
205 | "cell_type": "code",
206 | "execution_count": 12,
207 | "outputs": [
208 | {
209 | "data": {
210 | "text/plain": " Balance, BTC Addresses % Addresses (Total) Coins \\\n0 (0 - 0.00001) 3648493 0.0748 18.62 \n1 [0.00001 - 0.0001) 9578291 0.1964 411.72 \n2 [0.0001 - 0.001) 12084944 0.2478 4694.00 \n3 [0.001 - 0.01) 11236056 0.2304 41775.00 \n4 [0.01 - 0.1) 7784483 0.1596 261374.00 \n\n USD % Coins (Total) Date and Time \n0 $547,352 0% (100%) 13/08/2023 13:44 \n1 $12,104,875 0% (100%) 13/08/2023 13:44 \n2 $138,013,773 0.02% (100%) 13/08/2023 13:44 \n3 $1,228,237,337 0.21% (99.97%) 13/08/2023 13:44 \n4 $7,684,675,479 1.34% (99.76%) 13/08/2023 13:44 ",
211 | "text/html": "\n\n
\n \n \n | \n Balance, BTC | \n Addresses | \n % Addresses (Total) | \n Coins | \n USD | \n % Coins (Total) | \n Date and Time | \n
\n \n \n \n 0 | \n (0 - 0.00001) | \n 3648493 | \n 0.0748 | \n 18.62 | \n $547,352 | \n 0% (100%) | \n 13/08/2023 13:44 | \n
\n \n 1 | \n [0.00001 - 0.0001) | \n 9578291 | \n 0.1964 | \n 411.72 | \n $12,104,875 | \n 0% (100%) | \n 13/08/2023 13:44 | \n
\n \n 2 | \n [0.0001 - 0.001) | \n 12084944 | \n 0.2478 | \n 4694.00 | \n $138,013,773 | \n 0.02% (100%) | \n 13/08/2023 13:44 | \n
\n \n 3 | \n [0.001 - 0.01) | \n 11236056 | \n 0.2304 | \n 41775.00 | \n $1,228,237,337 | \n 0.21% (99.97%) | \n 13/08/2023 13:44 | \n
\n \n 4 | \n [0.01 - 0.1) | \n 7784483 | \n 0.1596 | \n 261374.00 | \n $7,684,675,479 | \n 1.34% (99.76%) | \n 13/08/2023 13:44 | \n
\n \n
\n
"
212 | },
213 | "execution_count": 12,
214 | "metadata": {},
215 | "output_type": "execute_result"
216 | }
217 | ],
218 | "source": [
219 | "# Strip the characters B, T, C and commas from the \"Coins\" column (e.g. \"261,374 BTC\"), then convert to float\n",
220 | "df['Coins'] = df['Coins'].replace('[BTC,]', '', regex=True).astype(float)\n",
221 | "\n",
222 | "# Round the values to 2 decimal places\n",
223 | "df['Coins'] = df['Coins'].round(2)\n",
224 | "\n",
225 | "# Display the updated DataFrame\n",
226 | "df.head()"
227 | ],
228 | "metadata": {
229 | "collapsed": false,
230 | "ExecuteTime": {
231 | "end_time": "2023-08-13T11:45:30.138855800Z",
232 | "start_time": "2023-08-13T11:45:30.114909900Z"
233 | }
234 | }
235 | },
236 | {
237 | "cell_type": "code",
238 | "execution_count": 13,
239 | "outputs": [
240 | {
241 | "data": {
242 | "text/plain": " Balance, BTC Addresses % Addresses (Total) Coins \\\n0 (0 - 0.00001) 3648493 0.0748 18.62 \n1 [0.00001 - 0.0001) 9578291 0.1964 411.72 \n2 [0.0001 - 0.001) 12084944 0.2478 4694.00 \n3 [0.001 - 0.01) 11236056 0.2304 41775.00 \n4 [0.01 - 0.1) 7784483 0.1596 261374.00 \n\n USD % Coins (Total) Date and Time \n0 $547,352 0.0000 13/08/2023 13:44 \n1 $12,104,875 0.0000 13/08/2023 13:44 \n2 $138,013,773 0.0002 13/08/2023 13:44 \n3 $1,228,237,337 0.0021 13/08/2023 13:44 \n4 $7,684,675,479 0.0134 13/08/2023 13:44 ",
243 | "text/html": "\n\n
\n \n \n | \n Balance, BTC | \n Addresses | \n % Addresses (Total) | \n Coins | \n USD | \n % Coins (Total) | \n Date and Time | \n
\n \n \n \n 0 | \n (0 - 0.00001) | \n 3648493 | \n 0.0748 | \n 18.62 | \n $547,352 | \n 0.0000 | \n 13/08/2023 13:44 | \n
\n \n 1 | \n [0.00001 - 0.0001) | \n 9578291 | \n 0.1964 | \n 411.72 | \n $12,104,875 | \n 0.0000 | \n 13/08/2023 13:44 | \n
\n \n 2 | \n [0.0001 - 0.001) | \n 12084944 | \n 0.2478 | \n 4694.00 | \n $138,013,773 | \n 0.0002 | \n 13/08/2023 13:44 | \n
\n \n 3 | \n [0.001 - 0.01) | \n 11236056 | \n 0.2304 | \n 41775.00 | \n $1,228,237,337 | \n 0.0021 | \n 13/08/2023 13:44 | \n
\n \n 4 | \n [0.01 - 0.1) | \n 7784483 | \n 0.1596 | \n 261374.00 | \n $7,684,675,479 | \n 0.0134 | \n 13/08/2023 13:44 | \n
\n \n
\n
"
244 | },
245 | "execution_count": 13,
246 | "metadata": {},
247 | "output_type": "execute_result"
248 | }
249 | ],
250 | "source": [
251 | "# Split the \"% Coins (Total)\" column by the \"(\" delimiter and keep only the first part\n",
252 | "df['% Coins (Total)'] = df['% Coins (Total)'].apply(lambda x: x.split('(')[0].strip())\n",
253 | "\n",
254 | "# Strip the trailing \"%\" and convert the percentage to a fraction (e.g. 1.34% -> 0.0134)\n",
255 | "df['% Coins (Total)'] = df['% Coins (Total)'].str.rstrip('%').astype('float') / 100\n",
256 | "\n",
257 | "# Display the updated DataFrame\n",
258 | "df.head()"
259 | ],
260 | "metadata": {
261 | "collapsed": false,
262 | "ExecuteTime": {
263 | "end_time": "2023-08-13T11:45:55.422054800Z",
264 | "start_time": "2023-08-13T11:45:55.402557200Z"
265 | }
266 | }
267 | },
268 | {
269 | "cell_type": "code",
270 | "execution_count": 15,
271 | "outputs": [],
272 | "source": [
273 | "# Define the path for the CSV file\n",
274 | "csv_file_path = \"bitcoin_wealth_distribution.csv\"\n",
275 | "\n",
276 | "# Save the DataFrame to a CSV file\n",
277 | "df.to_csv(csv_file_path, index=False)"
278 | ],
279 | "metadata": {
280 | "collapsed": false,
281 | "ExecuteTime": {
282 | "end_time": "2023-08-13T11:46:27.361153Z",
283 | "start_time": "2023-08-13T11:46:27.342234700Z"
284 | }
285 | }
286 | },
287 | {
288 | "cell_type": "code",
289 | "execution_count": null,
290 | "outputs": [],
291 | "source": [],
292 | "metadata": {
293 | "collapsed": false
294 | }
295 | }
296 | ],
297 | "metadata": {
298 | "kernelspec": {
299 | "display_name": "Python 3",
300 | "language": "python",
301 | "name": "python3"
302 | },
303 | "language_info": {
304 | "codemirror_mode": {
305 | "name": "ipython",
306 | "version": 2
307 | },
308 | "file_extension": ".py",
309 | "mimetype": "text/x-python",
310 | "name": "python",
311 | "nbconvert_exporter": "python",
312 | "pygments_lexer": "ipython2",
313 | "version": "2.7.6"
314 | }
315 | },
316 | "nbformat": 4,
317 | "nbformat_minor": 0
318 | }
319 |
--------------------------------------------------------------------------------
/BrokerStatement.xlsx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jbotes/powerbiTutorials/13ec6bab783b77e7b6b904274c140e0efdf32cb1/BrokerStatement.xlsx
--------------------------------------------------------------------------------
/BrokerStatement2.xlsx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jbotes/powerbiTutorials/13ec6bab783b77e7b6b904274c140e0efdf32cb1/BrokerStatement2.xlsx
--------------------------------------------------------------------------------
/BrokerStatement3.xlsx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jbotes/powerbiTutorials/13ec6bab783b77e7b6b904274c140e0efdf32cb1/BrokerStatement3.xlsx
--------------------------------------------------------------------------------
/DAX_Dates.txt:
--------------------------------------------------------------------------------
1 | Dim_Holding_Date =
2 | -- Calculated date table. Takes the dates produced by CALENDARAUTO (), keeps those whose
3 | -- year is on or after the year of the earliest 'Fact_Holdings'[PortfolioDate], and adds
4 | -- calendar and fiscal attribute columns to each date via GENERATE / ROW.
5 | -- Most attributes are returned as dates instead of text; presumably they are given a
6 | -- display format in the model so that they still sort chronologically (TODO confirm).
7 | VAR FirstFiscalMonth = 7 -- First month of fiscal year (1 = January ... 12 = December)
8 | VAR FirstDayOfWeek = 0 -- 0 = Sunday, 1 = Monday, ...
9 | VAR FirstYear = -- Customize first year to use
10 |     YEAR ( MIN ( 'Fact_Holdings'[PortfolioDate] ) )
11 | RETURN
12 |     GENERATE (
13 |         FILTER (
14 |             CALENDARAUTO (),
15 |             YEAR ( [Date] ) >= FirstYear
16 |         ),
17 |         VAR Yr = YEAR ( [Date] ) -- Year Number
18 |         VAR Mn = MONTH ( [Date] ) -- Month Number (1-12)
19 |         VAR Qr = QUARTER ( [Date] ) -- Quarter Number (1-4)
20 |         VAR MnQ = Mn - 3 * ( Qr - 1 ) -- Month in Quarter (1-3)
21 |         -- Month in Fiscal Quarter (1-3). Equal to MnQ when FirstFiscalMonth is 1, 4, 7 or 10,
22 |         -- but stays aligned with the fiscal quarter label (Fqr) for any other start month.
23 |         VAR FMnQ = MOD ( Mn - FirstFiscalMonth, 3 ) + 1
24 |         VAR Wd = WEEKDAY ( [Date], 1 ) - 1 -- Week day number (0 = Sunday, 1 = Monday, ...)
25 |         VAR Fyr = -- Fiscal Year Number (the calendar year in which the fiscal year ends)
26 |             Yr + 1 * ( FirstFiscalMonth > 1 && Mn >= FirstFiscalMonth )
27 |         VAR Fqr = -- Fiscal Quarter (string): quarter of the date shifted back FirstFiscalMonth - 1 months
28 |             FORMAT ( EOMONTH ( [Date], 1 - FirstFiscalMonth ), "\QQ" )
29 |         RETURN
30 |             ROW (
31 |                 "Year", DATE ( Yr, 12, 31 ), -- last day of the calendar year
32 |                 "Year Quarter", FORMAT ( [Date], "\QQ-YYYY" ),
33 |                 "Year Quarter Date", EOMONTH ( [Date], 3 - MnQ ), -- last day of the calendar quarter
34 |                 "Quarter", FORMAT ( [Date], "\QQ" ),
35 |                 -- "Year Month", EOMONTH ( [Date], 0 ), -- use this for end-of-month
36 |                 "Year Month", EOMONTH ( [Date], -1 ) + 1, -- use this for beginning-of-month
37 |                 "Month", DATE ( 1900, MONTH ( [Date] ), 1 ), -- month placed in the fixed year 1900, independent of Yr
38 |                 "Day of Week", DATE ( 1900, 1, 7 + Wd + ( 7 * ( Wd < FirstDayOfWeek ) ) ), -- days before FirstDayOfWeek are pushed one week later
39 |                 "Fiscal Year", DATE ( Fyr + ( FirstFiscalMonth = 1 ), FirstFiscalMonth, 1 ) - 1, -- last day of the fiscal year
40 |                 "Fiscal Year Quarter", "F" & Fqr & "-" & Fyr,
41 |                 -- Uses FMnQ (not MnQ) so every date in one fiscal quarter gets the same quarter-end date.
42 |                 "Fiscal Year Quarter Date", EOMONTH ( [Date], 3 - FMnQ ), -- last day of the fiscal quarter
43 |                 "Fiscal Quarter", "F" & Fqr
44 |             )
45 |     )
46 |
--------------------------------------------------------------------------------
/DataProfiling_Video.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "source": [
6 | "# Import Libraries"
7 | ],
8 | "metadata": {
9 | "collapsed": false
10 | }
11 | },
12 | {
13 | "cell_type": "code",
14 | "execution_count": 11,
15 | "outputs": [],
16 | "source": [
17 | "import pandas as pd\n",
18 | "from pandas_profiling import ProfileReport"
19 | ],
20 | "metadata": {
21 | "collapsed": false,
22 | "ExecuteTime": {
23 | "end_time": "2023-08-06T16:36:26.140875800Z",
24 | "start_time": "2023-08-06T16:36:26.136780200Z"
25 | }
26 | }
27 | },
28 | {
29 | "cell_type": "code",
30 | "execution_count": 12,
31 | "outputs": [],
32 | "source": [
33 | "#Load the Data\n",
34 | "df = pd.read_csv('CleanderDataScience.csv')"
35 | ],
36 | "metadata": {
37 | "collapsed": false,
38 | "ExecuteTime": {
39 | "end_time": "2023-08-06T16:36:53.770667200Z",
40 | "start_time": "2023-08-06T16:36:53.667496Z"
41 | }
42 | }
43 | },
44 | {
45 | "cell_type": "code",
46 | "execution_count": 14,
47 | "outputs": [
48 | {
49 | "data": {
50 | "text/plain": "(3498, 8)"
51 | },
52 | "execution_count": 14,
53 | "metadata": {},
54 | "output_type": "execute_result"
55 | }
56 | ],
57 | "source": [
58 | "#check Shape\n",
59 | "df.shape"
60 | ],
61 | "metadata": {
62 | "collapsed": false,
63 | "ExecuteTime": {
64 | "end_time": "2023-08-06T16:37:04.764610200Z",
65 | "start_time": "2023-08-06T16:37:04.719711600Z"
66 | }
67 | }
68 | },
69 | {
70 | "cell_type": "code",
71 | "execution_count": 15,
72 | "outputs": [
73 | {
74 | "data": {
75 | "text/plain": " Unnamed: 0 Job Title \\\n1780 2004 Data Analyst \n3028 3367 Data Analyst \n1398 1584 Data Engineer \n626 683 Data Scientist \n1659 1872 Data Scientist \n1138 1279 Data Scientist \n1766 1989 Data Scientist \n733 809 Data Analyst \n327 354 Data Scientist \n2154 2418 Data Scientist \n\n Job Description \\\n1780 Purpose of Job\\nWe are currently seeking a tal... \n3028 Business Intelligence AnalystLocation: Austin,... \n1398 Job Title Senior Data Engineer Location Chandl... \n626 As a Project Scientist, you may assist in prep... \n1659 Site Name: USA - Pennsylvania - Upper Providen... \n1138 Min Qualifications\\n\\nREQUIRED EDUCATION / EXP... \n1766 LMI is a government consulting firm, dedicated... \n733 Kelly Services is seeking a Provider Analytics... \n327 In this role you will join the Aviana Data Sci... \n2154 Description\\nScientist/Associate Scientist Can... \n\n Company Name Location \\\n1780 USAA San Antonio, TX \n3028 Iconma, L.L.C. Austin, TX \n1398 ICST, LLC Chandler, AZ \n626 Cedars-Sinai Los Angeles, CA \n1659 GSK Collegeville, PA \n1138 The University of Texas Medical Branch Webster, TX \n1766 LMI San Antonio, TX \n733 Kelly Chicago, IL \n327 Aviana Global Technologies Brea, CA \n2154 Fate Therapeutics, Inc. San Diego, CA \n\n Industry Est Min Salary Est Max Salary \n1780 Insurance Carriers 74000 140000 \n3028 Staffing & Outsourcing 73000 111000 \n1398 Staffing & Outsourcing 84000 101000 \n626 Health Care Services & Hospitals 136000 164000 \n1659 Biotech & Pharmaceuticals 143000 237000 \n1138 Health Care Services & Hospitals 73000 136000 \n1766 Consulting 54000 92000 \n733 Staffing & Outsourcing 31000 56000 \n327 IT Services 102000 164000 \n2154 Accounting 112000 211000 ",
76 | "text/html": "\n\n
\n \n \n | \n Unnamed: 0 | \n Job Title | \n Job Description | \n Company Name | \n Location | \n Industry | \n Est Min Salary | \n Est Max Salary | \n
\n \n \n \n 1780 | \n 2004 | \n Data Analyst | \n Purpose of Job\\nWe are currently seeking a tal... | \n USAA | \n San Antonio, TX | \n Insurance Carriers | \n 74000 | \n 140000 | \n
\n \n 3028 | \n 3367 | \n Data Analyst | \n Business Intelligence AnalystLocation: Austin,... | \n Iconma, L.L.C. | \n Austin, TX | \n Staffing & Outsourcing | \n 73000 | \n 111000 | \n
\n \n 1398 | \n 1584 | \n Data Engineer | \n Job Title Senior Data Engineer Location Chandl... | \n ICST, LLC | \n Chandler, AZ | \n Staffing & Outsourcing | \n 84000 | \n 101000 | \n
\n \n 626 | \n 683 | \n Data Scientist | \n As a Project Scientist, you may assist in prep... | \n Cedars-Sinai | \n Los Angeles, CA | \n Health Care Services & Hospitals | \n 136000 | \n 164000 | \n
\n \n 1659 | \n 1872 | \n Data Scientist | \n Site Name: USA - Pennsylvania - Upper Providen... | \n GSK | \n Collegeville, PA | \n Biotech & Pharmaceuticals | \n 143000 | \n 237000 | \n
\n \n 1138 | \n 1279 | \n Data Scientist | \n Min Qualifications\\n\\nREQUIRED EDUCATION / EXP... | \n The University of Texas Medical Branch | \n Webster, TX | \n Health Care Services & Hospitals | \n 73000 | \n 136000 | \n
\n \n 1766 | \n 1989 | \n Data Scientist | \n LMI is a government consulting firm, dedicated... | \n LMI | \n San Antonio, TX | \n Consulting | \n 54000 | \n 92000 | \n
\n \n 733 | \n 809 | \n Data Analyst | \n Kelly Services is seeking a Provider Analytics... | \n Kelly | \n Chicago, IL | \n Staffing & Outsourcing | \n 31000 | \n 56000 | \n
\n \n 327 | \n 354 | \n Data Scientist | \n In this role you will join the Aviana Data Sci... | \n Aviana Global Technologies | \n Brea, CA | \n IT Services | \n 102000 | \n 164000 | \n
\n \n 2154 | \n 2418 | \n Data Scientist | \n Description\\nScientist/Associate Scientist Can... | \n Fate Therapeutics, Inc. | \n San Diego, CA | \n Accounting | \n 112000 | \n 211000 | \n
\n \n
\n
"
77 | },
78 | "execution_count": 15,
79 | "metadata": {},
80 | "output_type": "execute_result"
81 | }
82 | ],
83 | "source": [
84 | "#Check sample\n",
85 | "df.sample(10)"
86 | ],
87 | "metadata": {
88 | "collapsed": false,
89 | "ExecuteTime": {
90 | "end_time": "2023-08-06T16:37:14.114374300Z",
91 | "start_time": "2023-08-06T16:37:14.103861100Z"
92 | }
93 | }
94 | },
95 | {
96 | "cell_type": "code",
97 | "execution_count": 16,
98 | "outputs": [
99 | {
100 | "data": {
101 | "text/plain": " Unnamed: 0 Est Min Salary Est Max Salary\ncount 3498.000000 3498.000000 3498.000000\nmean 1955.409949 82785.591767 135068.610635\nstd 1129.056284 34270.898430 44593.302576\nmin 0.000000 12000.000000 56000.000000\n25% 970.250000 54000.000000 98000.000000\n50% 1970.500000 79000.000000 130000.000000\n75% 2933.750000 111000.000000 165750.000000\nmax 3908.000000 200000.000000 254000.000000",
102 | "text/html": "\n\n
\n \n \n | \n Unnamed: 0 | \n Est Min Salary | \n Est Max Salary | \n
\n \n \n \n count | \n 3498.000000 | \n 3498.000000 | \n 3498.000000 | \n
\n \n mean | \n 1955.409949 | \n 82785.591767 | \n 135068.610635 | \n
\n \n std | \n 1129.056284 | \n 34270.898430 | \n 44593.302576 | \n
\n \n min | \n 0.000000 | \n 12000.000000 | \n 56000.000000 | \n
\n \n 25% | \n 970.250000 | \n 54000.000000 | \n 98000.000000 | \n
\n \n 50% | \n 1970.500000 | \n 79000.000000 | \n 130000.000000 | \n
\n \n 75% | \n 2933.750000 | \n 111000.000000 | \n 165750.000000 | \n
\n \n max | \n 3908.000000 | \n 200000.000000 | \n 254000.000000 | \n
\n \n
\n
"
103 | },
104 | "execution_count": 16,
105 | "metadata": {},
106 | "output_type": "execute_result"
107 | }
108 | ],
109 | "source": [
110 | "#check description\n",
111 | "df.describe()"
112 | ],
113 | "metadata": {
114 | "collapsed": false,
115 | "ExecuteTime": {
116 | "end_time": "2023-08-06T16:37:24.690856600Z",
117 | "start_time": "2023-08-06T16:37:24.664272600Z"
118 | }
119 | }
120 | },
121 | {
122 | "cell_type": "markdown",
123 | "source": [
124 | "# Run pandas Profiling"
125 | ],
126 | "metadata": {
127 | "collapsed": false
128 | }
129 | },
130 | {
131 | "cell_type": "code",
132 | "execution_count": 17,
133 | "outputs": [],
134 | "source": [
135 | "report = ProfileReport(df, title=\"Data Science Salaries Profile\")\n"
136 | ],
137 | "metadata": {
138 | "collapsed": false,
139 | "ExecuteTime": {
140 | "end_time": "2023-08-06T16:38:34.197617Z",
141 | "start_time": "2023-08-06T16:38:34.170074800Z"
142 | }
143 | }
144 | },
145 | {
146 | "cell_type": "code",
147 | "execution_count": 18,
148 | "outputs": [
149 | {
150 | "data": {
151 | "text/plain": "Summarize dataset: 0%| | 0/5 [00:00, ?it/s]",
152 | "application/vnd.jupyter.widget-view+json": {
153 | "version_major": 2,
154 | "version_minor": 0,
155 | "model_id": "1e14d6fe3d53435f9768de8d58ae611b"
156 | }
157 | },
158 | "metadata": {},
159 | "output_type": "display_data"
160 | },
161 | {
162 | "data": {
163 | "text/plain": "Generate report structure: 0%| | 0/1 [00:00, ?it/s]",
164 | "application/vnd.jupyter.widget-view+json": {
165 | "version_major": 2,
166 | "version_minor": 0,
167 | "model_id": "bbd5c3665a764903b63ad7a2531a9cc5"
168 | }
169 | },
170 | "metadata": {},
171 | "output_type": "display_data"
172 | },
173 | {
174 | "data": {
175 | "text/plain": "Render HTML: 0%| | 0/1 [00:00, ?it/s]",
176 | "application/vnd.jupyter.widget-view+json": {
177 | "version_major": 2,
178 | "version_minor": 0,
179 | "model_id": "5ff884339ec84373bd4ba65e45d5b5bd"
180 | }
181 | },
182 | "metadata": {},
183 | "output_type": "display_data"
184 | },
185 | {
186 | "data": {
187 | "text/plain": "Export report to file: 0%| | 0/1 [00:00, ?it/s]",
188 | "application/vnd.jupyter.widget-view+json": {
189 | "version_major": 2,
190 | "version_minor": 0,
191 | "model_id": "4b660ed747eb4752a5fa909d74487659"
192 | }
193 | },
194 | "metadata": {},
195 | "output_type": "display_data"
196 | }
197 | ],
198 | "source": [
199 | "report.to_file('DataScientist3.html')"
200 | ],
201 | "metadata": {
202 | "collapsed": false,
203 | "ExecuteTime": {
204 | "end_time": "2023-08-06T16:39:10.670023100Z",
205 | "start_time": "2023-08-06T16:39:05.629457400Z"
206 | }
207 | }
208 | },
209 | {
210 | "cell_type": "code",
211 | "execution_count": 19,
212 | "outputs": [],
213 | "source": [
214 | "df_dirty = pd.read_csv('DataScientist2.csv', na_values=-1)"
215 | ],
216 | "metadata": {
217 | "collapsed": false,
218 | "ExecuteTime": {
219 | "end_time": "2023-08-06T16:44:18.012346700Z",
220 | "start_time": "2023-08-06T16:44:17.889347Z"
221 | }
222 | }
223 | },
224 | {
225 | "cell_type": "code",
226 | "execution_count": 20,
227 | "outputs": [
228 | {
229 | "data": {
230 | "text/plain": "Summarize dataset: 0%| | 0/5 [00:00, ?it/s]",
231 | "application/vnd.jupyter.widget-view+json": {
232 | "version_major": 2,
233 | "version_minor": 0,
234 | "model_id": "3cde2fbe575c4a5c821bd6df18915e73"
235 | }
236 | },
237 | "metadata": {},
238 | "output_type": "display_data"
239 | },
240 | {
241 | "data": {
242 | "text/plain": "Generate report structure: 0%| | 0/1 [00:00, ?it/s]",
243 | "application/vnd.jupyter.widget-view+json": {
244 | "version_major": 2,
245 | "version_minor": 0,
246 | "model_id": "bad71b6b1bea44df9809f0ee0241d388"
247 | }
248 | },
249 | "metadata": {},
250 | "output_type": "display_data"
251 | },
252 | {
253 | "data": {
254 | "text/plain": "Render HTML: 0%| | 0/1 [00:00, ?it/s]",
255 | "application/vnd.jupyter.widget-view+json": {
256 | "version_major": 2,
257 | "version_minor": 0,
258 | "model_id": "29d05002174b4eca999dd026db33a3d2"
259 | }
260 | },
261 | "metadata": {},
262 | "output_type": "display_data"
263 | },
264 | {
265 | "data": {
266 | "text/plain": "Export report to file: 0%| | 0/1 [00:00, ?it/s]",
267 | "application/vnd.jupyter.widget-view+json": {
268 | "version_major": 2,
269 | "version_minor": 0,
270 | "model_id": "38c2d85cba37433c98fe8451db9f03a4"
271 | }
272 | },
273 | "metadata": {},
274 | "output_type": "display_data"
275 | }
276 | ],
277 | "source": [
278 | "report_dirty = ProfileReport(df_dirty, title=\"Dirty Profile\")\n",
279 | "report_dirty.to_file('DatascienceDirty.html')"
280 | ],
281 | "metadata": {
282 | "collapsed": false,
283 | "ExecuteTime": {
284 | "end_time": "2023-08-06T16:45:18.962900200Z",
285 | "start_time": "2023-08-06T16:45:11.540727800Z"
286 | }
287 | }
288 | },
289 | {
290 | "cell_type": "code",
291 | "execution_count": null,
292 | "outputs": [],
293 | "source": [],
294 | "metadata": {
295 | "collapsed": false
296 | }
297 | }
298 | ],
299 | "metadata": {
300 | "kernelspec": {
301 | "display_name": "Python 3",
302 | "language": "python",
303 | "name": "python3"
304 | },
305 | "language_info": {
306 | "codemirror_mode": {
307 | "name": "ipython",
308 | "version": 2
309 | },
310 | "file_extension": ".py",
311 | "mimetype": "text/x-python",
312 | "name": "python",
313 | "nbconvert_exporter": "python",
314 | "pygments_lexer": "ipython2",
315 | "version": "2.7.6"
316 | }
317 | },
318 | "nbformat": 4,
319 | "nbformat_minor": 0
320 | }
321 |
--------------------------------------------------------------------------------
/File1.xlsx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jbotes/powerbiTutorials/13ec6bab783b77e7b6b904274c140e0efdf32cb1/File1.xlsx
--------------------------------------------------------------------------------
/File3.xlsx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jbotes/powerbiTutorials/13ec6bab783b77e7b6b904274c140e0efdf32cb1/File3.xlsx
--------------------------------------------------------------------------------
/Holdings_30062022.xlsx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jbotes/powerbiTutorials/13ec6bab783b77e7b6b904274c140e0efdf32cb1/Holdings_30062022.xlsx
--------------------------------------------------------------------------------
/Holdings_30092022.xlsx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jbotes/powerbiTutorials/13ec6bab783b77e7b6b904274c140e0efdf32cb1/Holdings_30092022.xlsx
--------------------------------------------------------------------------------
/Holdings_31032022.xlsx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jbotes/powerbiTutorials/13ec6bab783b77e7b6b904274c140e0efdf32cb1/Holdings_31032022.xlsx
--------------------------------------------------------------------------------
/Holdings_31122022.xlsx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jbotes/powerbiTutorials/13ec6bab783b77e7b6b904274c140e0efdf32cb1/Holdings_31122022.xlsx
--------------------------------------------------------------------------------
/HorizontalDatasheet.xlsx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jbotes/powerbiTutorials/13ec6bab783b77e7b6b904274c140e0efdf32cb1/HorizontalDatasheet.xlsx
--------------------------------------------------------------------------------
/PBI_CrossOverChart_SVB_CS.pbix:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jbotes/powerbiTutorials/13ec6bab783b77e7b6b904274c140e0efdf32cb1/PBI_CrossOverChart_SVB_CS.pbix
--------------------------------------------------------------------------------
/PBI_Holdings_Source.xlsx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jbotes/powerbiTutorials/13ec6bab783b77e7b6b904274c140e0efdf32cb1/PBI_Holdings_Source.xlsx
--------------------------------------------------------------------------------
/PBI_MOM_Video.pbix:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jbotes/powerbiTutorials/13ec6bab783b77e7b6b904274c140e0efdf32cb1/PBI_MOM_Video.pbix
--------------------------------------------------------------------------------
/PBI_Simple_Moving_Average_Video.pbix:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jbotes/powerbiTutorials/13ec6bab783b77e7b6b904274c140e0efdf32cb1/PBI_Simple_Moving_Average_Video.pbix
--------------------------------------------------------------------------------
/PQ_Advanced_Grouping_With_Max.xlsx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jbotes/powerbiTutorials/13ec6bab783b77e7b6b904274c140e0efdf32cb1/PQ_Advanced_Grouping_With_Max.xlsx
--------------------------------------------------------------------------------
/PQ_AllSeasons_Video_2.xlsx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jbotes/powerbiTutorials/13ec6bab783b77e7b6b904274c140e0efdf32cb1/PQ_AllSeasons_Video_2.xlsx
--------------------------------------------------------------------------------
/PQ_BUFFER_VIDEO.xlsx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jbotes/powerbiTutorials/13ec6bab783b77e7b6b904274c140e0efdf32cb1/PQ_BUFFER_VIDEO.xlsx
--------------------------------------------------------------------------------
/PQ_BankStatement_Video.xlsx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jbotes/powerbiTutorials/13ec6bab783b77e7b6b904274c140e0efdf32cb1/PQ_BankStatement_Video.xlsx
--------------------------------------------------------------------------------
/PQ_BlankRow_video.xlsx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jbotes/powerbiTutorials/13ec6bab783b77e7b6b904274c140e0efdf32cb1/PQ_BlankRow_video.xlsx
--------------------------------------------------------------------------------
/PQ_Budget_Video.xlsx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jbotes/powerbiTutorials/13ec6bab783b77e7b6b904274c140e0efdf32cb1/PQ_Budget_Video.xlsx
--------------------------------------------------------------------------------
/PQ_ChatGPT_MarketVideo.pbix:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jbotes/powerbiTutorials/13ec6bab783b77e7b6b904274c140e0efdf32cb1/PQ_ChatGPT_MarketVideo.pbix
--------------------------------------------------------------------------------
/PQ_ClosestNextHoliday_Video.xlsx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jbotes/powerbiTutorials/13ec6bab783b77e7b6b904274c140e0efdf32cb1/PQ_ClosestNextHoliday_Video.xlsx
--------------------------------------------------------------------------------
/PQ_ColumnGroups_BaSensei_Video.xlsx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jbotes/powerbiTutorials/13ec6bab783b77e7b6b904274c140e0efdf32cb1/PQ_ColumnGroups_BaSensei_Video.xlsx
--------------------------------------------------------------------------------
/PQ_Consecutivenumbers_Video2.xlsx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jbotes/powerbiTutorials/13ec6bab783b77e7b6b904274c140e0efdf32cb1/PQ_Consecutivenumbers_Video2.xlsx
--------------------------------------------------------------------------------
/PQ_Counter_Column_Video.xlsx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jbotes/powerbiTutorials/13ec6bab783b77e7b6b904274c140e0efdf32cb1/PQ_Counter_Column_Video.xlsx
--------------------------------------------------------------------------------
/PQ_DOUBLE_BARREL_2_Video.xlsx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jbotes/powerbiTutorials/13ec6bab783b77e7b6b904274c140e0efdf32cb1/PQ_DOUBLE_BARREL_2_Video.xlsx
--------------------------------------------------------------------------------
/PQ_DoubleBarrel_Video.xlsx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jbotes/powerbiTutorials/13ec6bab783b77e7b6b904274c140e0efdf32cb1/PQ_DoubleBarrel_Video.xlsx
--------------------------------------------------------------------------------
/PQ_Double_Headers_YT_Video.xlsx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jbotes/powerbiTutorials/13ec6bab783b77e7b6b904274c140e0efdf32cb1/PQ_Double_Headers_YT_Video.xlsx
--------------------------------------------------------------------------------
/PQ_Dups_Video.xlsx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jbotes/powerbiTutorials/13ec6bab783b77e7b6b904274c140e0efdf32cb1/PQ_Dups_Video.xlsx
--------------------------------------------------------------------------------
/PQ_DynamicSplit_Video.xlsx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jbotes/powerbiTutorials/13ec6bab783b77e7b6b904274c140e0efdf32cb1/PQ_DynamicSplit_Video.xlsx
--------------------------------------------------------------------------------
/PQ_Dynamic_Split_Header_Names.xlsx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jbotes/powerbiTutorials/13ec6bab783b77e7b6b904274c140e0efdf32cb1/PQ_Dynamic_Split_Header_Names.xlsx
--------------------------------------------------------------------------------
/PQ_Dynamic_T_B_Video.xlsx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jbotes/powerbiTutorials/13ec6bab783b77e7b6b904274c140e0efdf32cb1/PQ_Dynamic_T_B_Video.xlsx
--------------------------------------------------------------------------------
/PQ_EACH_VIDEO.xlsx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jbotes/powerbiTutorials/13ec6bab783b77e7b6b904274c140e0efdf32cb1/PQ_EACH_VIDEO.xlsx
--------------------------------------------------------------------------------
/PQ_EMAIL_VIDEO.xlsx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jbotes/powerbiTutorials/13ec6bab783b77e7b6b904274c140e0efdf32cb1/PQ_EMAIL_VIDEO.xlsx
--------------------------------------------------------------------------------
/PQ_Expense_Allocations_Video.xlsx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jbotes/powerbiTutorials/13ec6bab783b77e7b6b904274c140e0efdf32cb1/PQ_Expense_Allocations_Video.xlsx
--------------------------------------------------------------------------------
/PQ_FILTER_PARAM_VIDEO.xlsx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jbotes/powerbiTutorials/13ec6bab783b77e7b6b904274c140e0efdf32cb1/PQ_FILTER_PARAM_VIDEO.xlsx
--------------------------------------------------------------------------------
/PQ_FILTER_TEXT_INPUT_OR_POSITION.xlsx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jbotes/powerbiTutorials/13ec6bab783b77e7b6b904274c140e0efdf32cb1/PQ_FILTER_TEXT_INPUT_OR_POSITION.xlsx
--------------------------------------------------------------------------------
/PQ_Filter_Before_Expand_Video.xlsx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jbotes/powerbiTutorials/13ec6bab783b77e7b6b904274c140e0efdf32cb1/PQ_Filter_Before_Expand_Video.xlsx
--------------------------------------------------------------------------------
/PQ_Filter_Columns_Once_Video.xlsx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jbotes/powerbiTutorials/13ec6bab783b77e7b6b904274c140e0efdf32cb1/PQ_Filter_Columns_Once_Video.xlsx
--------------------------------------------------------------------------------
/PQ_Grouping_Video_2.xlsx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jbotes/powerbiTutorials/13ec6bab783b77e7b6b904274c140e0efdf32cb1/PQ_Grouping_Video_2.xlsx
--------------------------------------------------------------------------------
/PQ_HorzontalStack_Video.xlsx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jbotes/powerbiTutorials/13ec6bab783b77e7b6b904274c140e0efdf32cb1/PQ_HorzontalStack_Video.xlsx
--------------------------------------------------------------------------------
/PQ_Jagged_Stacked_Video.xlsx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jbotes/powerbiTutorials/13ec6bab783b77e7b6b904274c140e0efdf32cb1/PQ_Jagged_Stacked_Video.xlsx
--------------------------------------------------------------------------------
/PQ_Jagged_Tables_Source.xlsx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jbotes/powerbiTutorials/13ec6bab783b77e7b6b904274c140e0efdf32cb1/PQ_Jagged_Tables_Source.xlsx
--------------------------------------------------------------------------------
/PQ_JunkRows_Video.xlsx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jbotes/powerbiTutorials/13ec6bab783b77e7b6b904274c140e0efdf32cb1/PQ_JunkRows_Video.xlsx
--------------------------------------------------------------------------------
/PQ_LOOPING_VIDEO_2.pbix:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jbotes/powerbiTutorials/13ec6bab783b77e7b6b904274c140e0efdf32cb1/PQ_LOOPING_VIDEO_2.pbix
--------------------------------------------------------------------------------
/PQ_LastNTotal_Video.xlsx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jbotes/powerbiTutorials/13ec6bab783b77e7b6b904274c140e0efdf32cb1/PQ_LastNTotal_Video.xlsx
--------------------------------------------------------------------------------
/PQ_ListAccum_NewColumns_Video.xlsx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jbotes/powerbiTutorials/13ec6bab783b77e7b6b904274c140e0efdf32cb1/PQ_ListAccum_NewColumns_Video.xlsx
--------------------------------------------------------------------------------
/PQ_ListAlernate_Video.xlsx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jbotes/powerbiTutorials/13ec6bab783b77e7b6b904274c140e0efdf32cb1/PQ_ListAlernate_Video.xlsx
--------------------------------------------------------------------------------
/PQ_Listaccumulate_video.xlsx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jbotes/powerbiTutorials/13ec6bab783b77e7b6b904274c140e0efdf32cb1/PQ_Listaccumulate_video.xlsx
--------------------------------------------------------------------------------
/PQ_LsatDate_Video.xlsx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jbotes/powerbiTutorials/13ec6bab783b77e7b6b904274c140e0efdf32cb1/PQ_LsatDate_Video.xlsx
--------------------------------------------------------------------------------
/PQ_Max_Value_Row_Video.xlsx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jbotes/powerbiTutorials/13ec6bab783b77e7b6b904274c140e0efdf32cb1/PQ_Max_Value_Row_Video.xlsx
--------------------------------------------------------------------------------
/PQ_Merge_Video.xlsx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jbotes/powerbiTutorials/13ec6bab783b77e7b6b904274c140e0efdf32cb1/PQ_Merge_Video.xlsx
--------------------------------------------------------------------------------
/PQ_OCCURANCE_COUNT_VIDEO.xlsx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jbotes/powerbiTutorials/13ec6bab783b77e7b6b904274c140e0efdf32cb1/PQ_OCCURANCE_COUNT_VIDEO.xlsx
--------------------------------------------------------------------------------
/PQ_OpenAI_Pyhton.xlsx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jbotes/powerbiTutorials/13ec6bab783b77e7b6b904274c140e0efdf32cb1/PQ_OpenAI_Pyhton.xlsx
--------------------------------------------------------------------------------
/PQ_PadMiddle_Video.xlsx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jbotes/powerbiTutorials/13ec6bab783b77e7b6b904274c140e0efdf32cb1/PQ_PadMiddle_Video.xlsx
--------------------------------------------------------------------------------
/PQ_ParsingDelimitedDataToTable_Video2.xlsx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jbotes/powerbiTutorials/13ec6bab783b77e7b6b904274c140e0efdf32cb1/PQ_ParsingDelimitedDataToTable_Video2.xlsx
--------------------------------------------------------------------------------
/PQ_Pattern_Extraction_Video.xlsx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jbotes/powerbiTutorials/13ec6bab783b77e7b6b904274c140e0efdf32cb1/PQ_Pattern_Extraction_Video.xlsx
--------------------------------------------------------------------------------
/PQ_PercentageOfTotal_Video.xlsx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jbotes/powerbiTutorials/13ec6bab783b77e7b6b904274c140e0efdf32cb1/PQ_PercentageOfTotal_Video.xlsx
--------------------------------------------------------------------------------
/PQ_Portfolio_Comparer_Video.xlsx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jbotes/powerbiTutorials/13ec6bab783b77e7b6b904274c140e0efdf32cb1/PQ_Portfolio_Comparer_Video.xlsx
--------------------------------------------------------------------------------
/PQ_PreviousRowsEmployeeTimeLine.xlsx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jbotes/powerbiTutorials/13ec6bab783b77e7b6b904274c140e0efdf32cb1/PQ_PreviousRowsEmployeeTimeLine.xlsx
--------------------------------------------------------------------------------
/PQ_PreviousRowsEmployeeTimeLine_Video.xlsx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jbotes/powerbiTutorials/13ec6bab783b77e7b6b904274c140e0efdf32cb1/PQ_PreviousRowsEmployeeTimeLine_Video.xlsx
--------------------------------------------------------------------------------
/PQ_RatingsData.xlsx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jbotes/powerbiTutorials/13ec6bab783b77e7b6b904274c140e0efdf32cb1/PQ_RatingsData.xlsx
--------------------------------------------------------------------------------
/PQ_ReportheaderIntoReportVideo.xlsx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jbotes/powerbiTutorials/13ec6bab783b77e7b6b904274c140e0efdf32cb1/PQ_ReportheaderIntoReportVideo.xlsx
--------------------------------------------------------------------------------
/PQ_ReverseFillDown_Video.xlsx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jbotes/powerbiTutorials/13ec6bab783b77e7b6b904274c140e0efdf32cb1/PQ_ReverseFillDown_Video.xlsx
--------------------------------------------------------------------------------
/PQ_SPRatings_ListAccum_Source_Video2.xlsx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jbotes/powerbiTutorials/13ec6bab783b77e7b6b904274c140e0efdf32cb1/PQ_SPRatings_ListAccum_Source_Video2.xlsx
--------------------------------------------------------------------------------
/PQ_Song_List_Stacked_Video.xlsx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jbotes/powerbiTutorials/13ec6bab783b77e7b6b904274c140e0efdf32cb1/PQ_Song_List_Stacked_Video.xlsx
--------------------------------------------------------------------------------
/PQ_SourceData_Portfolios.xlsx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jbotes/powerbiTutorials/13ec6bab783b77e7b6b904274c140e0efdf32cb1/PQ_SourceData_Portfolios.xlsx
--------------------------------------------------------------------------------
/PQ_StepsProcess_Video.xlsx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jbotes/powerbiTutorials/13ec6bab783b77e7b6b904274c140e0efdf32cb1/PQ_StepsProcess_Video.xlsx
--------------------------------------------------------------------------------
/PQ_Stock_Groupings_Video.xlsx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jbotes/powerbiTutorials/13ec6bab783b77e7b6b904274c140e0efdf32cb1/PQ_Stock_Groupings_Video.xlsx
--------------------------------------------------------------------------------
/PQ_Subtotals_Video.xlsx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jbotes/powerbiTutorials/13ec6bab783b77e7b6b904274c140e0efdf32cb1/PQ_Subtotals_Video.xlsx
--------------------------------------------------------------------------------
/PQ_TextReverseSorting_Video.xlsx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jbotes/powerbiTutorials/13ec6bab783b77e7b6b904274c140e0efdf32cb1/PQ_TextReverseSorting_Video.xlsx
--------------------------------------------------------------------------------
/PQ_Unstack_Data.xlsx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jbotes/powerbiTutorials/13ec6bab783b77e7b6b904274c140e0efdf32cb1/PQ_Unstack_Data.xlsx
--------------------------------------------------------------------------------
/PQ_Unstack_Data_Uneven_Video.xlsx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jbotes/powerbiTutorials/13ec6bab783b77e7b6b904274c140e0efdf32cb1/PQ_Unstack_Data_Uneven_Video.xlsx
--------------------------------------------------------------------------------
/PQ_VALUE_ALL_COLUMNS_VIDEO.xlsx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jbotes/powerbiTutorials/13ec6bab783b77e7b6b904274c140e0efdf32cb1/PQ_VALUE_ALL_COLUMNS_VIDEO.xlsx
--------------------------------------------------------------------------------
/PQ_Working Days_2_Video.xlsx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jbotes/powerbiTutorials/13ec6bab783b77e7b6b904274c140e0efdf32cb1/PQ_Working Days_2_Video.xlsx
--------------------------------------------------------------------------------
/PQ_Working Hours_Video.xlsx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jbotes/powerbiTutorials/13ec6bab783b77e7b6b904274c140e0efdf32cb1/PQ_Working Hours_Video.xlsx
--------------------------------------------------------------------------------
/PQ_bulkReplace_YT_Video.xlsx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jbotes/powerbiTutorials/13ec6bab783b77e7b6b904274c140e0efdf32cb1/PQ_bulkReplace_YT_Video.xlsx
--------------------------------------------------------------------------------
/PQ_combineWithdifferntColumnNames.xlsx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jbotes/powerbiTutorials/13ec6bab783b77e7b6b904274c140e0efdf32cb1/PQ_combineWithdifferntColumnNames.xlsx
--------------------------------------------------------------------------------
/Portfolios1.xlsx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jbotes/powerbiTutorials/13ec6bab783b77e7b6b904274c140e0efdf32cb1/Portfolios1.xlsx
--------------------------------------------------------------------------------
/Portfolios2.xlsx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jbotes/powerbiTutorials/13ec6bab783b77e7b6b904274c140e0efdf32cb1/Portfolios2.xlsx
--------------------------------------------------------------------------------
/Portfolios3.xlsx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jbotes/powerbiTutorials/13ec6bab783b77e7b6b904274c140e0efdf32cb1/Portfolios3.xlsx
--------------------------------------------------------------------------------
/PowerQuery_Dynamically_Clean_DataSet_Headers.xlsx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jbotes/powerbiTutorials/13ec6bab783b77e7b6b904274c140e0efdf32cb1/PowerQuery_Dynamically_Clean_DataSet_Headers.xlsx
--------------------------------------------------------------------------------
/Pq_Address_Split_Video.xlsx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jbotes/powerbiTutorials/13ec6bab783b77e7b6b904274c140e0efdf32cb1/Pq_Address_Split_Video.xlsx
--------------------------------------------------------------------------------
/Pq_AttribuetDescription_Video.xlsx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jbotes/powerbiTutorials/13ec6bab783b77e7b6b904274c140e0efdf32cb1/Pq_AttribuetDescription_Video.xlsx
--------------------------------------------------------------------------------
/Pq_Conditionally_Replace_Video.xlsx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jbotes/powerbiTutorials/13ec6bab783b77e7b6b904274c140e0efdf32cb1/Pq_Conditionally_Replace_Video.xlsx
--------------------------------------------------------------------------------
/Pq_DynamicSortColumns_Video.xlsx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jbotes/powerbiTutorials/13ec6bab783b77e7b6b904274c140e0efdf32cb1/Pq_DynamicSortColumns_Video.xlsx
--------------------------------------------------------------------------------
/Pq_DynamicTRansformColumnNames_Video.xlsx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jbotes/powerbiTutorials/13ec6bab783b77e7b6b904274c140e0efdf32cb1/Pq_DynamicTRansformColumnNames_Video.xlsx
--------------------------------------------------------------------------------
/Pq_Name_Changes_Video.xlsx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jbotes/powerbiTutorials/13ec6bab783b77e7b6b904274c140e0efdf32cb1/Pq_Name_Changes_Video.xlsx
--------------------------------------------------------------------------------
/Pq_Portfolio_Video.xlsx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jbotes/powerbiTutorials/13ec6bab783b77e7b6b904274c140e0efdf32cb1/Pq_Portfolio_Video.xlsx
--------------------------------------------------------------------------------
/Pq_Query_Referencing_Video.xlsx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jbotes/powerbiTutorials/13ec6bab783b77e7b6b904274c140e0efdf32cb1/Pq_Query_Referencing_Video.xlsx
--------------------------------------------------------------------------------
/Pq_SplitDynamically_Source.xlsx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jbotes/powerbiTutorials/13ec6bab783b77e7b6b904274c140e0efdf32cb1/Pq_SplitDynamically_Source.xlsx
--------------------------------------------------------------------------------
/StockQuotesData.xlsx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jbotes/powerbiTutorials/13ec6bab783b77e7b6b904274c140e0efdf32cb1/StockQuotesData.xlsx
--------------------------------------------------------------------------------
/StockQuotes_SVB_CS.xlsx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jbotes/powerbiTutorials/13ec6bab783b77e7b6b904274c140e0efdf32cb1/StockQuotes_SVB_CS.xlsx
--------------------------------------------------------------------------------
/Trades_video.xlsx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jbotes/powerbiTutorials/13ec6bab783b77e7b6b904274c140e0efdf32cb1/Trades_video.xlsx
--------------------------------------------------------------------------------
/pattern2Video.xlsx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jbotes/powerbiTutorials/13ec6bab783b77e7b6b904274c140e0efdf32cb1/pattern2Video.xlsx
--------------------------------------------------------------------------------
/us_stock_sales_2024.csv:
--------------------------------------------------------------------------------
1 | StockCode,Jan-24,Feb-24,Mar-24,Apr-24,May-24
2 | AAPL,1250,1320,1280,1380,1300
3 | MSFT,1150,1180,1120,1200,1190
4 | GOOGL,980,1020,1000,1050,1010
5 | AMZN,860,890,870,920,910
6 | META,770,800,780,820,810
7 | NVDA,1450,1500,1480,1550,1520
8 | TSLA,1350,1400,1380,1450,1420
9 | NFLX,660,700,680,720,710
10 | AMD,540,580,560,600,590
11 | INTC,470,500,480,520,510
12 |
--------------------------------------------------------------------------------
/us_stock_sales_2024_extended.csv:
--------------------------------------------------------------------------------
1 | StockCode,Jan-24,Feb-24,Mar-24,Apr-24,May-24,Jun-24,Jul-24,Aug-24,Sept-24,Oct-24,Nov-24
2 | AAPL,1250,1320,1280,1380,1300,1350,1400,1450,1500,1550,1600
3 | MSFT,1150,1180,1120,1200,1190,1220,1250,1280,1300,1330,1360
4 | GOOGL,980,1020,1000,1050,1010,1050,1080,1100,1120,1150,1180
5 | AMZN,860,890,870,920,910,940,970,1000,1030,1060,1090
6 | META,770,800,780,820,810,850,870,900,930,950,980
7 | NVDA,1450,1500,1480,1550,1520,1580,1600,1650,1700,1750,1800
8 | TSLA,1350,1400,1380,1450,1420,1480,1500,1550,1600,1650,1700
9 | NFLX,660,700,680,720,710,730,750,770,790,810,830
10 | AMD,540,580,560,600,590,610,630,650,670,690,710
11 | INTC,470,500,480,520,510,530,550,570,590,610,630
12 |
--------------------------------------------------------------------------------