├── README.md
├── generate_people.py
├── generate_dataset.ipynb
└── Analyse Dataset.ipynb
/README.md:
--------------------------------------------------------------------------------
1 | # pandas_workshop
2 | A tutorial on creating, editing, and analysing CSV files with the pandas library.
3 |
4 | Includes two notebooks and the data-generating script (`generate_people.py`).
5 |
--------------------------------------------------------------------------------
/generate_people.py:
--------------------------------------------------------------------------------
1 | import pandas as pd
2 | import random
3 |
# Pools of candidate values that the generator functions draw from.
names = [
    "Albert",
    "John",
    "Richard",
    "Henry",
    "William",
]
surnames = [
    "Goodman",
    "Black",
    "White",
    "Green",
    "Joneson",
]
# Ten salary levels, each a multiple of 500 in the range 5000..15000.
# Every level is drawn independently, so the list may contain duplicates.
salaries = [random.randint(10, 30) * 500 for _ in range(10)]
7 |
def generate_random_person(names, surnames, salaries):
    """Build one random person record.

    Each field is picked independently at random from its own pool.

    Args:
        names: Non-empty sequence of candidate first names.
        surnames: Non-empty sequence of candidate surnames.
        salaries: Non-empty sequence of candidate salaries.

    Returns:
        A dict with the keys "name", "surname" and "salary".

    Raises:
        IndexError: If any of the pools is empty.
    """
    # random.choice picks a single element directly; it avoids building a
    # throwaway one-element list the way random.sample(pool, 1)[0] does.
    return {
        "name": random.choice(names),
        "surname": random.choice(surnames),
        "salary": random.choice(salaries),
    }
def generate_people(k):
    """Return a list of ``k`` random person records.

    Every record is produced by ``generate_random_person`` using the
    module-level ``names``, ``surnames`` and ``salaries`` pools.
    """
    people = []
    for _ in range(k):
        people.append(generate_random_person(names, surnames, salaries))
    return people
14 |
# Build a 50-row table with a fixed column order and save it as CSV.
df = pd.DataFrame(generate_people(50), columns=["name", "surname", "salary"])
# index=False keeps the auto-generated row index out of the file; otherwise
# pd.read_csv() later surfaces it as a spurious "Unnamed: 0" column.
df.to_csv("random_people.csv", index=False)
17 |
18 |
--------------------------------------------------------------------------------
/generate_dataset.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "code",
5 | "execution_count": 3,
6 | "metadata": {},
7 | "outputs": [],
8 | "source": [
9 | "import pandas as pd\n",
10 | "import random"
11 | ]
12 | },
13 | {
14 | "cell_type": "code",
15 | "execution_count": 4,
16 | "metadata": {},
17 | "outputs": [],
18 | "source": [
19 | "names = [\"Albert\",\"John\",\"Richard\",\"Henry\",\"William\"]\n",
20 | "surnames = [\"Goodman\",\"Black\",\"White\",\"Green\",\"Joneson\"]\n",
21 | "salaries = [500*random.randint(10,30) for _ in range(10)]"
22 | ]
23 | },
24 | {
25 | "cell_type": "code",
26 | "execution_count": 25,
27 | "metadata": {},
28 | "outputs": [],
29 | "source": [
30 | "def generate_random_person(names, surnames, salaries):\n",
31 | " return {\"name\":random.sample(names,1)[0],\n",
32 | " \"surname\":random.sample(surnames,1)[0],\n",
33 | " \"salary\":random.sample(salaries,1)[0]}\n",
34 | "def generate_people(k):\n",
35 | " return [generate_random_person(names, surnames, salaries) for _ in range(k)]"
36 | ]
37 | },
38 | {
39 | "cell_type": "code",
40 | "execution_count": 26,
41 | "metadata": {},
42 | "outputs": [
43 | {
44 | "data": {
45 | "text/plain": [
46 | "{'name': 'Richard', 'salary': 7500, 'surname': 'Joneson'}"
47 | ]
48 | },
49 | "execution_count": 26,
50 | "metadata": {},
51 | "output_type": "execute_result"
52 | }
53 | ],
54 | "source": [
55 | "generate_random_person(names, surnames, salaries)"
56 | ]
57 | },
58 | {
59 | "cell_type": "code",
60 | "execution_count": 28,
61 | "metadata": {},
62 | "outputs": [],
63 | "source": [
64 | "df = pd.DataFrame(generate_people(50),columns=[\"name\",\"surname\",\"salary\"])"
65 | ]
66 | },
67 | {
68 | "cell_type": "code",
69 | "execution_count": 29,
70 | "metadata": {},
71 | "outputs": [
72 | {
73 | "data": {
74 | "text/html": [
75 | "
\n",
76 | "\n",
89 | "
\n",
90 | " \n",
91 | " \n",
92 | " | \n",
93 | " name | \n",
94 | " surname | \n",
95 | " salary | \n",
96 | "
\n",
97 | " \n",
98 | " \n",
99 | " \n",
100 | " | 0 | \n",
101 | " Henry | \n",
102 | " Goodman | \n",
103 | " 7500 | \n",
104 | "
\n",
105 | " \n",
106 | " | 1 | \n",
107 | " Henry | \n",
108 | " Black | \n",
109 | " 9500 | \n",
110 | "
\n",
111 | " \n",
112 | " | 2 | \n",
113 | " William | \n",
114 | " Goodman | \n",
115 | " 7000 | \n",
116 | "
\n",
117 | " \n",
118 | " | 3 | \n",
119 | " John | \n",
120 | " Black | \n",
121 | " 6000 | \n",
122 | "
\n",
123 | " \n",
124 | " | 4 | \n",
125 | " Albert | \n",
126 | " White | \n",
127 | " 9500 | \n",
128 | "
\n",
129 | " \n",
130 | " | 5 | \n",
131 | " William | \n",
132 | " Goodman | \n",
133 | " 7500 | \n",
134 | "
\n",
135 | " \n",
136 | " | 6 | \n",
137 | " Richard | \n",
138 | " Green | \n",
139 | " 12500 | \n",
140 | "
\n",
141 | " \n",
142 | " | 7 | \n",
143 | " Albert | \n",
144 | " Goodman | \n",
145 | " 7500 | \n",
146 | "
\n",
147 | " \n",
148 | " | 8 | \n",
149 | " Albert | \n",
150 | " Joneson | \n",
151 | " 12500 | \n",
152 | "
\n",
153 | " \n",
154 | " | 9 | \n",
155 | " Richard | \n",
156 | " Black | \n",
157 | " 6000 | \n",
158 | "
\n",
159 | " \n",
160 | " | 10 | \n",
161 | " William | \n",
162 | " Green | \n",
163 | " 7500 | \n",
164 | "
\n",
165 | " \n",
166 | " | 11 | \n",
167 | " William | \n",
168 | " Joneson | \n",
169 | " 6000 | \n",
170 | "
\n",
171 | " \n",
172 | " | 12 | \n",
173 | " William | \n",
174 | " Joneson | \n",
175 | " 7500 | \n",
176 | "
\n",
177 | " \n",
178 | " | 13 | \n",
179 | " Richard | \n",
180 | " Green | \n",
181 | " 7000 | \n",
182 | "
\n",
183 | " \n",
184 | " | 14 | \n",
185 | " Henry | \n",
186 | " Goodman | \n",
187 | " 9500 | \n",
188 | "
\n",
189 | " \n",
190 | " | 15 | \n",
191 | " Richard | \n",
192 | " Joneson | \n",
193 | " 6000 | \n",
194 | "
\n",
195 | " \n",
196 | " | 16 | \n",
197 | " William | \n",
198 | " Green | \n",
199 | " 8500 | \n",
200 | "
\n",
201 | " \n",
202 | " | 17 | \n",
203 | " John | \n",
204 | " Green | \n",
205 | " 7500 | \n",
206 | "
\n",
207 | " \n",
208 | " | 18 | \n",
209 | " Henry | \n",
210 | " White | \n",
211 | " 9500 | \n",
212 | "
\n",
213 | " \n",
214 | " | 19 | \n",
215 | " John | \n",
216 | " Joneson | \n",
217 | " 7000 | \n",
218 | "
\n",
219 | " \n",
220 | " | 20 | \n",
221 | " Albert | \n",
222 | " Black | \n",
223 | " 7500 | \n",
224 | "
\n",
225 | " \n",
226 | " | 21 | \n",
227 | " Richard | \n",
228 | " White | \n",
229 | " 7500 | \n",
230 | "
\n",
231 | " \n",
232 | " | 22 | \n",
233 | " Richard | \n",
234 | " Black | \n",
235 | " 8500 | \n",
236 | "
\n",
237 | " \n",
238 | " | 23 | \n",
239 | " Henry | \n",
240 | " Goodman | \n",
241 | " 7500 | \n",
242 | "
\n",
243 | " \n",
244 | " | 24 | \n",
245 | " Henry | \n",
246 | " Black | \n",
247 | " 7000 | \n",
248 | "
\n",
249 | " \n",
250 | " | 25 | \n",
251 | " John | \n",
252 | " Green | \n",
253 | " 11500 | \n",
254 | "
\n",
255 | " \n",
256 | " | 26 | \n",
257 | " John | \n",
258 | " Black | \n",
259 | " 8500 | \n",
260 | "
\n",
261 | " \n",
262 | " | 27 | \n",
263 | " Albert | \n",
264 | " Green | \n",
265 | " 11500 | \n",
266 | "
\n",
267 | " \n",
268 | " | 28 | \n",
269 | " John | \n",
270 | " Goodman | \n",
271 | " 7500 | \n",
272 | "
\n",
273 | " \n",
274 | " | 29 | \n",
275 | " John | \n",
276 | " White | \n",
277 | " 11500 | \n",
278 | "
\n",
279 | " \n",
280 | " | 30 | \n",
281 | " William | \n",
282 | " White | \n",
283 | " 7000 | \n",
284 | "
\n",
285 | " \n",
286 | " | 31 | \n",
287 | " John | \n",
288 | " White | \n",
289 | " 9500 | \n",
290 | "
\n",
291 | " \n",
292 | " | 32 | \n",
293 | " Albert | \n",
294 | " Green | \n",
295 | " 9500 | \n",
296 | "
\n",
297 | " \n",
298 | " | 33 | \n",
299 | " William | \n",
300 | " Green | \n",
301 | " 6000 | \n",
302 | "
\n",
303 | " \n",
304 | " | 34 | \n",
305 | " William | \n",
306 | " Black | \n",
307 | " 7000 | \n",
308 | "
\n",
309 | " \n",
310 | " | 35 | \n",
311 | " Henry | \n",
312 | " White | \n",
313 | " 7000 | \n",
314 | "
\n",
315 | " \n",
316 | " | 36 | \n",
317 | " Albert | \n",
318 | " Black | \n",
319 | " 7000 | \n",
320 | "
\n",
321 | " \n",
322 | " | 37 | \n",
323 | " John | \n",
324 | " Goodman | \n",
325 | " 7500 | \n",
326 | "
\n",
327 | " \n",
328 | " | 38 | \n",
329 | " Richard | \n",
330 | " White | \n",
331 | " 11500 | \n",
332 | "
\n",
333 | " \n",
334 | " | 39 | \n",
335 | " Richard | \n",
336 | " Goodman | \n",
337 | " 7000 | \n",
338 | "
\n",
339 | " \n",
340 | " | 40 | \n",
341 | " Henry | \n",
342 | " Green | \n",
343 | " 7500 | \n",
344 | "
\n",
345 | " \n",
346 | " | 41 | \n",
347 | " Richard | \n",
348 | " Goodman | \n",
349 | " 8500 | \n",
350 | "
\n",
351 | " \n",
352 | " | 42 | \n",
353 | " William | \n",
354 | " White | \n",
355 | " 11500 | \n",
356 | "
\n",
357 | " \n",
358 | " | 43 | \n",
359 | " John | \n",
360 | " Black | \n",
361 | " 12500 | \n",
362 | "
\n",
363 | " \n",
364 | " | 44 | \n",
365 | " John | \n",
366 | " Green | \n",
367 | " 7500 | \n",
368 | "
\n",
369 | " \n",
370 | " | 45 | \n",
371 | " Richard | \n",
372 | " Joneson | \n",
373 | " 8500 | \n",
374 | "
\n",
375 | " \n",
376 | " | 46 | \n",
377 | " William | \n",
378 | " Goodman | \n",
379 | " 9500 | \n",
380 | "
\n",
381 | " \n",
382 | " | 47 | \n",
383 | " William | \n",
384 | " White | \n",
385 | " 6000 | \n",
386 | "
\n",
387 | " \n",
388 | " | 48 | \n",
389 | " Albert | \n",
390 | " Black | \n",
391 | " 7000 | \n",
392 | "
\n",
393 | " \n",
394 | " | 49 | \n",
395 | " William | \n",
396 | " Green | \n",
397 | " 12500 | \n",
398 | "
\n",
399 | " \n",
400 | "
\n",
401 | "
"
402 | ],
403 | "text/plain": [
404 | " name surname salary\n",
405 | "0 Henry Goodman 7500\n",
406 | "1 Henry Black 9500\n",
407 | "2 William Goodman 7000\n",
408 | "3 John Black 6000\n",
409 | "4 Albert White 9500\n",
410 | "5 William Goodman 7500\n",
411 | "6 Richard Green 12500\n",
412 | "7 Albert Goodman 7500\n",
413 | "8 Albert Joneson 12500\n",
414 | "9 Richard Black 6000\n",
415 | "10 William Green 7500\n",
416 | "11 William Joneson 6000\n",
417 | "12 William Joneson 7500\n",
418 | "13 Richard Green 7000\n",
419 | "14 Henry Goodman 9500\n",
420 | "15 Richard Joneson 6000\n",
421 | "16 William Green 8500\n",
422 | "17 John Green 7500\n",
423 | "18 Henry White 9500\n",
424 | "19 John Joneson 7000\n",
425 | "20 Albert Black 7500\n",
426 | "21 Richard White 7500\n",
427 | "22 Richard Black 8500\n",
428 | "23 Henry Goodman 7500\n",
429 | "24 Henry Black 7000\n",
430 | "25 John Green 11500\n",
431 | "26 John Black 8500\n",
432 | "27 Albert Green 11500\n",
433 | "28 John Goodman 7500\n",
434 | "29 John White 11500\n",
435 | "30 William White 7000\n",
436 | "31 John White 9500\n",
437 | "32 Albert Green 9500\n",
438 | "33 William Green 6000\n",
439 | "34 William Black 7000\n",
440 | "35 Henry White 7000\n",
441 | "36 Albert Black 7000\n",
442 | "37 John Goodman 7500\n",
443 | "38 Richard White 11500\n",
444 | "39 Richard Goodman 7000\n",
445 | "40 Henry Green 7500\n",
446 | "41 Richard Goodman 8500\n",
447 | "42 William White 11500\n",
448 | "43 John Black 12500\n",
449 | "44 John Green 7500\n",
450 | "45 Richard Joneson 8500\n",
451 | "46 William Goodman 9500\n",
452 | "47 William White 6000\n",
453 | "48 Albert Black 7000\n",
454 | "49 William Green 12500"
455 | ]
456 | },
457 | "execution_count": 29,
458 | "metadata": {},
459 | "output_type": "execute_result"
460 | }
461 | ],
462 | "source": [
463 | "df"
464 | ]
465 | },
466 | {
467 | "cell_type": "code",
468 | "execution_count": null,
469 | "metadata": {},
470 | "outputs": [],
471 | "source": []
472 | }
473 | ],
474 | "metadata": {
475 | "kernelspec": {
476 | "display_name": "Python 2",
477 | "language": "python",
478 | "name": "python2"
479 | },
480 | "language_info": {
481 | "codemirror_mode": {
482 | "name": "ipython",
483 | "version": 2
484 | },
485 | "file_extension": ".py",
486 | "mimetype": "text/x-python",
487 | "name": "python",
488 | "nbconvert_exporter": "python",
489 | "pygments_lexer": "ipython2",
490 | "version": "2.7.12"
491 | }
492 | },
493 | "nbformat": 4,
494 | "nbformat_minor": 2
495 | }
496 |
--------------------------------------------------------------------------------
/Analyse Dataset.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "code",
5 | "execution_count": 2,
6 | "metadata": {},
7 | "outputs": [],
8 | "source": [
9 | "import pandas as pd \n",
10 | "\n",
11 | "df = pd.read_csv(\"random_people.csv\")"
12 | ]
13 | },
14 | {
15 | "cell_type": "code",
16 | "execution_count": 3,
17 | "metadata": {},
18 | "outputs": [
19 | {
20 | "data": {
21 | "text/html": [
22 | "\n",
23 | "\n",
36 | "
\n",
37 | " \n",
38 | " \n",
39 | " | \n",
40 | " Unnamed: 0 | \n",
41 | " name | \n",
42 | " surname | \n",
43 | " salary | \n",
44 | "
\n",
45 | " \n",
46 | " \n",
47 | " \n",
48 | " | 0 | \n",
49 | " 0 | \n",
50 | " Henry | \n",
51 | " Joneson | \n",
52 | " 5000 | \n",
53 | "
\n",
54 | " \n",
55 | " | 1 | \n",
56 | " 1 | \n",
57 | " Albert | \n",
58 | " Goodman | \n",
59 | " 10000 | \n",
60 | "
\n",
61 | " \n",
62 | " | 2 | \n",
63 | " 2 | \n",
64 | " William | \n",
65 | " Goodman | \n",
66 | " 10000 | \n",
67 | "
\n",
68 | " \n",
69 | " | 3 | \n",
70 | " 3 | \n",
71 | " John | \n",
72 | " Joneson | \n",
73 | " 10000 | \n",
74 | "
\n",
75 | " \n",
76 | " | 4 | \n",
77 | " 4 | \n",
78 | " Albert | \n",
79 | " Black | \n",
80 | " 10000 | \n",
81 | "
\n",
82 | " \n",
83 | " | 5 | \n",
84 | " 5 | \n",
85 | " Henry | \n",
86 | " Joneson | \n",
87 | " 12000 | \n",
88 | "
\n",
89 | " \n",
90 | " | 6 | \n",
91 | " 6 | \n",
92 | " Richard | \n",
93 | " Green | \n",
94 | " 5500 | \n",
95 | "
\n",
96 | " \n",
97 | " | 7 | \n",
98 | " 7 | \n",
99 | " Henry | \n",
100 | " Joneson | \n",
101 | " 11000 | \n",
102 | "
\n",
103 | " \n",
104 | " | 8 | \n",
105 | " 8 | \n",
106 | " Henry | \n",
107 | " Goodman | \n",
108 | " 12000 | \n",
109 | "
\n",
110 | " \n",
111 | " | 9 | \n",
112 | " 9 | \n",
113 | " Albert | \n",
114 | " Joneson | \n",
115 | " 11000 | \n",
116 | "
\n",
117 | " \n",
118 | "
\n",
119 | "
"
120 | ],
121 | "text/plain": [
122 | " Unnamed: 0 name surname salary\n",
123 | "0 0 Henry Joneson 5000\n",
124 | "1 1 Albert Goodman 10000\n",
125 | "2 2 William Goodman 10000\n",
126 | "3 3 John Joneson 10000\n",
127 | "4 4 Albert Black 10000\n",
128 | "5 5 Henry Joneson 12000\n",
129 | "6 6 Richard Green 5500\n",
130 | "7 7 Henry Joneson 11000\n",
131 | "8 8 Henry Goodman 12000\n",
132 | "9 9 Albert Joneson 11000"
133 | ]
134 | },
135 | "execution_count": 3,
136 | "metadata": {},
137 | "output_type": "execute_result"
138 | }
139 | ],
140 | "source": [
141 | "# Start getting a feel for the data\n",
142 | "df.head(10)"
143 | ]
144 | },
145 | {
146 | "cell_type": "code",
147 | "execution_count": 6,
148 | "metadata": {},
149 | "outputs": [
150 | {
151 | "data": {
152 | "text/plain": [
153 | "10000 16\n",
154 | "12000 8\n",
155 | "11000 7\n",
156 | "9500 6\n",
157 | "5500 5\n",
158 | "13500 5\n",
159 | "5000 3\n",
160 | "Name: salary, dtype: int64"
161 | ]
162 | },
163 | "execution_count": 6,
164 | "metadata": {},
165 | "output_type": "execute_result"
166 | }
167 | ],
168 | "source": [
169 | "df['salary'].value_counts()"
170 | ]
171 | },
172 | {
173 | "cell_type": "code",
174 | "execution_count": 12,
175 | "metadata": {},
176 | "outputs": [
177 | {
178 | "data": {
179 | "text/plain": [
180 | "10000.0"
181 | ]
182 | },
183 | "execution_count": 12,
184 | "metadata": {},
185 | "output_type": "execute_result"
186 | }
187 | ],
188 | "source": [
189 | "df['salary'].median()"
190 | ]
191 | },
192 | {
193 | "cell_type": "code",
194 | "execution_count": 13,
195 | "metadata": {},
196 | "outputs": [],
197 | "source": [
198 | "df[\"salary_after_tax\"] = df[\"salary\"]*.8"
199 | ]
200 | },
201 | {
202 | "cell_type": "code",
203 | "execution_count": 15,
204 | "metadata": {},
205 | "outputs": [],
206 | "source": [
207 | "def tax(s):\n",
208 | " if s>=6000:\n",
209 | " return s*.7\n",
210 | " else:\n",
211 | " return s*.85"
212 | ]
213 | },
214 | {
215 | "cell_type": "code",
216 | "execution_count": 16,
217 | "metadata": {},
218 | "outputs": [],
219 | "source": [
220 | "df[\"salary_after_tax\"] = df[\"salary\"].apply(tax)"
221 | ]
222 | },
223 | {
224 | "cell_type": "code",
225 | "execution_count": 17,
226 | "metadata": {},
227 | "outputs": [
228 | {
229 | "data": {
230 | "text/html": [
231 | "\n",
232 | "\n",
245 | "
\n",
246 | " \n",
247 | " \n",
248 | " | \n",
249 | " Unnamed: 0 | \n",
250 | " name | \n",
251 | " surname | \n",
252 | " salary | \n",
253 | " salary_after_tax | \n",
254 | "
\n",
255 | " \n",
256 | " \n",
257 | " \n",
258 | " | 0 | \n",
259 | " 0 | \n",
260 | " Henry | \n",
261 | " Joneson | \n",
262 | " 5000 | \n",
263 | " 4250.0 | \n",
264 | "
\n",
265 | " \n",
266 | " | 1 | \n",
267 | " 1 | \n",
268 | " Albert | \n",
269 | " Goodman | \n",
270 | " 10000 | \n",
271 | " 7000.0 | \n",
272 | "
\n",
273 | " \n",
274 | " | 2 | \n",
275 | " 2 | \n",
276 | " William | \n",
277 | " Goodman | \n",
278 | " 10000 | \n",
279 | " 7000.0 | \n",
280 | "
\n",
281 | " \n",
282 | " | 3 | \n",
283 | " 3 | \n",
284 | " John | \n",
285 | " Joneson | \n",
286 | " 10000 | \n",
287 | " 7000.0 | \n",
288 | "
\n",
289 | " \n",
290 | " | 4 | \n",
291 | " 4 | \n",
292 | " Albert | \n",
293 | " Black | \n",
294 | " 10000 | \n",
295 | " 7000.0 | \n",
296 | "
\n",
297 | " \n",
298 | "
\n",
299 | "
"
300 | ],
301 | "text/plain": [
302 | " Unnamed: 0 name surname salary salary_after_tax\n",
303 | "0 0 Henry Joneson 5000 4250.0\n",
304 | "1 1 Albert Goodman 10000 7000.0\n",
305 | "2 2 William Goodman 10000 7000.0\n",
306 | "3 3 John Joneson 10000 7000.0\n",
307 | "4 4 Albert Black 10000 7000.0"
308 | ]
309 | },
310 | "execution_count": 17,
311 | "metadata": {},
312 | "output_type": "execute_result"
313 | }
314 | ],
315 | "source": []
316 | },
317 | {
318 | "cell_type": "code",
319 | "execution_count": 18,
320 | "metadata": {},
321 | "outputs": [],
322 | "source": [
323 | "df_low = df[df[\"salary\"]<6000]\n",
324 | "df_high = df[df[\"salary\"]>=6000]"
325 | ]
326 | },
327 | {
328 | "cell_type": "code",
329 | "execution_count": 20,
330 | "metadata": {},
331 | "outputs": [
332 | {
333 | "data": {
334 | "text/plain": [
335 | "10892.857142857143"
336 | ]
337 | },
338 | "execution_count": 20,
339 | "metadata": {},
340 | "output_type": "execute_result"
341 | }
342 | ],
343 | "source": [
344 | "df_high[\"salary\"].mean()"
345 | ]
346 | },
347 | {
348 | "cell_type": "code",
349 | "execution_count": 53,
350 | "metadata": {},
351 | "outputs": [],
352 | "source": [
353 | "df_low= df.loc[df[\"salary\"]<6000,\"salary\"]\n",
354 | "df.loc[df[\"salary\"]<6000,\"salary_after_tax\"] = df_low*.85\n",
355 | "\n",
356 | "df_low= df.loc[df[\"salary\"]>=6000,\"salary\"]\n",
357 | "df.loc[df[\"salary\"]>=6000,\"salary_after_tax\"] = df_low*.7"
358 | ]
359 | },
360 | {
361 | "cell_type": "code",
362 | "execution_count": 38,
363 | "metadata": {},
364 | "outputs": [
365 | {
366 | "data": {
367 | "text/plain": [
368 | "0 4250.0\n",
369 | "6 4675.0\n",
370 | "14 4675.0\n",
371 | "17 4675.0\n",
372 | "21 4250.0\n",
373 | "32 4675.0\n",
374 | "33 4250.0\n",
375 | "37 4675.0\n",
376 | "Name: salary, dtype: float64"
377 | ]
378 | },
379 | "execution_count": 38,
380 | "metadata": {},
381 | "output_type": "execute_result"
382 | }
383 | ],
384 | "source": [
385 | "df_low"
386 | ]
387 | },
388 | {
389 | "cell_type": "code",
390 | "execution_count": 55,
391 | "metadata": {},
392 | "outputs": [
393 | {
394 | "data": {
395 | "text/html": [
396 | "\n",
397 | "\n",
410 | "
\n",
411 | " \n",
412 | " \n",
413 | " | \n",
414 | " Unnamed: 0 | \n",
415 | " name | \n",
416 | " surname | \n",
417 | " salary | \n",
418 | " salary_after_tax | \n",
419 | "
\n",
420 | " \n",
421 | " \n",
422 | " \n",
423 | " | 0 | \n",
424 | " 0 | \n",
425 | " Henry | \n",
426 | " Joneson | \n",
427 | " 5000 | \n",
428 | " 4250.0 | \n",
429 | "
\n",
430 | " \n",
431 | " | 1 | \n",
432 | " 1 | \n",
433 | " Albert | \n",
434 | " Goodman | \n",
435 | " 10000 | \n",
436 | " 7000.0 | \n",
437 | "
\n",
438 | " \n",
439 | " | 2 | \n",
440 | " 2 | \n",
441 | " William | \n",
442 | " Goodman | \n",
443 | " 10000 | \n",
444 | " 7000.0 | \n",
445 | "
\n",
446 | " \n",
447 | " | 3 | \n",
448 | " 3 | \n",
449 | " John | \n",
450 | " Joneson | \n",
451 | " 10000 | \n",
452 | " 7000.0 | \n",
453 | "
\n",
454 | " \n",
455 | " | 4 | \n",
456 | " 4 | \n",
457 | " Albert | \n",
458 | " Black | \n",
459 | " 10000 | \n",
460 | " 7000.0 | \n",
461 | "
\n",
462 | " \n",
463 | " | 5 | \n",
464 | " 5 | \n",
465 | " Henry | \n",
466 | " Joneson | \n",
467 | " 12000 | \n",
468 | " 8400.0 | \n",
469 | "
\n",
470 | " \n",
471 | " | 6 | \n",
472 | " 6 | \n",
473 | " Richard | \n",
474 | " Green | \n",
475 | " 5500 | \n",
476 | " 4675.0 | \n",
477 | "
\n",
478 | " \n",
479 | " | 7 | \n",
480 | " 7 | \n",
481 | " Henry | \n",
482 | " Joneson | \n",
483 | " 11000 | \n",
484 | " 7700.0 | \n",
485 | "
\n",
486 | " \n",
487 | " | 8 | \n",
488 | " 8 | \n",
489 | " Henry | \n",
490 | " Goodman | \n",
491 | " 12000 | \n",
492 | " 8400.0 | \n",
493 | "
\n",
494 | " \n",
495 | " | 9 | \n",
496 | " 9 | \n",
497 | " Albert | \n",
498 | " Joneson | \n",
499 | " 11000 | \n",
500 | " 7700.0 | \n",
501 | "
\n",
502 | " \n",
503 | " | 10 | \n",
504 | " 10 | \n",
505 | " William | \n",
506 | " Joneson | \n",
507 | " 10000 | \n",
508 | " 7000.0 | \n",
509 | "
\n",
510 | " \n",
511 | " | 11 | \n",
512 | " 11 | \n",
513 | " John | \n",
514 | " White | \n",
515 | " 10000 | \n",
516 | " 7000.0 | \n",
517 | "
\n",
518 | " \n",
519 | " | 12 | \n",
520 | " 12 | \n",
521 | " Henry | \n",
522 | " Black | \n",
523 | " 11000 | \n",
524 | " 7700.0 | \n",
525 | "
\n",
526 | " \n",
527 | " | 13 | \n",
528 | " 13 | \n",
529 | " Albert | \n",
530 | " Goodman | \n",
531 | " 10000 | \n",
532 | " 7000.0 | \n",
533 | "
\n",
534 | " \n",
535 | " | 14 | \n",
536 | " 14 | \n",
537 | " Richard | \n",
538 | " Green | \n",
539 | " 5500 | \n",
540 | " 4675.0 | \n",
541 | "
\n",
542 | " \n",
543 | " | 15 | \n",
544 | " 15 | \n",
545 | " Henry | \n",
546 | " Black | \n",
547 | " 13500 | \n",
548 | " 9450.0 | \n",
549 | "
\n",
550 | " \n",
551 | " | 16 | \n",
552 | " 16 | \n",
553 | " Richard | \n",
554 | " White | \n",
555 | " 11000 | \n",
556 | " 7700.0 | \n",
557 | "
\n",
558 | " \n",
559 | " | 17 | \n",
560 | " 17 | \n",
561 | " Albert | \n",
562 | " Black | \n",
563 | " 5500 | \n",
564 | " 4675.0 | \n",
565 | "
\n",
566 | " \n",
567 | " | 18 | \n",
568 | " 18 | \n",
569 | " Henry | \n",
570 | " Green | \n",
571 | " 10000 | \n",
572 | " 7000.0 | \n",
573 | "
\n",
574 | " \n",
575 | " | 19 | \n",
576 | " 19 | \n",
577 | " Albert | \n",
578 | " Joneson | \n",
579 | " 11000 | \n",
580 | " 7700.0 | \n",
581 | "
\n",
582 | " \n",
583 | " | 20 | \n",
584 | " 20 | \n",
585 | " William | \n",
586 | " Goodman | \n",
587 | " 12000 | \n",
588 | " 8400.0 | \n",
589 | "
\n",
590 | " \n",
591 | " | 21 | \n",
592 | " 21 | \n",
593 | " William | \n",
594 | " Goodman | \n",
595 | " 5000 | \n",
596 | " 4250.0 | \n",
597 | "
\n",
598 | " \n",
599 | " | 22 | \n",
600 | " 22 | \n",
601 | " John | \n",
602 | " Green | \n",
603 | " 9500 | \n",
604 | " 6650.0 | \n",
605 | "
\n",
606 | " \n",
607 | " | 23 | \n",
608 | " 23 | \n",
609 | " John | \n",
610 | " Black | \n",
611 | " 13500 | \n",
612 | " 9450.0 | \n",
613 | "
\n",
614 | " \n",
615 | " | 24 | \n",
616 | " 24 | \n",
617 | " Richard | \n",
618 | " Green | \n",
619 | " 13500 | \n",
620 | " 9450.0 | \n",
621 | "
\n",
622 | " \n",
623 | " | 25 | \n",
624 | " 25 | \n",
625 | " Henry | \n",
626 | " Joneson | \n",
627 | " 12000 | \n",
628 | " 8400.0 | \n",
629 | "
\n",
630 | " \n",
631 | " | 26 | \n",
632 | " 26 | \n",
633 | " Henry | \n",
634 | " Goodman | \n",
635 | " 10000 | \n",
636 | " 7000.0 | \n",
637 | "
\n",
638 | " \n",
639 | " | 27 | \n",
640 | " 27 | \n",
641 | " John | \n",
642 | " Joneson | \n",
643 | " 9500 | \n",
644 | " 6650.0 | \n",
645 | "
\n",
646 | " \n",
647 | " | 28 | \n",
648 | " 28 | \n",
649 | " Henry | \n",
650 | " Goodman | \n",
651 | " 11000 | \n",
652 | " 7700.0 | \n",
653 | "
\n",
654 | " \n",
655 | " | 29 | \n",
656 | " 29 | \n",
657 | " William | \n",
658 | " Green | \n",
659 | " 12000 | \n",
660 | " 8400.0 | \n",
661 | "
\n",
662 | " \n",
663 | " | 30 | \n",
664 | " 30 | \n",
665 | " Henry | \n",
666 | " Goodman | \n",
667 | " 10000 | \n",
668 | " 7000.0 | \n",
669 | "
\n",
670 | " \n",
671 | " | 31 | \n",
672 | " 31 | \n",
673 | " Richard | \n",
674 | " Black | \n",
675 | " 10000 | \n",
676 | " 7000.0 | \n",
677 | "
\n",
678 | " \n",
679 | " | 32 | \n",
680 | " 32 | \n",
681 | " Richard | \n",
682 | " Joneson | \n",
683 | " 5500 | \n",
684 | " 4675.0 | \n",
685 | "
\n",
686 | " \n",
687 | " | 33 | \n",
688 | " 33 | \n",
689 | " Richard | \n",
690 | " Joneson | \n",
691 | " 5000 | \n",
692 | " 4250.0 | \n",
693 | "
\n",
694 | " \n",
695 | " | 34 | \n",
696 | " 34 | \n",
697 | " Henry | \n",
698 | " Black | \n",
699 | " 9500 | \n",
700 | " 6650.0 | \n",
701 | "
\n",
702 | " \n",
703 | " | 35 | \n",
704 | " 35 | \n",
705 | " John | \n",
706 | " White | \n",
707 | " 13500 | \n",
708 | " 9450.0 | \n",
709 | "
\n",
710 | " \n",
711 | " | 36 | \n",
712 | " 36 | \n",
713 | " Henry | \n",
714 | " Green | \n",
715 | " 11000 | \n",
716 | " 7700.0 | \n",
717 | "
\n",
718 | " \n",
719 | " | 37 | \n",
720 | " 37 | \n",
721 | " John | \n",
722 | " Black | \n",
723 | " 5500 | \n",
724 | " 4675.0 | \n",
725 | "
\n",
726 | " \n",
727 | " | 38 | \n",
728 | " 38 | \n",
729 | " William | \n",
730 | " Green | \n",
731 | " 12000 | \n",
732 | " 8400.0 | \n",
733 | "
\n",
734 | " \n",
735 | " | 39 | \n",
736 | " 39 | \n",
737 | " Albert | \n",
738 | " Green | \n",
739 | " 10000 | \n",
740 | " 7000.0 | \n",
741 | "
\n",
742 | " \n",
743 | " | 40 | \n",
744 | " 40 | \n",
745 | " Richard | \n",
746 | " Joneson | \n",
747 | " 9500 | \n",
748 | " 6650.0 | \n",
749 | "
\n",
750 | " \n",
751 | " | 41 | \n",
752 | " 41 | \n",
753 | " William | \n",
754 | " Joneson | \n",
755 | " 12000 | \n",
756 | " 8400.0 | \n",
757 | "
\n",
758 | " \n",
759 | " | 42 | \n",
760 | " 42 | \n",
761 | " John | \n",
762 | " Joneson | \n",
763 | " 10000 | \n",
764 | " 7000.0 | \n",
765 | "
\n",
766 | " \n",
767 | " | 43 | \n",
768 | " 43 | \n",
769 | " William | \n",
770 | " Black | \n",
771 | " 10000 | \n",
772 | " 7000.0 | \n",
773 | "
\n",
774 | " \n",
775 | " | 44 | \n",
776 | " 44 | \n",
777 | " Albert | \n",
778 | " Black | \n",
779 | " 12000 | \n",
780 | " 8400.0 | \n",
781 | "
\n",
782 | " \n",
783 | " | 45 | \n",
784 | " 45 | \n",
785 | " John | \n",
786 | " Goodman | \n",
787 | " 13500 | \n",
788 | " 9450.0 | \n",
789 | "
\n",
790 | " \n",
791 | " | 46 | \n",
792 | " 46 | \n",
793 | " John | \n",
794 | " Joneson | \n",
795 | " 10000 | \n",
796 | " 7000.0 | \n",
797 | "
\n",
798 | " \n",
799 | " | 47 | \n",
800 | " 47 | \n",
801 | " John | \n",
802 | " Joneson | \n",
803 | " 9500 | \n",
804 | " 6650.0 | \n",
805 | "
\n",
806 | " \n",
807 | " | 48 | \n",
808 | " 48 | \n",
809 | " Richard | \n",
810 | " Black | \n",
811 | " 9500 | \n",
812 | " 6650.0 | \n",
813 | "
\n",
814 | " \n",
815 | " | 49 | \n",
816 | " 49 | \n",
817 | " Albert | \n",
818 | " White | \n",
819 | " 10000 | \n",
820 | " 7000.0 | \n",
821 | "
\n",
822 | " \n",
823 | "
\n",
824 | "
"
825 | ],
826 | "text/plain": [
827 | " Unnamed: 0 name surname salary salary_after_tax\n",
828 | "0 0 Henry Joneson 5000 4250.0\n",
829 | "1 1 Albert Goodman 10000 7000.0\n",
830 | "2 2 William Goodman 10000 7000.0\n",
831 | "3 3 John Joneson 10000 7000.0\n",
832 | "4 4 Albert Black 10000 7000.0\n",
833 | "5 5 Henry Joneson 12000 8400.0\n",
834 | "6 6 Richard Green 5500 4675.0\n",
835 | "7 7 Henry Joneson 11000 7700.0\n",
836 | "8 8 Henry Goodman 12000 8400.0\n",
837 | "9 9 Albert Joneson 11000 7700.0\n",
838 | "10 10 William Joneson 10000 7000.0\n",
839 | "11 11 John White 10000 7000.0\n",
840 | "12 12 Henry Black 11000 7700.0\n",
841 | "13 13 Albert Goodman 10000 7000.0\n",
842 | "14 14 Richard Green 5500 4675.0\n",
843 | "15 15 Henry Black 13500 9450.0\n",
844 | "16 16 Richard White 11000 7700.0\n",
845 | "17 17 Albert Black 5500 4675.0\n",
846 | "18 18 Henry Green 10000 7000.0\n",
847 | "19 19 Albert Joneson 11000 7700.0\n",
848 | "20 20 William Goodman 12000 8400.0\n",
849 | "21 21 William Goodman 5000 4250.0\n",
850 | "22 22 John Green 9500 6650.0\n",
851 | "23 23 John Black 13500 9450.0\n",
852 | "24 24 Richard Green 13500 9450.0\n",
853 | "25 25 Henry Joneson 12000 8400.0\n",
854 | "26 26 Henry Goodman 10000 7000.0\n",
855 | "27 27 John Joneson 9500 6650.0\n",
856 | "28 28 Henry Goodman 11000 7700.0\n",
857 | "29 29 William Green 12000 8400.0\n",
858 | "30 30 Henry Goodman 10000 7000.0\n",
859 | "31 31 Richard Black 10000 7000.0\n",
860 | "32 32 Richard Joneson 5500 4675.0\n",
861 | "33 33 Richard Joneson 5000 4250.0\n",
862 | "34 34 Henry Black 9500 6650.0\n",
863 | "35 35 John White 13500 9450.0\n",
864 | "36 36 Henry Green 11000 7700.0\n",
865 | "37 37 John Black 5500 4675.0\n",
866 | "38 38 William Green 12000 8400.0\n",
867 | "39 39 Albert Green 10000 7000.0\n",
868 | "40 40 Richard Joneson 9500 6650.0\n",
869 | "41 41 William Joneson 12000 8400.0\n",
870 | "42 42 John Joneson 10000 7000.0\n",
871 | "43 43 William Black 10000 7000.0\n",
872 | "44 44 Albert Black 12000 8400.0\n",
873 | "45 45 John Goodman 13500 9450.0\n",
874 | "46 46 John Joneson 10000 7000.0\n",
875 | "47 47 John Joneson 9500 6650.0\n",
876 | "48 48 Richard Black 9500 6650.0\n",
877 | "49 49 Albert White 10000 7000.0"
878 | ]
879 | },
880 | "execution_count": 55,
881 | "metadata": {},
882 | "output_type": "execute_result"
883 | }
884 | ],
885 | "source": [
886 | "df.head(50)"
887 | ]
888 | }
889 | ],
890 | "metadata": {
891 | "kernelspec": {
892 | "display_name": "Python 2",
893 | "language": "python",
894 | "name": "python2"
895 | },
896 | "language_info": {
897 | "codemirror_mode": {
898 | "name": "ipython",
899 | "version": 2
900 | },
901 | "file_extension": ".py",
902 | "mimetype": "text/x-python",
903 | "name": "python",
904 | "nbconvert_exporter": "python",
905 | "pygments_lexer": "ipython2",
906 | "version": "2.7.12"
907 | }
908 | },
909 | "nbformat": 4,
910 | "nbformat_minor": 2
911 | }
912 |
--------------------------------------------------------------------------------