├── README.md
├── data
│   ├── banklist.csv
│   ├── billboard.csv
│   ├── concat_1.csv
│   ├── concat_2.csv
│   ├── concat_3.csv
│   ├── country_timeseries.csv
│   ├── gapminder.tsv
│   ├── pew.csv
│   ├── raw_data_urls.txt
│   ├── scientists.csv
│   ├── survey_person.csv
│   ├── survey_site.csv
│   ├── survey_survey.csv
│   ├── survey_visited.csv
│   ├── tesla_stock_quandl.csv
│   └── weather.csv
├── notebook
│   ├── .ipynb_checkpoints
│   │   ├── 01_done-checkpoint.ipynb
│   │   ├── 02_done-checkpoint.ipynb
│   │   ├── 03_done-checkpoint.ipynb
│   │   ├── 04_done-checkpoint.ipynb
│   │   ├── 05_done-checkpoint.ipynb
│   │   ├── 06_done-checkpoint.ipynb
│   │   ├── 07_done-checkpoint.ipynb
│   │   ├── 08_done-checkpoint.ipynb
│   │   ├── 09_done-checkpoint.ipynb
│   │   ├── 10_done-Copy1-checkpoint.ipynb
│   │   ├── 10_done-checkpoint.ipynb
│   │   ├── 11_done-checkpoint.ipynb
│   │   ├── 12_done-checkpoint.ipynb
│   │   ├── Special-checkpoint.ipynb
│   │   └── hello_jupyter_notebook-checkpoint.ipynb
│   ├── 02_done.ipynb
│   ├── 02_practice.ipynb
│   ├── 03_done.ipynb
│   ├── 03_practice.ipynb
│   ├── 04_done.ipynb
│   ├── 04_practice.ipynb
│   ├── 05_done.ipynb
│   ├── 05_practice.ipynb
│   ├── 06_done.ipynb
│   ├── 06_practice.ipynb
│   ├── 07_done.ipynb
│   ├── 07_practice.ipynb
│   ├── 08_done.ipynb
│   ├── 08_practice.ipynb
│   ├── 09_done.ipynb
│   ├── 09_practice.ipynb
│   ├── 10_done.ipynb
│   ├── 10_practice.ipynb
│   ├── 11_done.ipynb
│   ├── 11_practice.ipynb
│   ├── 12_done.ipynb
│   ├── 12_practice.ipynb
│   ├── Special.ipynb
│   └── hello_jupyter_notebook.ipynb
└── output
    ├── scientist_names_series.csv
    ├── scientists_df.pickle
    ├── scientists_df.tsv
    ├── scientists_df.xlsx
    ├── scientists_df_no_index.csv
    ├── scientists_names_series.pickle
    ├── scientists_names_series_df.xls
    └── scientists_names_series_df.xlsx
/README.md:
--------------------------------------------------------------------------------
1 | # Do it! 판다스 입문 실습 자료입니다.
2 |
3 | 안녕하세요? 이지스퍼블리싱의 담당 편집자 박현규라고 합니다.
4 | 해당 도서의 실습 자료는 방문하신 깃허브의 오른쪽 위에 있는 `Clone or Download` 버튼(초록색 버튼)으로 내려받을 수 있습니다.
5 | 깃허브가 익숙하지 않은 독자 여러분은 [이지스퍼블리싱 홈페이지](http://easyspub.co.kr/30_Menu/DataList/PUB)에 방문하여
6 | 무료 회원가입을 하시고 [자료실]에서 Do it! 판다스 입문을 검색하여 자료를 내려받으세요.
7 |
--------------------------------------------------------------------------------
/data/concat_1.csv:
--------------------------------------------------------------------------------
1 | A,B,C,D
2 | a0,b0,c0,d0
3 | a1,b1,c1,d1
4 | a2,b2,c2,d2
5 | a3,b3,c3,d3
6 |
--------------------------------------------------------------------------------
/data/concat_2.csv:
--------------------------------------------------------------------------------
1 | A,B,C,D
2 | a4,b4,c4,d4
3 | a5,b5,c5,d5
4 | a6,b6,c6,d6
5 | a7,b7,c7,d7
6 |
--------------------------------------------------------------------------------
/data/concat_3.csv:
--------------------------------------------------------------------------------
1 | A,B,C,D
2 | a8,b8,c8,d8
3 | a9,b9,c9,d9
4 | a10,b10,c10,d10
5 | a11,b11,c11,d11
6 |
--------------------------------------------------------------------------------
/data/country_timeseries.csv:
--------------------------------------------------------------------------------
1 | Date,Day,Cases_Guinea,Cases_Liberia,Cases_SierraLeone,Cases_Nigeria,Cases_Senegal,Cases_UnitedStates,Cases_Spain,Cases_Mali,Deaths_Guinea,Deaths_Liberia,Deaths_SierraLeone,Deaths_Nigeria,Deaths_Senegal,Deaths_UnitedStates,Deaths_Spain,Deaths_Mali
2 | 1/5/2015,289,2776,,10030,,,,,,1786,,2977,,,,,
3 | 1/4/2015,288,2775,,9780,,,,,,1781,,2943,,,,,
4 | 1/3/2015,287,2769,8166,9722,,,,,,1767,3496,2915,,,,,
5 | 1/2/2015,286,,8157,,,,,,,,3496,,,,,,
6 | 12/31/2014,284,2730,8115,9633,,,,,,1739,3471,2827,,,,,
7 | 12/28/2014,281,2706,8018,9446,,,,,,1708,3423,2758,,,,,
8 | 12/27/2014,280,2695,,9409,,,,,,1697,,2732,,,,,
9 | 12/24/2014,277,2630,7977,9203,,,,,,,3413,2655,,,,,
10 | 12/21/2014,273,2597,,9004,,,,,,1607,,2582,,,,,
11 | 12/20/2014,272,2571,7862,8939,,,,,,1586,3384,2556,,,,,
12 | 12/18/2014,271,,7830,,,,,,,,3376,,,,,,
13 | 12/14/2014,267,2416,,8356,,,,,,1525,,2085,,,,,
14 | 12/9/2014,262,,7797,,,,,,,,3290,,,,,,
15 | 12/7/2014,260,2292,,7897,20,1,4,1,7,1428,,1768,8,0,1,0,6
16 | 12/3/2014,256,,7719,,,,,,,,3177,,,,,,
17 | 11/30/2014,253,2164,,7312,20,1,4,1,7,1327,,1583,8,0,1,0,6
18 | 11/28/2014,251,,7635,,,,,,,,3145,,,,,,
19 | 11/23/2014,246,2134,,6599,20,1,4,1,7,1260,,1398,8,0,1,0,6
20 | 11/22/2014,245,,7168,,,,,,,,3016,,,,,,
21 | 11/18/2014,241,2047,7082,6190,20,1,4,1,6,1214,2963,1267,8,0,1,0,6
22 | 11/16/2014,239,1971,,6073,20,1,4,1,5,1192,,1250,8,0,1,0,5
23 | 11/15/2014,238,,7069,,,,,,,,2964,,,,,,
24 | 11/11/2014,234,1919,,5586,20,1,4,1,4,1166,,1187,8,0,1,0,3
25 | 11/10/2014,233,,6878,,,,,,,,2812,,,,,,
26 | 11/9/2014,232,1878,,5368,20,1,4,1,1,1142,,1169,8,0,1,0,1
27 | 11/8/2014,231,,6822,,,,,,,,2836,,,,,,
28 | 11/4/2014,227,,6619,4862,20,1,4,1,1,,2766,1130,8,0,1,0,1
29 | 11/3/2014,226,1760,,,,,,,,1054,,,,,,,
30 | 11/2/2014,225,1731,,4759,20,1,4,1,1,1041,,1070,8,0,1,0,1
31 | 10/31/2014,222,,6525,,,,,,,,2697,,,,,,
32 | 10/29/2014,220,1667,,5338,20,1,4,1,1,1018,,1510,8,0,1,0,1
33 | 10/27/2014,218,1906,,5235,20,1,4,1,1,997,,1500,8,0,1,0,1
34 | 10/25/2014,216,,6535,,,,,,,,2413,,,,,,
35 | 10/22/2014,214,,,3896,,,4,1,1,,,1281,,,1,0,1
36 | 10/21/2014,213,1553,,,,,,,,926,,,,,,,
37 | 10/19/2014,211,1540,,3706,20,1,3,1,,904,,1259,8,0,1,0,
38 | 10/18/2014,210,,4665,,,,,,,,2705,,,,,,
39 | 10/14/2014,206,1519,,3410,20,1,3,1,,862,,1200,8,0,0,1,
40 | 10/13/2014,205,,4262,,,,,,,,2484,,,,,,
41 | 10/12/2014,204,1472,,3252,20,1,2,1,,843,,1183,8,0,1,1,
42 | 10/11/2014,203,,4249,,,,,,,,2458,,,,,,
43 | 10/8/2014,200,,,2950,20,1,1,1,,,,930,8,0,1,1,
44 | 10/7/2014,199,1350,4076,,,,,,,778,2316,,,,,,
45 | 10/5/2014,197,1298,,2789,20,1,1,,,768,,879,8,0,0,,
46 | 10/4/2014,196,,3924,,,,,,,,2210,,,,,,
47 | 10/1/2014,193,1199,3834,2437,20,1,1,,,739,2069,623,8,0,0,,
48 | 9/28/2014,190,1157,3696,2304,20,1,,,,710,1998,622,8,0,,,
49 | 9/23/2014,185,1074,3458,2021,20,1,,,,648,1830,605,8,0,,,
50 | 9/21/2014,183,1022,3280,1940,20,1,,,,635,1677,597,8,0,,,
51 | 9/20/2014,182,,,1813,,,,,,,,593,,,,,
52 | 9/19/2014,181,1008,,,,,,,,632,,,,,,,
53 | 9/17/2014,179,,3022,,,,,,,,1578,,,,,,
54 | 9/14/2014,176,942,2710,1673,,,,,,601,1459,562,,,,,
55 | 9/13/2014,175,936,,1620,21,1,,,,595,1296,562,8,0,,,
56 | 9/10/2014,172,899,,1478,21,1,,,,568,,536,8,,,,
57 | 9/9/2014,171,,2407,,,,,,,,,,,,,,
58 | 9/7/2014,169,861,2081,1424,21,3,,,,557,1137,524,8,0,,,
59 | 9/5/2014,167,812,1871,1261,22,1,,,,517,1089,491,8,,,,
60 | 8/31/2014,162,771,1698,1216,21,1,,,,494,871,476,7,,,,
61 | 8/26/2014,157,648,1378,1026,17,,,,,430,694,422,6,,,,
62 | 8/20/2014,151,607,1082,910,16,,,,,406,624,392,5,,,,
63 | 8/18/2014,149,579,972,907,15,,,,,396,576,374,4,,,,
64 | 8/16/2014,147,543,834,848,15,,,,,394,466,365,4,,,,
65 | 8/13/2014,144,519,786,810,12,,,,,380,413,348,4,,,,
66 | 8/11/2014,142,510,670,783,12,,,,,377,355,334,3,,,,
67 | 8/9/2014,140,506,599,730,13,,,,,373,323,315,2,,,,
68 | 8/6/2014,137,495,554,717,13,,,,,367,294,298,2,,,,
69 | 8/4/2014,135,495,516,691,9,,,,,363,282,286,1,,,,
70 | 8/1/2014,132,485,468,646,4,,,,,358,255,273,1,,,,
71 | 7/30/2014,129,472,391,574,3,,,,,346,227,252,1,,,,
72 | 7/27/2014,126,460,329,533,1,,,,,339,156,233,1,,,,
73 | 7/23/2014,123,427,249,525,0,,,,,319,129,224,0,,,,
74 | 7/20/2014,120,415,224,454,,,,,,314,127,219,,,,,
75 | 7/17/2014,117,410,196,442,,,,,,310,116,206,,,,,
76 | 7/14/2014,114,411,174,397,,,,,,310,106,197,,,,,
77 | 7/12/2014,112,406,172,386,,,,,,304,105,194,,,,,
78 | 7/8/2014,108,409,142,337,,,,,,309,88,142,,,,,
79 | 7/6/2014,106,408,131,305,,,,,,307,84,127,,,,,
80 | 7/2/2014,102,412,115,252,,,,,,305,75,101,,,,,
81 | 6/30/2014,100,413,107,239,,,,,,303,65,99,,,,,
82 | 6/22/2014,92,,51,,,,,,,,34,,,,,,
83 | 6/20/2014,90,390,,158,,,,,,270,,34,,,,,
84 | 6/19/2014,89,,41,,,,,,,,25,,,,,,
85 | 6/18/2014,88,390,,136,,,,,,267,,28,,,,,
86 | 6/17/2014,87,,,97,,,,,,,,49,,,,,
87 | 6/16/2014,86,398,33,,,,,,,264,24,,,,,,
88 | 6/10/2014,80,351,13,89,,,,,,226,24,7,,,,,
89 | 6/5/2014,75,,13,81,,,,,,,,6,,,,,
90 | 6/3/2014,73,344,13,,,,,,,215,12,6,,,,,
91 | 6/1/2014,71,328,13,79,,,,,,208,12,6,,,,,
92 | 5/28/2014,67,291,13,50,,,,,,193,12,6,,,,,
93 | 5/27/2014,66,281,12,16,,,,,,186,11,5,,,,,
94 | 5/23/2014,62,258,12,0,,,,,,174,11,0,,,,,
95 | 5/12/2014,51,248,12,0,,,,,,171,11,0,,,,,
96 | 5/10/2014,49,233,12,0,,,,,,157,11,0,,,,,
97 | 5/7/2014,46,236,13,0,,,,,,158,11,0,,,,,
98 | 5/5/2014,44,235,13,0,,,,,,157,11,0,,,,,
99 | 5/3/2014,42,231,13,0,,,,,,155,11,0,,,,,
100 | 5/1/2014,40,226,13,0,,,,,,149,11,0,,,,,
101 | 4/26/2014,35,224,,0,,,,,,143,,0,,,,,
102 | 4/24/2014,33,,35,0,,,,,,,,0,,,,,
103 | 4/23/2014,32,218,,0,,,,,,141,,0,,,,,
104 | 4/22/2014,31,,,0,,,,,,,,0,,,,,
105 | 4/21/2014,30,,34,,,,,,,,11,,,,,,
106 | 4/20/2014,29,208,,,,,,,,136,6,,,,,,
107 | 4/17/2014,26,203,27,,,,,,,129,,,,,,,
108 | 4/16/2014,25,197,27,,,,,,,122,13,,,,,,
109 | 4/15/2014,24,,,12,,,,,,,,,,,,,
110 | 4/14/2014,23,168,,,,,,,,108,,,,,,,
111 | 4/11/2014,20,159,26,2,,,,,,106,13,2,,,,,
112 | 4/9/2014,18,158,25,2,,,,,,101,12,2,,,,,
113 | 4/7/2014,16,151,21,2,,,,,,95,10,2,,,,,
114 | 4/4/2014,13,143,18,2,,,,,,86,7,2,,,,,
115 | 4/1/2014,10,127,8,2,,,,,,83,5,2,,,,,
116 | 3/31/2014,9,122,8,2,,,,,,80,4,2,,,,,
117 | 3/29/2014,7,112,7,,,,,,,70,2,,,,,,
118 | 3/28/2014,6,112,3,2,,,,,,70,3,2,,,,,
119 | 3/27/2014,5,103,8,6,,,,,,66,6,5,,,,,
120 | 3/26/2014,4,86,,,,,,,,62,,,,,,,
121 | 3/25/2014,3,86,,,,,,,,60,,,,,,,
122 | 3/24/2014,2,86,,,,,,,,59,,,,,,,
123 | 3/22/2014,0,49,,,,,,,,29,,,,,,,
--------------------------------------------------------------------------------
/data/pew.csv:
--------------------------------------------------------------------------------
1 | "religion","<$10k","$10-20k","$20-30k","$30-40k","$40-50k","$50-75k","$75-100k","$100-150k",">150k","Don't know/refused"
2 | "Agnostic",27,34,60,81,76,137,122,109,84,96
3 | "Atheist",12,27,37,52,35,70,73,59,74,76
4 | "Buddhist",27,21,30,34,33,58,62,39,53,54
5 | "Catholic",418,617,732,670,638,1116,949,792,633,1489
6 | "Don’t know/refused",15,14,15,11,10,35,21,17,18,116
7 | "Evangelical Prot",575,869,1064,982,881,1486,949,723,414,1529
8 | "Hindu",1,9,7,9,11,34,47,48,54,37
9 | "Historically Black Prot",228,244,236,238,197,223,131,81,78,339
10 | "Jehovah's Witness",20,27,24,24,21,30,15,11,6,37
11 | "Jewish",19,19,25,25,30,95,69,87,151,162
12 | "Mainline Prot",289,495,619,655,651,1107,939,753,634,1328
13 | "Mormon",29,40,48,51,56,112,85,49,42,69
14 | "Muslim",6,7,9,10,9,23,16,8,6,22
15 | "Orthodox",13,17,23,32,32,47,38,42,46,73
16 | "Other Christian",9,7,11,13,13,14,18,14,12,18
17 | "Other Faiths",20,33,40,46,49,63,46,40,41,71
18 | "Other World Religions",5,2,3,4,2,7,3,4,4,8
19 | "Unaffiliated",217,299,374,365,341,528,407,321,258,597
20 |
--------------------------------------------------------------------------------
/data/raw_data_urls.txt:
--------------------------------------------------------------------------------
1 | https://s3.amazonaws.com/nyc-tlc/trip+data/fhv_tripdata_2015-01.csv
2 | https://s3.amazonaws.com/nyc-tlc/trip+data/fhv_tripdata_2015-02.csv
3 | https://s3.amazonaws.com/nyc-tlc/trip+data/fhv_tripdata_2015-03.csv
4 | https://s3.amazonaws.com/nyc-tlc/trip+data/fhv_tripdata_2015-04.csv
5 | https://s3.amazonaws.com/nyc-tlc/trip+data/fhv_tripdata_2015-05.csv
6 | https://s3.amazonaws.com/nyc-tlc/trip+data/fhv_tripdata_2015-06.csv
7 | https://s3.amazonaws.com/nyc-tlc/trip+data/fhv_tripdata_2015-07.csv
8 | https://s3.amazonaws.com/nyc-tlc/trip+data/fhv_tripdata_2015-08.csv
9 | https://s3.amazonaws.com/nyc-tlc/trip+data/fhv_tripdata_2015-09.csv
10 | https://s3.amazonaws.com/nyc-tlc/trip+data/fhv_tripdata_2015-10.csv
11 | https://s3.amazonaws.com/nyc-tlc/trip+data/fhv_tripdata_2015-11.csv
12 | https://s3.amazonaws.com/nyc-tlc/trip+data/fhv_tripdata_2015-12.csv
13 | https://s3.amazonaws.com/nyc-tlc/trip+data/fhv_tripdata_2016-01.csv
14 | https://s3.amazonaws.com/nyc-tlc/trip+data/fhv_tripdata_2016-02.csv
15 | https://s3.amazonaws.com/nyc-tlc/trip+data/fhv_tripdata_2016-03.csv
16 | https://s3.amazonaws.com/nyc-tlc/trip+data/fhv_tripdata_2016-04.csv
17 | https://s3.amazonaws.com/nyc-tlc/trip+data/fhv_tripdata_2016-05.csv
18 | https://s3.amazonaws.com/nyc-tlc/trip+data/fhv_tripdata_2016-06.csv
19 | https://s3.amazonaws.com/nyc-tlc/trip+data/fhv_tripdata_2016-07.csv
20 | https://s3.amazonaws.com/nyc-tlc/trip+data/fhv_tripdata_2016-08.csv
21 | https://s3.amazonaws.com/nyc-tlc/trip+data/fhv_tripdata_2016-09.csv
22 | https://s3.amazonaws.com/nyc-tlc/trip+data/fhv_tripdata_2016-10.csv
23 | https://s3.amazonaws.com/nyc-tlc/trip+data/fhv_tripdata_2016-11.csv
24 | https://s3.amazonaws.com/nyc-tlc/trip+data/fhv_tripdata_2016-12.csv
25 | https://s3.amazonaws.com/nyc-tlc/trip+data/green_tripdata_2013-08.csv
26 | https://s3.amazonaws.com/nyc-tlc/trip+data/green_tripdata_2013-09.csv
27 | https://s3.amazonaws.com/nyc-tlc/trip+data/green_tripdata_2013-10.csv
28 | https://s3.amazonaws.com/nyc-tlc/trip+data/green_tripdata_2013-11.csv
29 | https://s3.amazonaws.com/nyc-tlc/trip+data/green_tripdata_2013-12.csv
30 | https://s3.amazonaws.com/nyc-tlc/trip+data/green_tripdata_2014-01.csv
31 | https://s3.amazonaws.com/nyc-tlc/trip+data/green_tripdata_2014-02.csv
32 | https://s3.amazonaws.com/nyc-tlc/trip+data/green_tripdata_2014-03.csv
33 | https://s3.amazonaws.com/nyc-tlc/trip+data/green_tripdata_2014-04.csv
34 | https://s3.amazonaws.com/nyc-tlc/trip+data/green_tripdata_2014-05.csv
35 | https://s3.amazonaws.com/nyc-tlc/trip+data/green_tripdata_2014-06.csv
36 | https://s3.amazonaws.com/nyc-tlc/trip+data/green_tripdata_2014-07.csv
37 | https://s3.amazonaws.com/nyc-tlc/trip+data/green_tripdata_2014-08.csv
38 | https://s3.amazonaws.com/nyc-tlc/trip+data/green_tripdata_2014-09.csv
39 | https://s3.amazonaws.com/nyc-tlc/trip+data/green_tripdata_2014-10.csv
40 | https://s3.amazonaws.com/nyc-tlc/trip+data/green_tripdata_2014-11.csv
41 | https://s3.amazonaws.com/nyc-tlc/trip+data/green_tripdata_2014-12.csv
42 | https://s3.amazonaws.com/nyc-tlc/trip+data/green_tripdata_2015-01.csv
43 | https://s3.amazonaws.com/nyc-tlc/trip+data/green_tripdata_2015-02.csv
44 | https://s3.amazonaws.com/nyc-tlc/trip+data/green_tripdata_2015-03.csv
45 | https://s3.amazonaws.com/nyc-tlc/trip+data/green_tripdata_2015-04.csv
46 | https://s3.amazonaws.com/nyc-tlc/trip+data/green_tripdata_2015-05.csv
47 | https://s3.amazonaws.com/nyc-tlc/trip+data/green_tripdata_2015-06.csv
48 | https://s3.amazonaws.com/nyc-tlc/trip+data/green_tripdata_2015-07.csv
49 | https://s3.amazonaws.com/nyc-tlc/trip+data/green_tripdata_2015-08.csv
50 | https://s3.amazonaws.com/nyc-tlc/trip+data/green_tripdata_2015-09.csv
51 | https://s3.amazonaws.com/nyc-tlc/trip+data/green_tripdata_2015-10.csv
52 | https://s3.amazonaws.com/nyc-tlc/trip+data/green_tripdata_2015-11.csv
53 | https://s3.amazonaws.com/nyc-tlc/trip+data/green_tripdata_2015-12.csv
54 | https://s3.amazonaws.com/nyc-tlc/trip+data/green_tripdata_2016-01.csv
55 | https://s3.amazonaws.com/nyc-tlc/trip+data/green_tripdata_2016-02.csv
56 | https://s3.amazonaws.com/nyc-tlc/trip+data/green_tripdata_2016-03.csv
57 | https://s3.amazonaws.com/nyc-tlc/trip+data/green_tripdata_2016-04.csv
58 | https://s3.amazonaws.com/nyc-tlc/trip+data/green_tripdata_2016-05.csv
59 | https://s3.amazonaws.com/nyc-tlc/trip+data/green_tripdata_2016-06.csv
60 | https://s3.amazonaws.com/nyc-tlc/trip+data/green_tripdata_2016-07.csv
61 | https://s3.amazonaws.com/nyc-tlc/trip+data/green_tripdata_2016-08.csv
62 | https://s3.amazonaws.com/nyc-tlc/trip+data/green_tripdata_2016-09.csv
63 | https://s3.amazonaws.com/nyc-tlc/trip+data/green_tripdata_2016-10.csv
64 | https://s3.amazonaws.com/nyc-tlc/trip+data/green_tripdata_2016-11.csv
65 | https://s3.amazonaws.com/nyc-tlc/trip+data/green_tripdata_2016-12.csv
66 | https://s3.amazonaws.com/nyc-tlc/trip+data/yellow_tripdata_2009-01.csv
67 | https://s3.amazonaws.com/nyc-tlc/trip+data/yellow_tripdata_2009-02.csv
68 | https://s3.amazonaws.com/nyc-tlc/trip+data/yellow_tripdata_2009-03.csv
69 | https://s3.amazonaws.com/nyc-tlc/trip+data/yellow_tripdata_2009-04.csv
70 | https://s3.amazonaws.com/nyc-tlc/trip+data/yellow_tripdata_2009-05.csv
71 | https://s3.amazonaws.com/nyc-tlc/trip+data/yellow_tripdata_2009-06.csv
72 | https://s3.amazonaws.com/nyc-tlc/trip+data/yellow_tripdata_2009-07.csv
73 | https://s3.amazonaws.com/nyc-tlc/trip+data/yellow_tripdata_2009-08.csv
74 | https://s3.amazonaws.com/nyc-tlc/trip+data/yellow_tripdata_2009-09.csv
75 | https://s3.amazonaws.com/nyc-tlc/trip+data/yellow_tripdata_2009-10.csv
76 | https://s3.amazonaws.com/nyc-tlc/trip+data/yellow_tripdata_2009-11.csv
77 | https://s3.amazonaws.com/nyc-tlc/trip+data/yellow_tripdata_2009-12.csv
78 | https://s3.amazonaws.com/nyc-tlc/trip+data/yellow_tripdata_2010-01.csv
79 | https://s3.amazonaws.com/nyc-tlc/trip+data/yellow_tripdata_2010-02.csv
80 | https://s3.amazonaws.com/nyc-tlc/trip+data/yellow_tripdata_2010-03.csv
81 | https://s3.amazonaws.com/nyc-tlc/trip+data/yellow_tripdata_2010-04.csv
82 | https://s3.amazonaws.com/nyc-tlc/trip+data/yellow_tripdata_2010-05.csv
83 | https://s3.amazonaws.com/nyc-tlc/trip+data/yellow_tripdata_2010-06.csv
84 | https://s3.amazonaws.com/nyc-tlc/trip+data/yellow_tripdata_2010-07.csv
85 | https://s3.amazonaws.com/nyc-tlc/trip+data/yellow_tripdata_2010-08.csv
86 | https://s3.amazonaws.com/nyc-tlc/trip+data/yellow_tripdata_2010-09.csv
87 | https://s3.amazonaws.com/nyc-tlc/trip+data/yellow_tripdata_2010-10.csv
88 | https://s3.amazonaws.com/nyc-tlc/trip+data/yellow_tripdata_2010-11.csv
89 | https://s3.amazonaws.com/nyc-tlc/trip+data/yellow_tripdata_2010-12.csv
90 | https://s3.amazonaws.com/nyc-tlc/trip+data/yellow_tripdata_2011-01.csv
91 | https://s3.amazonaws.com/nyc-tlc/trip+data/yellow_tripdata_2011-02.csv
92 | https://s3.amazonaws.com/nyc-tlc/trip+data/yellow_tripdata_2011-03.csv
93 | https://s3.amazonaws.com/nyc-tlc/trip+data/yellow_tripdata_2011-04.csv
94 | https://s3.amazonaws.com/nyc-tlc/trip+data/yellow_tripdata_2011-05.csv
95 | https://s3.amazonaws.com/nyc-tlc/trip+data/yellow_tripdata_2011-06.csv
96 | https://s3.amazonaws.com/nyc-tlc/trip+data/yellow_tripdata_2011-07.csv
97 | https://s3.amazonaws.com/nyc-tlc/trip+data/yellow_tripdata_2011-08.csv
98 | https://s3.amazonaws.com/nyc-tlc/trip+data/yellow_tripdata_2011-09.csv
99 | https://s3.amazonaws.com/nyc-tlc/trip+data/yellow_tripdata_2011-10.csv
100 | https://s3.amazonaws.com/nyc-tlc/trip+data/yellow_tripdata_2011-11.csv
101 | https://s3.amazonaws.com/nyc-tlc/trip+data/yellow_tripdata_2011-12.csv
102 | https://s3.amazonaws.com/nyc-tlc/trip+data/yellow_tripdata_2012-01.csv
103 | https://s3.amazonaws.com/nyc-tlc/trip+data/yellow_tripdata_2012-02.csv
104 | https://s3.amazonaws.com/nyc-tlc/trip+data/yellow_tripdata_2012-03.csv
105 | https://s3.amazonaws.com/nyc-tlc/trip+data/yellow_tripdata_2012-04.csv
106 | https://s3.amazonaws.com/nyc-tlc/trip+data/yellow_tripdata_2012-05.csv
107 | https://s3.amazonaws.com/nyc-tlc/trip+data/yellow_tripdata_2012-06.csv
108 | https://s3.amazonaws.com/nyc-tlc/trip+data/yellow_tripdata_2012-07.csv
109 | https://s3.amazonaws.com/nyc-tlc/trip+data/yellow_tripdata_2012-08.csv
110 | https://s3.amazonaws.com/nyc-tlc/trip+data/yellow_tripdata_2012-09.csv
111 | https://s3.amazonaws.com/nyc-tlc/trip+data/yellow_tripdata_2012-10.csv
112 | https://s3.amazonaws.com/nyc-tlc/trip+data/yellow_tripdata_2012-11.csv
113 | https://s3.amazonaws.com/nyc-tlc/trip+data/yellow_tripdata_2012-12.csv
114 | https://s3.amazonaws.com/nyc-tlc/trip+data/yellow_tripdata_2013-01.csv
115 | https://s3.amazonaws.com/nyc-tlc/trip+data/yellow_tripdata_2013-02.csv
116 | https://s3.amazonaws.com/nyc-tlc/trip+data/yellow_tripdata_2013-03.csv
117 | https://s3.amazonaws.com/nyc-tlc/trip+data/yellow_tripdata_2013-04.csv
118 | https://s3.amazonaws.com/nyc-tlc/trip+data/yellow_tripdata_2013-05.csv
119 | https://s3.amazonaws.com/nyc-tlc/trip+data/yellow_tripdata_2013-06.csv
120 | https://s3.amazonaws.com/nyc-tlc/trip+data/yellow_tripdata_2013-07.csv
121 | https://s3.amazonaws.com/nyc-tlc/trip+data/yellow_tripdata_2013-08.csv
122 | https://s3.amazonaws.com/nyc-tlc/trip+data/yellow_tripdata_2013-09.csv
123 | https://s3.amazonaws.com/nyc-tlc/trip+data/yellow_tripdata_2013-10.csv
124 | https://s3.amazonaws.com/nyc-tlc/trip+data/yellow_tripdata_2013-11.csv
125 | https://s3.amazonaws.com/nyc-tlc/trip+data/yellow_tripdata_2013-12.csv
126 | https://s3.amazonaws.com/nyc-tlc/trip+data/yellow_tripdata_2014-01.csv
127 | https://s3.amazonaws.com/nyc-tlc/trip+data/yellow_tripdata_2014-02.csv
128 | https://s3.amazonaws.com/nyc-tlc/trip+data/yellow_tripdata_2014-03.csv
129 | https://s3.amazonaws.com/nyc-tlc/trip+data/yellow_tripdata_2014-04.csv
130 | https://s3.amazonaws.com/nyc-tlc/trip+data/yellow_tripdata_2014-05.csv
131 | https://s3.amazonaws.com/nyc-tlc/trip+data/yellow_tripdata_2014-06.csv
132 | https://s3.amazonaws.com/nyc-tlc/trip+data/yellow_tripdata_2014-07.csv
133 | https://s3.amazonaws.com/nyc-tlc/trip+data/yellow_tripdata_2014-08.csv
134 | https://s3.amazonaws.com/nyc-tlc/trip+data/yellow_tripdata_2014-09.csv
135 | https://s3.amazonaws.com/nyc-tlc/trip+data/yellow_tripdata_2014-10.csv
136 | https://s3.amazonaws.com/nyc-tlc/trip+data/yellow_tripdata_2014-11.csv
137 | https://s3.amazonaws.com/nyc-tlc/trip+data/yellow_tripdata_2014-12.csv
138 | https://s3.amazonaws.com/nyc-tlc/trip+data/yellow_tripdata_2015-01.csv
139 | https://s3.amazonaws.com/nyc-tlc/trip+data/yellow_tripdata_2015-02.csv
140 | https://s3.amazonaws.com/nyc-tlc/trip+data/yellow_tripdata_2015-03.csv
141 | https://s3.amazonaws.com/nyc-tlc/trip+data/yellow_tripdata_2015-04.csv
142 | https://s3.amazonaws.com/nyc-tlc/trip+data/yellow_tripdata_2015-05.csv
143 | https://s3.amazonaws.com/nyc-tlc/trip+data/yellow_tripdata_2015-06.csv
144 | https://s3.amazonaws.com/nyc-tlc/trip+data/yellow_tripdata_2015-07.csv
145 | https://s3.amazonaws.com/nyc-tlc/trip+data/yellow_tripdata_2015-08.csv
146 | https://s3.amazonaws.com/nyc-tlc/trip+data/yellow_tripdata_2015-09.csv
147 | https://s3.amazonaws.com/nyc-tlc/trip+data/yellow_tripdata_2015-10.csv
148 | https://s3.amazonaws.com/nyc-tlc/trip+data/yellow_tripdata_2015-11.csv
149 | https://s3.amazonaws.com/nyc-tlc/trip+data/yellow_tripdata_2015-12.csv
150 | https://s3.amazonaws.com/nyc-tlc/trip+data/yellow_tripdata_2016-01.csv
151 | https://s3.amazonaws.com/nyc-tlc/trip+data/yellow_tripdata_2016-02.csv
152 | https://s3.amazonaws.com/nyc-tlc/trip+data/yellow_tripdata_2016-03.csv
153 | https://s3.amazonaws.com/nyc-tlc/trip+data/yellow_tripdata_2016-04.csv
154 | https://s3.amazonaws.com/nyc-tlc/trip+data/yellow_tripdata_2016-05.csv
155 | https://s3.amazonaws.com/nyc-tlc/trip+data/yellow_tripdata_2016-06.csv
156 | https://s3.amazonaws.com/nyc-tlc/trip+data/yellow_tripdata_2016-07.csv
157 | https://s3.amazonaws.com/nyc-tlc/trip+data/yellow_tripdata_2016-08.csv
158 | https://s3.amazonaws.com/nyc-tlc/trip+data/yellow_tripdata_2016-09.csv
159 | https://s3.amazonaws.com/nyc-tlc/trip+data/yellow_tripdata_2016-10.csv
160 | https://s3.amazonaws.com/nyc-tlc/trip+data/yellow_tripdata_2016-11.csv
161 | https://s3.amazonaws.com/nyc-tlc/trip+data/yellow_tripdata_2016-12.csv
162 |
--------------------------------------------------------------------------------
/data/scientists.csv:
--------------------------------------------------------------------------------
1 | Name,Born,Died,Age,Occupation
2 | Rosaline Franklin,1920-07-25,1958-04-16,37,Chemist
3 | William Gosset,1876-06-13,1937-10-16,61,Statistician
4 | Florence Nightingale,1820-05-12,1910-08-13,90,Nurse
5 | Marie Curie,1867-11-07,1934-07-04,66,Chemist
6 | Rachel Carson,1907-05-27,1964-04-14,56,Biologist
7 | John Snow,1813-03-15,1858-06-16,45,Physician
8 | Alan Turing,1912-06-23,1954-06-07,41,Computer Scientist
9 | Johann Gauss,1777-04-30,1855-02-23,77,Mathematician
10 |
--------------------------------------------------------------------------------
/data/survey_person.csv:
--------------------------------------------------------------------------------
1 | ident,personal,family
2 | dyer,William,Dyer
3 | pb,Frank,Pabodie
4 | lake,Anderson,Lake
5 | roe,Valentina,Roerich
6 | danforth,Frank,Danforth
7 |
--------------------------------------------------------------------------------
/data/survey_site.csv:
--------------------------------------------------------------------------------
1 | name,lat,long
2 | DR-1,-49.85,-128.57
3 | DR-3,-47.15,-126.72
4 | MSK-4,-48.87,-123.4
5 |
--------------------------------------------------------------------------------
/data/survey_survey.csv:
--------------------------------------------------------------------------------
1 | taken,person,quant,reading
2 | 619,dyer,rad,9.82
3 | 619,dyer,sal,0.13
4 | 622,dyer,rad,7.8
5 | 622,dyer,sal,0.09
6 | 734,pb,rad,8.41
7 | 734,lake,sal,0.05
8 | 734,pb,temp,-21.5
9 | 735,pb,rad,7.22
10 | 735,,sal,0.06
11 | 735,,temp,-26.0
12 | 751,pb,rad,4.35
13 | 751,pb,temp,-18.5
14 | 751,lake,sal,0.1
15 | 752,lake,rad,2.19
16 | 752,lake,sal,0.09
17 | 752,lake,temp,-16.0
18 | 752,roe,sal,41.6
19 | 837,lake,rad,1.46
20 | 837,lake,sal,0.21
21 | 837,roe,sal,22.5
22 | 844,roe,rad,11.25
23 |
--------------------------------------------------------------------------------
/data/survey_visited.csv:
--------------------------------------------------------------------------------
1 | ident,site,dated
2 | 619,DR-1,1927-02-08
3 | 622,DR-1,1927-02-10
4 | 734,DR-3,1939-01-07
5 | 735,DR-3,1930-01-12
6 | 751,DR-3,1930-02-26
7 | 752,DR-3,
8 | 837,MSK-4,1932-01-14
9 | 844,DR-1,1932-03-22
10 |
--------------------------------------------------------------------------------
/data/weather.csv:
--------------------------------------------------------------------------------
1 | "id","year","month","element","d1","d2","d3","d4","d5","d6","d7","d8","d9","d10","d11","d12","d13","d14","d15","d16","d17","d18","d19","d20","d21","d22","d23","d24","d25","d26","d27","d28","d29","d30","d31"
2 | "MX17004",2010,1,"tmax",NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,27.8,NA
3 | "MX17004",2010,1,"tmin",NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,14.5,NA
4 | "MX17004",2010,2,"tmax",NA,27.3,24.1,NA,NA,NA,NA,NA,NA,NA,29.7,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,29.9,NA,NA,NA,NA,NA,NA,NA,NA
5 | "MX17004",2010,2,"tmin",NA,14.4,14.4,NA,NA,NA,NA,NA,NA,NA,13.4,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,10.7,NA,NA,NA,NA,NA,NA,NA,NA
6 | "MX17004",2010,3,"tmax",NA,NA,NA,NA,32.1,NA,NA,NA,NA,34.5,NA,NA,NA,NA,NA,31.1,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA
7 | "MX17004",2010,3,"tmin",NA,NA,NA,NA,14.2,NA,NA,NA,NA,16.8,NA,NA,NA,NA,NA,17.6,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA
8 | "MX17004",2010,4,"tmax",NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,36.3,NA,NA,NA,NA
9 | "MX17004",2010,4,"tmin",NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,16.7,NA,NA,NA,NA
10 | "MX17004",2010,5,"tmax",NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,33.2,NA,NA,NA,NA
11 | "MX17004",2010,5,"tmin",NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,18.2,NA,NA,NA,NA
12 | "MX17004",2010,6,"tmax",NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,28,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,30.1,NA,NA
13 | "MX17004",2010,6,"tmin",NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,17.5,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,18,NA,NA
14 | "MX17004",2010,7,"tmax",NA,NA,28.6,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,29.9,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA
15 | "MX17004",2010,7,"tmin",NA,NA,17.5,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,16.5,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA
16 | "MX17004",2010,8,"tmax",NA,NA,NA,NA,29.6,NA,NA,29,NA,NA,NA,NA,29.8,NA,NA,NA,NA,NA,NA,NA,NA,NA,26.4,NA,29.7,NA,NA,NA,28,NA,25.4
17 | "MX17004",2010,8,"tmin",NA,NA,NA,NA,15.8,NA,NA,17.3,NA,NA,NA,NA,16.5,NA,NA,NA,NA,NA,NA,NA,NA,NA,15,NA,15.6,NA,NA,NA,15.3,NA,15.4
18 | "MX17004",2010,10,"tmax",NA,NA,NA,NA,27,NA,28.1,NA,NA,NA,NA,NA,NA,29.5,28.7,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,31.2,NA,NA,NA
19 | "MX17004",2010,10,"tmin",NA,NA,NA,NA,14,NA,12.9,NA,NA,NA,NA,NA,NA,13,10.5,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,15,NA,NA,NA
20 | "MX17004",2010,11,"tmax",NA,31.3,NA,27.2,26.3,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,28.1,27.7,NA,NA,NA,NA
21 | "MX17004",2010,11,"tmin",NA,16.3,NA,12,7.9,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,12.1,14.2,NA,NA,NA,NA
22 | "MX17004",2010,12,"tmax",29.9,NA,NA,NA,NA,27.8,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA
23 | "MX17004",2010,12,"tmin",13.8,NA,NA,NA,NA,10.5,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA
24 |
--------------------------------------------------------------------------------
/notebook/.ipynb_checkpoints/08_done-checkpoint.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "metadata": {},
6 | "source": [
7 | "# 직접 해보세요!\n",
8 | "## 자료형을 자유자재로 변환하기 ─ astype 메서드(172쪽)"
9 | ]
10 | },
11 | {
12 | "cell_type": "code",
13 | "execution_count": 1,
14 | "metadata": {},
15 | "outputs": [],
16 | "source": [
17 | "import pandas as pd\n",
18 | "import seaborn as sns\n",
19 | "\n",
20 | "tips = sns.load_dataset(\"tips\")"
21 | ]
22 | },
23 | {
24 | "cell_type": "markdown",
25 | "metadata": {},
26 | "source": [
27 | "### 2. 여러 가지 자료형을 문자열로 변환하기"
28 | ]
29 | },
30 | {
31 | "cell_type": "code",
32 | "execution_count": 3,
33 | "metadata": {},
34 | "outputs": [
35 | {
36 | "name": "stdout",
37 | "output_type": "stream",
38 | "text": [
39 | "total_bill float64\n",
40 | "tip float64\n",
41 | "sex category\n",
42 | "smoker category\n",
43 | "day category\n",
44 | "time category\n",
45 | "size int64\n",
46 | "sex_str object\n",
47 | "dtype: object\n"
48 | ]
49 | }
50 | ],
51 | "source": [
52 | "tips['sex_str'] = tips['sex'].astype(str)\n",
53 | "print(tips.dtypes)"
54 | ]
55 | },
56 | {
57 | "cell_type": "markdown",
58 | "metadata": {},
59 | "source": [
60 | "### 4. 자료형을 변환한 데이터 다시 원래대로 만들기"
61 | ]
62 | },
63 | {
64 | "cell_type": "code",
65 | "execution_count": 4,
66 | "metadata": {},
67 | "outputs": [
68 | {
69 | "name": "stdout",
70 | "output_type": "stream",
71 | "text": [
72 | "total_bill object\n",
73 | "tip float64\n",
74 | "sex category\n",
75 | "smoker category\n",
76 | "day category\n",
77 | "time category\n",
78 | "size int64\n",
79 | "sex_str object\n",
80 | "dtype: object\n"
81 | ]
82 | }
83 | ],
84 | "source": [
85 | "tips['total_bill'] = tips['total_bill'].astype(str) \n",
86 | "print(tips.dtypes)"
87 | ]
88 | },
89 | {
90 | "cell_type": "code",
91 | "execution_count": 5,
92 | "metadata": {},
93 | "outputs": [
94 | {
95 | "name": "stdout",
96 | "output_type": "stream",
97 | "text": [
98 | "total_bill float64\n",
99 | "tip float64\n",
100 | "sex category\n",
101 | "smoker category\n",
102 | "day category\n",
103 | "time category\n",
104 | "size int64\n",
105 | "sex_str object\n",
106 | "dtype: object\n"
107 | ]
108 | }
109 | ],
110 | "source": [
111 | "tips['total_bill'] = tips['total_bill'].astype(float) \n",
112 | "print(tips.dtypes)"
113 | ]
114 | },
115 | {
116 | "cell_type": "markdown",
117 | "metadata": {},
118 | "source": [
119 | "# 직접 해보세요!\n",
120 | "## 잘못 입력한 문자열 처리하기 ─ to_numeric 메서드(174쪽)"
121 | ]
122 | },
123 | {
124 | "cell_type": "code",
125 | "execution_count": 6,
126 | "metadata": {},
127 | "outputs": [
128 | {
129 | "name": "stdout",
130 | "output_type": "stream",
131 | "text": [
132 | " total_bill tip sex smoker day time size sex_str\n",
133 | "0 16.99 1.01 Female No Sun Dinner 2 Female\n",
134 | "1 missing 1.66 Male No Sun Dinner 3 Male\n",
135 | "2 21.01 3.50 Male No Sun Dinner 3 Male\n",
136 | "3 missing 3.31 Male No Sun Dinner 2 Male\n",
137 | "4 24.59 3.61 Female No Sun Dinner 4 Female\n",
138 | "5 missing 4.71 Male No Sun Dinner 4 Male\n",
139 | "6 8.77 2.00 Male No Sun Dinner 2 Male\n",
140 | "7 missing 3.12 Male No Sun Dinner 4 Male\n",
141 | "8 15.04 1.96 Male No Sun Dinner 2 Male\n",
142 | "9 14.78 3.23 Male No Sun Dinner 2 Male\n"
143 | ]
144 | },
145 | {
146 | "name": "stderr",
147 | "output_type": "stream",
148 | "text": [
149 | "C:\\Users\\phk70\\Anaconda3\\lib\\site-packages\\pandas\\core\\indexing.py:543: SettingWithCopyWarning: \n",
150 | "A value is trying to be set on a copy of a slice from a DataFrame.\n",
151 | "Try using .loc[row_indexer,col_indexer] = value instead\n",
152 | "\n",
153 | "See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy\n",
154 | " self.obj[item] = s\n"
155 | ]
156 | }
157 | ],
158 | "source": [
159 | "tips_sub_miss = tips.head(10)\n",
160 | "tips_sub_miss.loc[[1, 3, 5, 7], 'total_bill'] = 'missing'\n",
161 | "\n",
162 | "print(tips_sub_miss)"
163 | ]
164 | },
165 | {
166 | "cell_type": "code",
167 | "execution_count": 8,
168 | "metadata": {},
169 | "outputs": [
170 | {
171 | "name": "stdout",
172 | "output_type": "stream",
173 | "text": [
174 | "total_bill object\n",
175 | "tip float64\n",
176 | "sex category\n",
177 | "smoker category\n",
178 | "day category\n",
179 | "time category\n",
180 | "size int64\n",
181 | "sex_str object\n",
182 | "dtype: object\n"
183 | ]
184 | }
185 | ],
186 | "source": [
187 | "print(tips_sub_miss.dtypes)"
188 | ]
189 | },
190 | {
191 | "cell_type": "code",
192 | "execution_count": 9,
193 | "metadata": {},
194 | "outputs": [
195 | {
196 | "ename": "ValueError",
197 | "evalue": "could not convert string to float: 'missing'",
198 | "output_type": "error",
199 | "traceback": [
200 | "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m",
201 | "\u001b[1;31mValueError\u001b[0m Traceback (most recent call last)",
202 | "\u001b[1;32m\u001b[0m in \u001b[0;36m\u001b[1;34m()\u001b[0m\n\u001b[1;32m----> 1\u001b[1;33m \u001b[0mtips_sub_miss\u001b[0m\u001b[1;33m[\u001b[0m\u001b[1;34m'total_bill'\u001b[0m\u001b[1;33m]\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mastype\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mfloat\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m",
203 | "\u001b[1;32m~\\Anaconda3\\lib\\site-packages\\pandas\\util\\_decorators.py\u001b[0m in \u001b[0;36mwrapper\u001b[1;34m(*args, **kwargs)\u001b[0m\n\u001b[0;32m 175\u001b[0m \u001b[1;32melse\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 176\u001b[0m \u001b[0mkwargs\u001b[0m\u001b[1;33m[\u001b[0m\u001b[0mnew_arg_name\u001b[0m\u001b[1;33m]\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mnew_arg_value\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 177\u001b[1;33m \u001b[1;32mreturn\u001b[0m \u001b[0mfunc\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m*\u001b[0m\u001b[0margs\u001b[0m\u001b[1;33m,\u001b[0m \u001b[1;33m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 178\u001b[0m \u001b[1;32mreturn\u001b[0m \u001b[0mwrapper\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 179\u001b[0m \u001b[1;32mreturn\u001b[0m \u001b[0m_deprecate_kwarg\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
204 | "\u001b[1;32m~\\Anaconda3\\lib\\site-packages\\pandas\\core\\generic.py\u001b[0m in \u001b[0;36mastype\u001b[1;34m(self, dtype, copy, errors, **kwargs)\u001b[0m\n\u001b[0;32m 4995\u001b[0m \u001b[1;31m# else, only a single dtype is given\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 4996\u001b[0m new_data = self._data.astype(dtype=dtype, copy=copy, errors=errors,\n\u001b[1;32m-> 4997\u001b[1;33m **kwargs)\n\u001b[0m\u001b[0;32m 4998\u001b[0m \u001b[1;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0m_constructor\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mnew_data\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0m__finalize__\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mself\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 4999\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n",
205 | "\u001b[1;32m~\\Anaconda3\\lib\\site-packages\\pandas\\core\\internals.py\u001b[0m in \u001b[0;36mastype\u001b[1;34m(self, dtype, **kwargs)\u001b[0m\n\u001b[0;32m 3712\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 3713\u001b[0m \u001b[1;32mdef\u001b[0m \u001b[0mastype\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mself\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mdtype\u001b[0m\u001b[1;33m,\u001b[0m \u001b[1;33m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m-> 3714\u001b[1;33m \u001b[1;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mapply\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;34m'astype'\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mdtype\u001b[0m\u001b[1;33m=\u001b[0m\u001b[0mdtype\u001b[0m\u001b[1;33m,\u001b[0m \u001b[1;33m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 3715\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 3716\u001b[0m \u001b[1;32mdef\u001b[0m \u001b[0mconvert\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mself\u001b[0m\u001b[1;33m,\u001b[0m \u001b[1;33m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
206 | "\u001b[1;32m~\\Anaconda3\\lib\\site-packages\\pandas\\core\\internals.py\u001b[0m in \u001b[0;36mapply\u001b[1;34m(self, f, axes, filter, do_integrity_check, consolidate, **kwargs)\u001b[0m\n\u001b[0;32m 3579\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 3580\u001b[0m \u001b[0mkwargs\u001b[0m\u001b[1;33m[\u001b[0m\u001b[1;34m'mgr'\u001b[0m\u001b[1;33m]\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m-> 3581\u001b[1;33m \u001b[0mapplied\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mgetattr\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mb\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mf\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 3582\u001b[0m \u001b[0mresult_blocks\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0m_extend_blocks\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mapplied\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mresult_blocks\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 3583\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n",
207 | "\u001b[1;32m~\\Anaconda3\\lib\\site-packages\\pandas\\core\\internals.py\u001b[0m in \u001b[0;36mastype\u001b[1;34m(self, dtype, copy, errors, values, **kwargs)\u001b[0m\n\u001b[0;32m 573\u001b[0m \u001b[1;32mdef\u001b[0m \u001b[0mastype\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mself\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mdtype\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mcopy\u001b[0m\u001b[1;33m=\u001b[0m\u001b[1;32mFalse\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0merrors\u001b[0m\u001b[1;33m=\u001b[0m\u001b[1;34m'raise'\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mvalues\u001b[0m\u001b[1;33m=\u001b[0m\u001b[1;32mNone\u001b[0m\u001b[1;33m,\u001b[0m \u001b[1;33m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 574\u001b[0m return self._astype(dtype, copy=copy, errors=errors, values=values,\n\u001b[1;32m--> 575\u001b[1;33m **kwargs)\n\u001b[0m\u001b[0;32m 576\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 577\u001b[0m def _astype(self, dtype, copy=False, errors='raise', values=None,\n",
208 | "\u001b[1;32m~\\Anaconda3\\lib\\site-packages\\pandas\\core\\internals.py\u001b[0m in \u001b[0;36m_astype\u001b[1;34m(self, dtype, copy, errors, values, klass, mgr, **kwargs)\u001b[0m\n\u001b[0;32m 662\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 663\u001b[0m \u001b[1;31m# _astype_nansafe works fine with 1-d only\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 664\u001b[1;33m \u001b[0mvalues\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mastype_nansafe\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mvalues\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mravel\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mdtype\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mcopy\u001b[0m\u001b[1;33m=\u001b[0m\u001b[1;32mTrue\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 665\u001b[0m \u001b[0mvalues\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mvalues\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mreshape\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mshape\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 666\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n",
209 | "\u001b[1;32m~\\Anaconda3\\lib\\site-packages\\pandas\\core\\dtypes\\cast.py\u001b[0m in \u001b[0;36mastype_nansafe\u001b[1;34m(arr, dtype, copy)\u001b[0m\n\u001b[0;32m 728\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 729\u001b[0m \u001b[1;32mif\u001b[0m \u001b[0mcopy\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 730\u001b[1;33m \u001b[1;32mreturn\u001b[0m \u001b[0marr\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mastype\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mdtype\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mcopy\u001b[0m\u001b[1;33m=\u001b[0m\u001b[1;32mTrue\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 731\u001b[0m \u001b[1;32mreturn\u001b[0m \u001b[0marr\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mview\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mdtype\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 732\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n",
210 | "\u001b[1;31mValueError\u001b[0m: could not convert string to float: 'missing'"
211 | ]
212 | }
213 | ],
214 | "source": [
215 | "tips_sub_miss['total_bill'].astype(float)"
216 | ]
217 | },
218 | {
219 | "cell_type": "code",
220 | "execution_count": 10,
221 | "metadata": {},
222 | "outputs": [
223 | {
224 | "ename": "ValueError",
225 | "evalue": "Unable to parse string \"missing\" at position 1",
226 | "output_type": "error",
227 | "traceback": [
228 | "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m",
229 | "\u001b[1;31mValueError\u001b[0m Traceback (most recent call last)",
230 | "\u001b[1;32mpandas/_libs/src\\inference.pyx\u001b[0m in \u001b[0;36mpandas._libs.lib.maybe_convert_numeric\u001b[1;34m()\u001b[0m\n",
231 | "\u001b[1;31mValueError\u001b[0m: Unable to parse string \"missing\"",
232 | "\nDuring handling of the above exception, another exception occurred:\n",
233 | "\u001b[1;31mValueError\u001b[0m Traceback (most recent call last)",
234 | "\u001b[1;32m\u001b[0m in \u001b[0;36m\u001b[1;34m()\u001b[0m\n\u001b[1;32m----> 1\u001b[1;33m \u001b[0mpd\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mto_numeric\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mtips_sub_miss\u001b[0m\u001b[1;33m[\u001b[0m\u001b[1;34m'total_bill'\u001b[0m\u001b[1;33m]\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m",
235 | "\u001b[1;32m~\\Anaconda3\\lib\\site-packages\\pandas\\core\\tools\\numeric.py\u001b[0m in \u001b[0;36mto_numeric\u001b[1;34m(arg, errors, downcast)\u001b[0m\n\u001b[0;32m 131\u001b[0m \u001b[0mcoerce_numeric\u001b[0m \u001b[1;33m=\u001b[0m \u001b[1;32mFalse\u001b[0m \u001b[1;32mif\u001b[0m \u001b[0merrors\u001b[0m \u001b[1;32min\u001b[0m \u001b[1;33m(\u001b[0m\u001b[1;34m'ignore'\u001b[0m\u001b[1;33m,\u001b[0m \u001b[1;34m'raise'\u001b[0m\u001b[1;33m)\u001b[0m \u001b[1;32melse\u001b[0m \u001b[1;32mTrue\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 132\u001b[0m values = lib.maybe_convert_numeric(values, set(),\n\u001b[1;32m--> 133\u001b[1;33m coerce_numeric=coerce_numeric)\n\u001b[0m\u001b[0;32m 134\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 135\u001b[0m \u001b[1;32mexcept\u001b[0m \u001b[0mException\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
236 | "\u001b[1;32mpandas/_libs/src\\inference.pyx\u001b[0m in \u001b[0;36mpandas._libs.lib.maybe_convert_numeric\u001b[1;34m()\u001b[0m\n",
237 | "\u001b[1;31mValueError\u001b[0m: Unable to parse string \"missing\" at position 1"
238 | ]
239 | }
240 | ],
241 | "source": [
242 | "pd.to_numeric(tips_sub_miss['total_bill'])"
243 | ]
244 | },
245 | {
246 | "cell_type": "code",
247 | "execution_count": 11,
248 | "metadata": {},
249 | "outputs": [
250 | {
251 | "name": "stdout",
252 | "output_type": "stream",
253 | "text": [
254 | "total_bill object\n",
255 | "tip float64\n",
256 | "sex category\n",
257 | "smoker category\n",
258 | "day category\n",
259 | "time category\n",
260 | "size int64\n",
261 | "sex_str object\n",
262 | "dtype: object\n"
263 | ]
264 | },
265 | {
266 | "name": "stderr",
267 | "output_type": "stream",
268 | "text": [
269 | "C:\\Users\\fermat39\\Anaconda3\\lib\\site-packages\\ipykernel_launcher.py:1: SettingWithCopyWarning: \n",
270 | "A value is trying to be set on a copy of a slice from a DataFrame.\n",
271 | "Try using .loc[row_indexer,col_indexer] = value instead\n",
272 | "\n",
273 | "See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy\n",
274 | " \"\"\"Entry point for launching an IPython kernel.\n"
275 | ]
276 | }
277 | ],
278 | "source": [
279 | "tips_sub_miss['total_bill'] = pd.to_numeric( tips_sub_miss['total_bill'], errors='ignore')\n",
280 | "\n",
281 | "print(tips_sub_miss.dtypes)"
282 | ]
283 | },
284 | {
285 | "cell_type": "code",
286 | "execution_count": 12,
287 | "metadata": {},
288 | "outputs": [
289 | {
290 | "name": "stdout",
291 | "output_type": "stream",
292 | "text": [
293 | "total_bill float64\n",
294 | "tip float64\n",
295 | "sex category\n",
296 | "smoker category\n",
297 | "day category\n",
298 | "time category\n",
299 | "size int64\n",
300 | "sex_str object\n",
301 | "dtype: object\n"
302 | ]
303 | },
304 | {
305 | "name": "stderr",
306 | "output_type": "stream",
307 | "text": [
308 | "C:\\Users\\fermat39\\Anaconda3\\lib\\site-packages\\ipykernel_launcher.py:1: SettingWithCopyWarning: \n",
309 | "A value is trying to be set on a copy of a slice from a DataFrame.\n",
310 | "Try using .loc[row_indexer,col_indexer] = value instead\n",
311 | "\n",
312 | "See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy\n",
313 | " \"\"\"Entry point for launching an IPython kernel.\n"
314 | ]
315 | }
316 | ],
317 | "source": [
318 | "tips_sub_miss['total_bill'] = pd.to_numeric( tips_sub_miss['total_bill'], errors='coerce')\n",
319 | "\n",
320 | "print(tips_sub_miss.dtypes)"
321 | ]
322 | },
323 | {
324 | "cell_type": "code",
325 | "execution_count": 13,
326 | "metadata": {},
327 | "outputs": [
328 | {
329 | "name": "stdout",
330 | "output_type": "stream",
331 | "text": [
332 | "total_bill float32\n",
333 | "tip float64\n",
334 | "sex category\n",
335 | "smoker category\n",
336 | "day category\n",
337 | "time category\n",
338 | "size int64\n",
339 | "sex_str object\n",
340 | "dtype: object\n"
341 | ]
342 | },
343 | {
344 | "name": "stderr",
345 | "output_type": "stream",
346 | "text": [
347 | "C:\\Users\\fermat39\\Anaconda3\\lib\\site-packages\\ipykernel_launcher.py:1: SettingWithCopyWarning: \n",
348 | "A value is trying to be set on a copy of a slice from a DataFrame.\n",
349 | "Try using .loc[row_indexer,col_indexer] = value instead\n",
350 | "\n",
351 | "See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy\n",
352 | " \"\"\"Entry point for launching an IPython kernel.\n"
353 | ]
354 | }
355 | ],
356 | "source": [
357 | "tips_sub_miss['total_bill'] = pd.to_numeric( tips_sub_miss['total_bill'], errors='coerce', downcast='float')\n",
358 | "\n",
359 | "print(tips_sub_miss.dtypes)"
360 | ]
361 | },
362 | {
363 | "cell_type": "markdown",
364 | "metadata": {},
365 | "source": [
366 | "# 직접 해보세요!\n",
367 | "## 문자열을 카테고리로 변환하기(179쪽)"
368 | ]
369 | },
370 | {
371 | "cell_type": "code",
372 | "execution_count": 14,
373 | "metadata": {},
374 | "outputs": [
375 | {
376 | "name": "stdout",
377 | "output_type": "stream",
378 | "text": [
379 | "<class 'pandas.core.frame.DataFrame'>\n",
380 | "RangeIndex: 244 entries, 0 to 243\n",
381 | "Data columns (total 8 columns):\n",
382 | "total_bill 244 non-null float64\n",
383 | "tip 244 non-null float64\n",
384 | "sex 244 non-null object\n",
385 | "smoker 244 non-null category\n",
386 | "day 244 non-null category\n",
387 | "time 244 non-null category\n",
388 | "size 244 non-null int64\n",
389 | "sex_str 244 non-null object\n",
390 | "dtypes: category(3), float64(2), int64(1), object(2)\n",
391 | "memory usage: 10.7+ KB\n",
392 | "None\n"
393 | ]
394 | }
395 | ],
396 | "source": [
397 | "tips['sex'] = tips['sex'].astype('str') \n",
398 | "print(tips.info())"
399 | ]
400 | },
401 | {
402 | "cell_type": "code",
403 | "execution_count": 15,
404 | "metadata": {},
405 | "outputs": [
406 | {
407 | "name": "stdout",
408 | "output_type": "stream",
409 | "text": [
410 | "<class 'pandas.core.frame.DataFrame'>\n",
411 | "RangeIndex: 244 entries, 0 to 243\n",
412 | "Data columns (total 8 columns):\n",
413 | "total_bill 244 non-null float64\n",
414 | "tip 244 non-null float64\n",
415 | "sex 244 non-null category\n",
416 | "smoker 244 non-null category\n",
417 | "day 244 non-null category\n",
418 | "time 244 non-null category\n",
419 | "size 244 non-null int64\n",
420 | "sex_str 244 non-null object\n",
421 | "dtypes: category(4), float64(2), int64(1), object(1)\n",
422 | "memory usage: 9.1+ KB\n",
423 | "None\n"
424 | ]
425 | }
426 | ],
427 | "source": [
428 | "tips['sex'] = tips['sex'].astype('category') \n",
429 | "print(tips.info())"
430 | ]
431 | }
432 | ],
433 | "metadata": {
434 | "kernelspec": {
435 | "display_name": "Python 3",
436 | "language": "python",
437 | "name": "python3"
438 | },
439 | "language_info": {
440 | "codemirror_mode": {
441 | "name": "ipython",
442 | "version": 3
443 | },
444 | "file_extension": ".py",
445 | "mimetype": "text/x-python",
446 | "name": "python",
447 | "nbconvert_exporter": "python",
448 | "pygments_lexer": "ipython3",
449 | "version": "3.6.5"
450 | }
451 | },
452 | "nbformat": 4,
453 | "nbformat_minor": 1
454 | }
455 |
--------------------------------------------------------------------------------
/notebook/.ipynb_checkpoints/09_done-checkpoint.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "metadata": {},
6 | "source": [
7 | "# 직접 해보세요!\n",
8 | "## 문자열 추출하기(183쪽)"
9 | ]
10 | },
11 | {
12 | "cell_type": "code",
13 | "execution_count": 1,
14 | "metadata": {},
15 | "outputs": [],
16 | "source": [
17 | "word = 'grail'\n",
18 | "sent = 'a scratch'"
19 | ]
20 | },
21 | {
22 | "cell_type": "code",
23 | "execution_count": 2,
24 | "metadata": {},
25 | "outputs": [
26 | {
27 | "name": "stdout",
28 | "output_type": "stream",
29 | "text": [
30 | "g\n"
31 | ]
32 | }
33 | ],
34 | "source": [
35 | "print(word[0])"
36 | ]
37 | },
38 | {
39 | "cell_type": "code",
40 | "execution_count": 3,
41 | "metadata": {},
42 | "outputs": [
43 | {
44 | "name": "stdout",
45 | "output_type": "stream",
46 | "text": [
47 | "a\n"
48 | ]
49 | }
50 | ],
51 | "source": [
52 | "print(sent[0])"
53 | ]
54 | },
55 | {
56 | "cell_type": "code",
57 | "execution_count": 4,
58 | "metadata": {},
59 | "outputs": [
60 | {
61 | "name": "stdout",
62 | "output_type": "stream",
63 | "text": [
64 | "gra\n"
65 | ]
66 | }
67 | ],
68 | "source": [
69 | "print(word[0:3])"
70 | ]
71 | },
72 | {
73 | "cell_type": "markdown",
74 | "metadata": {},
75 | "source": [
76 | "---"
77 | ]
78 | },
79 | {
80 | "cell_type": "code",
81 | "execution_count": 5,
82 | "metadata": {},
83 | "outputs": [
84 | {
85 | "name": "stdout",
86 | "output_type": "stream",
87 | "text": [
88 | "h\n"
89 | ]
90 | }
91 | ],
92 | "source": [
93 | "print(sent[-1])"
94 | ]
95 | },
96 | {
97 | "cell_type": "code",
98 | "execution_count": 6,
99 | "metadata": {},
100 | "outputs": [
101 | {
102 | "name": "stdout",
103 | "output_type": "stream",
104 | "text": [
105 | "a\n"
106 | ]
107 | }
108 | ],
109 | "source": [
110 | "print(sent[-9:-8])"
111 | ]
112 | },
113 | {
114 | "cell_type": "code",
115 | "execution_count": 7,
116 | "metadata": {},
117 | "outputs": [
118 | {
119 | "name": "stdout",
120 | "output_type": "stream",
121 | "text": [
122 | "a\n"
123 | ]
124 | }
125 | ],
126 | "source": [
127 | "print(sent[0:-8])"
128 | ]
129 | },
130 | {
131 | "cell_type": "markdown",
132 | "metadata": {},
133 | "source": [
134 | "# 알아두면 좋아요!\n",
135 | "## 전체 문자열을 추출할 때 음수를 사용하면 안 됩니다(184쪽)"
136 | ]
137 | },
138 | {
139 | "cell_type": "code",
140 | "execution_count": 8,
141 | "metadata": {},
142 | "outputs": [
143 | {
144 | "name": "stdout",
145 | "output_type": "stream",
146 | "text": [
147 | "scratc\n"
148 | ]
149 | }
150 | ],
151 | "source": [
152 | "print(sent[2:-1])"
153 | ]
154 | },
155 | {
156 | "cell_type": "code",
157 | "execution_count": 11,
158 | "metadata": {},
159 | "outputs": [
160 | {
161 | "name": "stdout",
162 | "output_type": "stream",
163 | "text": [
164 | "scratc\n"
165 | ]
166 | }
167 | ],
168 | "source": [
169 | "print(sent[-7:-1])"
170 | ]
171 | },
172 | {
173 | "cell_type": "code",
174 | "execution_count": 12,
175 | "metadata": {},
176 | "outputs": [
177 | {
178 | "name": "stdout",
179 | "output_type": "stream",
180 | "text": [
181 | "9\n"
182 | ]
183 | }
184 | ],
185 | "source": [
186 | "s_len = len(sent)\n",
187 | "print(s_len)"
188 | ]
189 | },
190 | {
191 | "cell_type": "code",
192 | "execution_count": 13,
193 | "metadata": {},
194 | "outputs": [
195 | {
196 | "name": "stdout",
197 | "output_type": "stream",
198 | "text": [
199 | "scratch\n"
200 | ]
201 | }
202 | ],
203 | "source": [
204 | "print(sent[2:s_len])"
205 | ]
206 | },
207 | {
208 | "cell_type": "markdown",
209 | "metadata": {},
210 | "source": [
211 | "# 직접 해보세요!\n",
212 | "## 왼쪽이나 오른쪽 범위를 지정하지 않고 문자열 추출하기(185쪽)"
213 | ]
214 | },
215 | {
216 | "cell_type": "code",
217 | "execution_count": 14,
218 | "metadata": {},
219 | "outputs": [
220 | {
221 | "name": "stdout",
222 | "output_type": "stream",
223 | "text": [
224 | "gra\n"
225 | ]
226 | }
227 | ],
228 | "source": [
229 | "print(word[0:3])"
230 | ]
231 | },
232 | {
233 | "cell_type": "code",
234 | "execution_count": 15,
235 | "metadata": {},
236 | "outputs": [
237 | {
238 | "name": "stdout",
239 | "output_type": "stream",
240 | "text": [
241 | "gra\n"
242 | ]
243 | }
244 | ],
245 | "source": [
246 | "print(word[ :3])"
247 | ]
248 | },
249 | {
250 | "cell_type": "code",
251 | "execution_count": 16,
252 | "metadata": {},
253 | "outputs": [
254 | {
255 | "name": "stdout",
256 | "output_type": "stream",
257 | "text": [
258 | "scratch\n"
259 | ]
260 | }
261 | ],
262 | "source": [
263 | "print(sent[2:len(sent)])"
264 | ]
265 | },
266 | {
267 | "cell_type": "code",
268 | "execution_count": 17,
269 | "metadata": {},
270 | "outputs": [
271 | {
272 | "name": "stdout",
273 | "output_type": "stream",
274 | "text": [
275 | "scratch\n"
276 | ]
277 | }
278 | ],
279 | "source": [
280 | "print(sent[2: ])"
281 | ]
282 | },
283 | {
284 | "cell_type": "code",
285 | "execution_count": 18,
286 | "metadata": {},
287 | "outputs": [
288 | {
289 | "name": "stdout",
290 | "output_type": "stream",
291 | "text": [
292 | "a scratch\n"
293 | ]
294 | }
295 | ],
296 | "source": [
297 | "print(sent[ : ])"
298 | ]
299 | },
300 | {
301 | "cell_type": "code",
302 | "execution_count": 19,
303 | "metadata": {},
304 | "outputs": [
305 | {
306 | "name": "stdout",
307 | "output_type": "stream",
308 | "text": [
309 | "asrth\n"
310 | ]
311 | }
312 | ],
313 | "source": [
314 | "print(sent[::2])"
315 | ]
316 | },
317 | {
318 | "cell_type": "markdown",
319 | "metadata": {},
320 | "source": [
321 | "# 직접 해보세요!\n",
322 | "## join, splitlines, replace 메서드 실습하기(188쪽)"
323 | ]
324 | },
325 | {
326 | "cell_type": "markdown",
327 | "metadata": {},
328 | "source": [
329 | "### 1. join 메서드"
330 | ]
331 | },
332 | {
333 | "cell_type": "code",
334 | "execution_count": 20,
335 | "metadata": {},
336 | "outputs": [],
337 | "source": [
338 | "d1 = '40°' \n",
339 | "m1 = \"46'\" \n",
340 | "s1 = '52.837\"' \n",
341 | "u1 = 'N'\n",
342 | "\n",
343 | "d2 = '73°' \n",
344 | "m2 = \"58'\" \n",
345 | "s2 = '26.302\"' \n",
346 | "u2 = 'W'"
347 | ]
348 | },
349 | {
350 | "cell_type": "code",
351 | "execution_count": 21,
352 | "metadata": {},
353 | "outputs": [
354 | {
355 | "name": "stdout",
356 | "output_type": "stream",
357 | "text": [
358 | "40° 46' 52.837\" N 73° 58' 26.302\" W\n"
359 | ]
360 | }
361 | ],
362 | "source": [
363 | "coords = ' '.join([d1, m1, s1, u1, d2, m2, s2, u2])\n",
364 | "print(coords)"
365 | ]
366 | },
367 | {
368 | "cell_type": "markdown",
369 | "metadata": {},
370 | "source": [
371 | "### 2. splitlines 메서드"
372 | ]
373 | },
374 | {
375 | "cell_type": "code",
376 | "execution_count": 22,
377 | "metadata": {},
378 | "outputs": [
379 | {
380 | "name": "stdout",
381 | "output_type": "stream",
382 | "text": [
383 | "Guard: What? Ridden on a horse?\n",
384 | "King Arthur: Yes!\n",
385 | "Guard: You're using coconuts!\n",
386 | "King Arthur: What?\n",
387 | "Guard: You've got ... coconut[s] and you're bangin' 'em together. \n",
388 | "\n"
389 | ]
390 | }
391 | ],
392 | "source": [
393 | "multi_str = \"\"\"Guard: What? Ridden on a horse?\n",
394 | "King Arthur: Yes!\n",
395 | "Guard: You're using coconuts!\n",
396 | "King Arthur: What?\n",
397 | "Guard: You've got ... coconut[s] and you're bangin' 'em together. \n",
398 | "\"\"\" \n",
399 | "print(multi_str)"
400 | ]
401 | },
402 | {
403 | "cell_type": "code",
404 | "execution_count": 23,
405 | "metadata": {},
406 | "outputs": [
407 | {
408 | "name": "stdout",
409 | "output_type": "stream",
410 | "text": [
411 | "['Guard: What? Ridden on a horse?', 'King Arthur: Yes!', \"Guard: You're using coconuts!\", 'King Arthur: What?', \"Guard: You've got ... coconut[s] and you're bangin' 'em together. \"]\n"
412 | ]
413 | }
414 | ],
415 | "source": [
416 | "multi_str_split = multi_str.splitlines() \n",
417 | "print(multi_str_split)"
418 | ]
419 | },
420 | {
421 | "cell_type": "code",
422 | "execution_count": 24,
423 | "metadata": {},
424 | "outputs": [
425 | {
426 | "name": "stdout",
427 | "output_type": "stream",
428 | "text": [
429 | "['Guard: What? Ridden on a horse?', \"Guard: You're using coconuts!\", \"Guard: You've got ... coconut[s] and you're bangin' 'em together. \"]\n"
430 | ]
431 | }
432 | ],
433 | "source": [
434 | "guard = multi_str_split[::2] \n",
435 | "print(guard)"
436 | ]
437 | },
438 | {
439 | "cell_type": "markdown",
440 | "metadata": {},
441 | "source": [
442 | "### 4. replace 메서드"
443 | ]
444 | },
445 | {
446 | "cell_type": "code",
447 | "execution_count": 25,
448 | "metadata": {},
449 | "outputs": [
450 | {
451 | "name": "stdout",
452 | "output_type": "stream",
453 | "text": [
454 | "['What? Ridden on a horse?', \"You're using coconuts!\", \"You've got ... coconut[s] and you're bangin' 'em together. \"]\n"
455 | ]
456 | }
457 | ],
458 | "source": [
459 | "guard = multi_str.replace(\"Guard: \", \"\").splitlines()[::2] \n",
460 | "print(guard)"
461 | ]
462 | },
463 | {
464 | "cell_type": "markdown",
465 | "metadata": {},
466 | "source": [
467 | "# 직접 해보세요!\n",
468 | "## 문자열 포매팅하기(190쪽)"
469 | ]
470 | },
471 | {
472 | "cell_type": "code",
473 | "execution_count": 26,
474 | "metadata": {},
475 | "outputs": [
476 | {
477 | "name": "stdout",
478 | "output_type": "stream",
479 | "text": [
480 | "It's just a flesh wound!\n"
481 | ]
482 | }
483 | ],
484 | "source": [
485 | "var = 'flesh wound' \n",
486 | "s = \"It's just a {}!\"\n",
487 | "\n",
488 | "print(s.format(var))"
489 | ]
490 | },
491 | {
492 | "cell_type": "code",
493 | "execution_count": 27,
494 | "metadata": {},
495 | "outputs": [
496 | {
497 | "name": "stdout",
498 | "output_type": "stream",
499 | "text": [
500 | "It's just a scratch!\n"
501 | ]
502 | }
503 | ],
504 | "source": [
505 | "print(s.format('scratch'))"
506 | ]
507 | },
508 | {
509 | "cell_type": "code",
510 | "execution_count": 28,
511 | "metadata": {},
512 | "outputs": [
513 | {
514 | "name": "stdout",
515 | "output_type": "stream",
516 | "text": [
517 | "Black Knight: 'Tis but a scratch.\n",
518 | "King Arthur: A scratch? Your arm's off!\n",
519 | "\n"
520 | ]
521 | }
522 | ],
523 | "source": [
524 | "s = \"\"\"Black Knight: 'Tis but a {0}.\n",
525 | "King Arthur: A {0}? Your arm's off!\n",
526 | "\"\"\" \n",
527 | "print(s.format('scratch'))"
528 | ]
529 | },
530 | {
531 | "cell_type": "code",
532 | "execution_count": 29,
533 | "metadata": {},
534 | "outputs": [
535 | {
536 | "name": "stdout",
537 | "output_type": "stream",
538 | "text": [
539 | "Hayden Planetarium Coordinates: 40.7815° N, 73.9733° W\n"
540 | ]
541 | }
542 | ],
543 | "source": [
544 | "s = 'Hayden Planetarium Coordinates: {lat}, {lon}' \n",
545 | "print(s.format(lat='40.7815° N', lon='73.9733° W'))"
546 | ]
547 | },
548 | {
549 | "cell_type": "markdown",
550 | "metadata": {},
551 | "source": [
552 | "# 직접 해보세요!\n",
553 | "## 숫자 데이터 포매팅하기(191쪽)"
554 | ]
555 | },
556 | {
557 | "cell_type": "code",
558 | "execution_count": 30,
559 | "metadata": {},
560 | "outputs": [
561 | {
562 | "name": "stdout",
563 | "output_type": "stream",
564 | "text": [
565 | "Some digits of pi: 3.14159265359\n"
566 | ]
567 | }
568 | ],
569 | "source": [
570 | "print('Some digits of pi: {}'.format(3.14159265359))"
571 | ]
572 | },
573 | {
574 | "cell_type": "code",
575 | "execution_count": 31,
576 | "metadata": {},
577 | "outputs": [
578 | {
579 | "name": "stdout",
580 | "output_type": "stream",
581 | "text": [
582 | "In 2005, Lu Chao of China recited 67,890 digits of pi\n"
583 | ]
584 | }
585 | ],
586 | "source": [
587 | "print(\"In 2005, Lu Chao of China recited {:,} digits of pi\".format(67890))"
588 | ]
589 | },
590 | {
591 | "cell_type": "code",
592 | "execution_count": 32,
593 | "metadata": {},
594 | "outputs": [
595 | {
596 | "name": "stdout",
597 | "output_type": "stream",
598 | "text": [
599 | "I remember 0.0001031 or 0.0103% of what Lu Chao recited\n"
600 | ]
601 | }
602 | ],
603 | "source": [
604 | "print(\"I remember {0:.4} or {0:.4%} of what Lu Chao recited\".format(7/67890))"
605 | ]
606 | },
607 | {
608 | "cell_type": "code",
609 | "execution_count": 33,
610 | "metadata": {},
611 | "outputs": [
612 | {
613 | "name": "stdout",
614 | "output_type": "stream",
615 | "text": [
616 | "My ID number is 00042\n"
617 | ]
618 | }
619 | ],
620 | "source": [
621 | "print(\"My ID number is {0:05d}\".format(42))"
622 | ]
623 | },
624 | {
625 | "cell_type": "markdown",
626 | "metadata": {},
627 | "source": [
628 | "# 직접 해보세요!\n",
629 | "## % 연산자로 포매팅하기(192쪽)"
630 | ]
631 | },
632 | {
633 | "cell_type": "code",
634 | "execution_count": 34,
635 | "metadata": {},
636 | "outputs": [
637 | {
638 | "name": "stdout",
639 | "output_type": "stream",
640 | "text": [
641 | "I only know 7 digits of pi\n"
642 | ]
643 | }
644 | ],
645 | "source": [
646 | "s = 'I only know %d digits of pi' % 7 \n",
647 | "print(s)"
648 | ]
649 | },
650 | {
651 | "cell_type": "code",
652 | "execution_count": 35,
653 | "metadata": {},
654 | "outputs": [
655 | {
656 | "name": "stdout",
657 | "output_type": "stream",
658 | "text": [
659 | "Some digits of e: 2.72\n"
660 | ]
661 | }
662 | ],
663 | "source": [
664 | "print('Some digits of %(cont)s: %(value).2f' % {'cont': 'e', 'value': 2.718})"
665 | ]
666 | },
667 | {
668 | "cell_type": "markdown",
669 | "metadata": {},
670 | "source": [
671 | "# 알아두면 좋아요!\n",
672 | "## f-strings로 포매팅 사용하기(193쪽)"
673 | ]
674 | },
675 | {
676 | "cell_type": "code",
677 | "execution_count": 36,
678 | "metadata": {},
679 | "outputs": [
680 | {
681 | "name": "stdout",
682 | "output_type": "stream",
683 | "text": [
684 | "It's just a flesh wound!\n"
685 | ]
686 | }
687 | ],
688 | "source": [
689 | "var = 'flesh wound' \n",
690 | "s = f\"It's just a {var}!\" \n",
691 | "print(s)"
692 | ]
693 | },
694 | {
695 | "cell_type": "code",
696 | "execution_count": 37,
697 | "metadata": {},
698 | "outputs": [
699 | {
700 | "name": "stdout",
701 | "output_type": "stream",
702 | "text": [
703 | "Hayden Planetarium Coordinates: 40.7815°N, 73.9733°W\n"
704 | ]
705 | }
706 | ],
707 | "source": [
708 | "lat='40.7815°N' \n",
709 | "lon='73.9733°W' \n",
710 | "s = f'Hayden Planetarium Coordinates: {lat}, {lon}' \n",
711 | "print(s)"
712 | ]
713 | },
714 | {
715 | "cell_type": "markdown",
716 | "metadata": {},
717 | "source": [
718 | "# 직접 해보세요!\n",
719 | "## 정규식으로 전화번호 패턴 찾기(196쪽)"
720 | ]
721 | },
722 | {
723 | "cell_type": "code",
724 | "execution_count": 38,
725 | "metadata": {},
726 | "outputs": [],
727 | "source": [
728 | "import re\n",
729 | "\n",
730 | "tele_num = '1234567890'"
731 | ]
732 | },
733 | {
734 | "cell_type": "code",
735 | "execution_count": 39,
736 | "metadata": {},
737 | "outputs": [
738 | {
739 | "name": "stdout",
740 | "output_type": "stream",
741 | "text": [
742 | "<class '_sre.SRE_Match'>\n"
743 | ]
744 | }
745 | ],
746 | "source": [
747 | "m = re.match(pattern='\\d\\d\\d\\d\\d\\d\\d\\d\\d\\d', string=tele_num) \n",
748 | "print(type(m))"
749 | ]
750 | },
751 | {
752 | "cell_type": "code",
753 | "execution_count": 40,
754 | "metadata": {},
755 | "outputs": [
756 | {
757 | "name": "stdout",
758 | "output_type": "stream",
759 | "text": [
760 | "<_sre.SRE_Match object; span=(0, 10), match='1234567890'>\n"
761 | ]
762 | }
763 | ],
764 | "source": [
765 | "print(m)"
766 | ]
767 | },
768 | {
769 | "cell_type": "code",
770 | "execution_count": 41,
771 | "metadata": {},
772 | "outputs": [
773 | {
774 | "name": "stdout",
775 | "output_type": "stream",
776 | "text": [
777 | "True\n"
778 | ]
779 | }
780 | ],
781 | "source": [
782 | "print(bool(m))"
783 | ]
784 | },
785 | {
786 | "cell_type": "code",
787 | "execution_count": 42,
788 | "metadata": {},
789 | "outputs": [
790 | {
791 | "name": "stdout",
792 | "output_type": "stream",
793 | "text": [
794 | "match\n"
795 | ]
796 | }
797 | ],
798 | "source": [
799 | "if m:\n",
800 | " print('match') \n",
801 | "else:\n",
802 | " print('no match')"
803 | ]
804 | },
805 | {
806 | "cell_type": "code",
807 | "execution_count": 43,
808 | "metadata": {},
809 | "outputs": [
810 | {
811 | "name": "stdout",
812 | "output_type": "stream",
813 | "text": [
814 | "0\n"
815 | ]
816 | }
817 | ],
818 | "source": [
819 | "print(m.start())"
820 | ]
821 | },
822 | {
823 | "cell_type": "code",
824 | "execution_count": 44,
825 | "metadata": {},
826 | "outputs": [
827 | {
828 | "name": "stdout",
829 | "output_type": "stream",
830 | "text": [
831 | "10\n"
832 | ]
833 | }
834 | ],
835 | "source": [
836 | "print(m.end())"
837 | ]
838 | },
839 | {
840 | "cell_type": "code",
841 | "execution_count": 45,
842 | "metadata": {},
843 | "outputs": [
844 | {
845 | "name": "stdout",
846 | "output_type": "stream",
847 | "text": [
848 | "(0, 10)\n"
849 | ]
850 | }
851 | ],
852 | "source": [
853 | "print(m.span())"
854 | ]
855 | },
856 | {
857 | "cell_type": "code",
858 | "execution_count": 46,
859 | "metadata": {},
860 | "outputs": [
861 | {
862 | "name": "stdout",
863 | "output_type": "stream",
864 | "text": [
865 | "1234567890\n"
866 | ]
867 | }
868 | ],
869 | "source": [
870 | "print(m.group())"
871 | ]
872 | },
873 | {
874 | "cell_type": "code",
875 | "execution_count": 47,
876 | "metadata": {},
877 | "outputs": [],
878 | "source": [
879 | "tele_num_spaces = '123 456 7890'"
880 | ]
881 | },
882 | {
883 | "cell_type": "code",
884 | "execution_count": 48,
885 | "metadata": {},
886 | "outputs": [
887 | {
888 | "name": "stdout",
889 | "output_type": "stream",
890 | "text": [
891 | "None\n"
892 | ]
893 | }
894 | ],
895 | "source": [
896 | "m = re.match(pattern='\\d{10}', string=tele_num_spaces) \n",
897 | "print(m)"
898 | ]
899 | },
900 | {
901 | "cell_type": "code",
902 | "execution_count": 49,
903 | "metadata": {},
904 | "outputs": [
905 | {
906 | "name": "stdout",
907 | "output_type": "stream",
908 | "text": [
909 | "no match\n"
910 | ]
911 | }
912 | ],
913 | "source": [
914 | "if m:\n",
915 | " print('match') \n",
916 | "else:\n",
917 | " print('no match')"
918 | ]
919 | },
920 | {
921 | "cell_type": "code",
922 | "execution_count": 50,
923 | "metadata": {},
924 | "outputs": [
925 | {
926 | "name": "stdout",
927 | "output_type": "stream",
928 | "text": [
929 | "<_sre.SRE_Match object; span=(0, 12), match='123 456 7890'>\n"
930 | ]
931 | }
932 | ],
933 | "source": [
934 | "p = '\\d{3}\\s?\\d{3}\\s?\\d{4}' \n",
935 | "m = re.match(pattern=p, string=tele_num_spaces) \n",
936 | "print(m)"
937 | ]
938 | },
939 | {
940 | "cell_type": "code",
941 | "execution_count": 51,
942 | "metadata": {},
943 | "outputs": [
944 | {
945 | "name": "stdout",
946 | "output_type": "stream",
947 | "text": [
948 | "<_sre.SRE_Match object; span=(0, 14), match='(123) 456-7890'>\n"
949 | ]
950 | }
951 | ],
952 | "source": [
953 | "tele_num_space_paren_dash = '(123) 456-7890' \n",
954 | "p = '\\(?\\d{3}\\)?\\s?\\d{3}\\s?-?\\d{4}' \n",
955 | "m = re.match(pattern=p, string=tele_num_space_paren_dash) \n",
956 | "print(m)"
957 | ]
958 | },
959 | {
960 | "cell_type": "code",
961 | "execution_count": 52,
962 | "metadata": {},
963 | "outputs": [
964 | {
965 | "name": "stdout",
966 | "output_type": "stream",
967 | "text": [
968 | "<_sre.SRE_Match object; span=(0, 17), match='+1 (123) 456-7890'>\n"
969 | ]
970 | }
971 | ],
972 | "source": [
973 | "cnty_tele_num_space_paren_dash = '+1 (123) 456-7890' \n",
974 | "p = '\\+?1\\s?\\(?\\d{3}\\)?\\s?\\d{3}\\s?-?\\d{4}' \n",
975 | "m = re.match(pattern=p, string=cnty_tele_num_space_paren_dash) \n",
976 | "print(m)"
977 | ]
978 | },
979 | {
980 | "cell_type": "markdown",
981 | "metadata": {},
982 | "source": [
983 | "# 알아두면 좋아요!\n",
984 | "## compile 메서드로 정규식 메서드 사용하기(200쪽)"
985 | ]
986 | },
987 | {
988 | "cell_type": "code",
989 | "execution_count": 54,
990 | "metadata": {},
991 | "outputs": [
992 | {
993 | "name": "stdout",
994 | "output_type": "stream",
995 | "text": [
996 | "<_sre.SRE_Match object; span=(0, 10), match='1234567890'>\n"
997 | ]
998 | }
999 | ],
1000 | "source": [
1001 | "p = re.compile('\\d{10}') \n",
1002 | "s = '1234567890' \n",
1003 | "m = p.match(s) \n",
1004 | "print(m)"
1005 | ]
1006 | }
1007 | ],
1008 | "metadata": {
1009 | "kernelspec": {
1010 | "display_name": "Python 3",
1011 | "language": "python",
1012 | "name": "python3"
1013 | },
1014 | "language_info": {
1015 | "codemirror_mode": {
1016 | "name": "ipython",
1017 | "version": 3
1018 | },
1019 | "file_extension": ".py",
1020 | "mimetype": "text/x-python",
1021 | "name": "python",
1022 | "nbconvert_exporter": "python",
1023 | "pygments_lexer": "ipython3",
1024 | "version": "3.6.5"
1025 | }
1026 | },
1027 | "nbformat": 4,
1028 | "nbformat_minor": 1
1029 | }
1030 |
--------------------------------------------------------------------------------
/notebook/.ipynb_checkpoints/10_done-checkpoint.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "metadata": {},
6 | "source": [
7 | "# 직접 해보세요!\n",
8 | "## 제곱 함수와 n 제곱 함수 만들기(202쪽)"
9 | ]
10 | },
11 | {
12 | "cell_type": "code",
13 | "execution_count": 1,
14 | "metadata": {},
15 | "outputs": [],
16 | "source": [
17 | "def my_sq(x):\n",
18 | " return x ** 2"
19 | ]
20 | },
21 | {
22 | "cell_type": "code",
23 | "execution_count": 2,
24 | "metadata": {},
25 | "outputs": [],
26 | "source": [
27 | "def my_exp(x, n):\n",
28 | " return x ** n"
29 | ]
30 | },
31 | {
32 | "cell_type": "code",
33 | "execution_count": 2,
34 | "metadata": {},
35 | "outputs": [
36 | {
37 | "name": "stdout",
38 | "output_type": "stream",
39 | "text": [
40 | "16\n"
41 | ]
42 | }
43 | ],
44 | "source": [
45 | "print(my_sq(4))"
46 | ]
47 | },
48 | {
49 | "cell_type": "code",
50 | "execution_count": 3,
51 | "metadata": {},
52 | "outputs": [
53 | {
54 | "name": "stdout",
55 | "output_type": "stream",
56 | "text": [
57 | "16\n"
58 | ]
59 | }
60 | ],
61 | "source": [
62 | "print(my_exp(2, 4))"
63 | ]
64 | },
65 | {
66 | "cell_type": "markdown",
67 | "metadata": {},
68 | "source": [
69 | "# 직접 해보세요!\n",
70 | "## 시리즈와 데이터프레임에 apply 메서드 사용하기(203쪽)"
71 | ]
72 | },
73 | {
74 | "cell_type": "markdown",
75 | "metadata": {},
76 | "source": [
77 | "### 1. 시리즈와 apply 메서드"
78 | ]
79 | },
80 | {
81 | "cell_type": "code",
82 | "execution_count": 5,
83 | "metadata": {},
84 | "outputs": [
85 | {
86 | "name": "stdout",
87 | "output_type": "stream",
88 | "text": [
89 | " a b\n",
90 | "0 10 20\n",
91 | "1 20 30\n",
92 | "2 30 40\n"
93 | ]
94 | }
95 | ],
96 | "source": [
97 | "import pandas as pd\n",
98 | "\n",
99 | "df = pd.DataFrame({'a': [10, 20, 30], 'b': [20, 30, 40]}) \n",
100 | "\n",
101 | "print(df)"
102 | ]
103 | },
104 | {
105 | "cell_type": "code",
106 | "execution_count": 6,
107 | "metadata": {},
108 | "outputs": [
109 | {
110 | "name": "stdout",
111 | "output_type": "stream",
112 | "text": [
113 | "0 100\n",
114 | "1 400\n",
115 | "2 900\n",
116 | "Name: a, dtype: int64\n"
117 | ]
118 | }
119 | ],
120 | "source": [
121 | "print(df['a'] ** 2)"
122 | ]
123 | },
124 | {
125 | "cell_type": "code",
126 | "execution_count": 7,
127 | "metadata": {},
128 | "outputs": [
129 | {
130 | "name": "stdout",
131 | "output_type": "stream",
132 | "text": [
133 | "0 100\n",
134 | "1 400\n",
135 | "2 900\n",
136 | "Name: a, dtype: int64\n"
137 | ]
138 | }
139 | ],
140 | "source": [
141 | "sq = df['a'].apply(my_sq) \n",
142 | "print(sq)"
143 | ]
144 | },
145 | {
146 | "cell_type": "code",
147 | "execution_count": 8,
148 | "metadata": {},
149 | "outputs": [
150 | {
151 | "name": "stdout",
152 | "output_type": "stream",
153 | "text": [
154 | "0 100\n",
155 | "1 400\n",
156 | "2 900\n",
157 | "Name: a, dtype: int64\n"
158 | ]
159 | }
160 | ],
161 | "source": [
162 | "ex = df['a'].apply(my_exp, n=2) \n",
163 | "print(ex)"
164 | ]
165 | },
166 | {
167 | "cell_type": "code",
168 | "execution_count": 9,
169 | "metadata": {},
170 | "outputs": [
171 | {
172 | "name": "stdout",
173 | "output_type": "stream",
174 | "text": [
175 | "0 1000\n",
176 | "1 8000\n",
177 | "2 27000\n",
178 | "Name: a, dtype: int64\n"
179 | ]
180 | }
181 | ],
182 | "source": [
183 | "ex = df['a'].apply(my_exp, n=3) \n",
184 | "print(ex)"
185 | ]
186 | },
187 | {
188 | "cell_type": "markdown",
189 | "metadata": {},
190 | "source": [
191 | "### 5. 데이터프레임과 apply 메서드"
192 | ]
193 | },
194 | {
195 | "cell_type": "code",
196 | "execution_count": 5,
197 | "metadata": {},
198 | "outputs": [
199 | {
200 | "name": "stdout",
201 | "output_type": "stream",
202 | "text": [
203 | " a b\n",
204 | "0 10 20\n",
205 | "1 20 30\n",
206 | "2 30 40\n"
207 | ]
208 | }
209 | ],
210 | "source": [
211 | "df = pd.DataFrame({'a': [10, 20, 30], 'b': [20, 30, 40]}) \n",
212 | "print(df)"
213 | ]
214 | },
215 | {
216 | "cell_type": "code",
217 | "execution_count": 6,
218 | "metadata": {},
219 | "outputs": [],
220 | "source": [
221 | "def print_me(x): \n",
222 | " print(x)"
223 | ]
224 | },
225 | {
226 | "cell_type": "code",
227 | "execution_count": 7,
228 | "metadata": {},
229 | "outputs": [
230 | {
231 | "name": "stdout",
232 | "output_type": "stream",
233 | "text": [
234 | "0 10\n",
235 | "1 20\n",
236 | "2 30\n",
237 | "Name: a, dtype: int64\n",
238 | "0 20\n",
239 | "1 30\n",
240 | "2 40\n",
241 | "Name: b, dtype: int64\n",
242 | "a None\n",
243 | "b None\n",
244 | "dtype: object\n"
245 | ]
246 | }
247 | ],
248 | "source": [
249 | "print(df.apply(print_me, axis=0))"
250 | ]
251 | },
252 | {
253 | "cell_type": "code",
254 | "execution_count": 8,
255 | "metadata": {},
256 | "outputs": [
257 | {
258 | "name": "stdout",
259 | "output_type": "stream",
260 | "text": [
261 | "0 10\n",
262 | "1 20\n",
263 | "2 30\n",
264 | "Name: a, dtype: int64\n"
265 | ]
266 | }
267 | ],
268 | "source": [
269 | "print(df['a'])"
270 | ]
271 | },
272 | {
273 | "cell_type": "code",
274 | "execution_count": 9,
275 | "metadata": {},
276 | "outputs": [
277 | {
278 | "name": "stdout",
279 | "output_type": "stream",
280 | "text": [
281 | "0 20\n",
282 | "1 30\n",
283 | "2 40\n",
284 | "Name: b, dtype: int64\n"
285 | ]
286 | }
287 | ],
288 | "source": [
289 | "print(df['b'])"
290 | ]
291 | },
292 | {
293 | "cell_type": "code",
294 | "execution_count": 10,
295 | "metadata": {},
296 | "outputs": [],
297 | "source": [
298 | "def avg_3(x, y, z):\n",
299 | " return (x + y + z) / 3"
300 | ]
301 | },
302 | {
303 | "cell_type": "code",
304 | "execution_count": 11,
305 | "metadata": {},
306 | "outputs": [
307 | {
308 | "ename": "TypeError",
309 | "evalue": "(\"avg_3() missing 2 required positional arguments: 'y' and 'z'\", 'occurred at index a')",
310 | "output_type": "error",
311 | "traceback": [
312 | "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m",
313 | "\u001b[1;31mTypeError\u001b[0m Traceback (most recent call last)",
314 | "\u001b[1;32m\u001b[0m in \u001b[0;36m\u001b[1;34m()\u001b[0m\n\u001b[1;32m----> 1\u001b[1;33m \u001b[0mprint\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mdf\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mapply\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mavg_3\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m",
315 | "\u001b[1;32m~\\Anaconda3\\lib\\site-packages\\pandas\\core\\frame.py\u001b[0m in \u001b[0;36mapply\u001b[1;34m(self, func, axis, broadcast, raw, reduce, result_type, args, **kwds)\u001b[0m\n\u001b[0;32m 6002\u001b[0m \u001b[0margs\u001b[0m\u001b[1;33m=\u001b[0m\u001b[0margs\u001b[0m\u001b[1;33m,\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 6003\u001b[0m kwds=kwds)\n\u001b[1;32m-> 6004\u001b[1;33m \u001b[1;32mreturn\u001b[0m \u001b[0mop\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mget_result\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 6005\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 6006\u001b[0m \u001b[1;32mdef\u001b[0m \u001b[0mapplymap\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mself\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mfunc\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
316 | "\u001b[1;32m~\\Anaconda3\\lib\\site-packages\\pandas\\core\\apply.py\u001b[0m in \u001b[0;36mget_result\u001b[1;34m(self)\u001b[0m\n\u001b[0;32m 316\u001b[0m *self.args, **self.kwds)\n\u001b[0;32m 317\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 318\u001b[1;33m \u001b[1;32mreturn\u001b[0m \u001b[0msuper\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mFrameRowApply\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mget_result\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 319\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 320\u001b[0m \u001b[1;32mdef\u001b[0m \u001b[0mapply_broadcast\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mself\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
317 | "\u001b[1;32m~\\Anaconda3\\lib\\site-packages\\pandas\\core\\apply.py\u001b[0m in \u001b[0;36mget_result\u001b[1;34m(self)\u001b[0m\n\u001b[0;32m 140\u001b[0m \u001b[1;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mapply_raw\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 141\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 142\u001b[1;33m \u001b[1;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mapply_standard\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 143\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 144\u001b[0m \u001b[1;32mdef\u001b[0m \u001b[0mapply_empty_result\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mself\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
318 | "\u001b[1;32m~\\Anaconda3\\lib\\site-packages\\pandas\\core\\apply.py\u001b[0m in \u001b[0;36mapply_standard\u001b[1;34m(self)\u001b[0m\n\u001b[0;32m 246\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 247\u001b[0m \u001b[1;31m# compute the result using the series generator\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 248\u001b[1;33m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mapply_series_generator\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 249\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 250\u001b[0m \u001b[1;31m# wrap results\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
319 | "\u001b[1;32m~\\Anaconda3\\lib\\site-packages\\pandas\\core\\apply.py\u001b[0m in \u001b[0;36mapply_series_generator\u001b[1;34m(self)\u001b[0m\n\u001b[0;32m 275\u001b[0m \u001b[1;32mtry\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 276\u001b[0m \u001b[1;32mfor\u001b[0m \u001b[0mi\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mv\u001b[0m \u001b[1;32min\u001b[0m \u001b[0menumerate\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mseries_gen\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 277\u001b[1;33m \u001b[0mresults\u001b[0m\u001b[1;33m[\u001b[0m\u001b[0mi\u001b[0m\u001b[1;33m]\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mf\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mv\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 278\u001b[0m \u001b[0mkeys\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mappend\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mv\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mname\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 279\u001b[0m \u001b[1;32mexcept\u001b[0m \u001b[0mException\u001b[0m \u001b[1;32mas\u001b[0m \u001b[0me\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
320 | "\u001b[1;31mTypeError\u001b[0m: (\"avg_3() missing 2 required positional arguments: 'y' and 'z'\", 'occurred at index a')"
321 | ]
322 | }
323 | ],
324 | "source": [
325 | "print(df.apply(avg_3))"
326 | ]
327 | },
328 | {
329 | "cell_type": "code",
330 | "execution_count": 12,
331 | "metadata": {},
332 | "outputs": [
333 | {
334 | "name": "stdout",
335 | "output_type": "stream",
336 | "text": [
337 | "a 20.0\n",
338 | "b 30.0\n",
339 | "dtype: float64\n"
340 | ]
341 | }
342 | ],
343 | "source": [
344 | "def avg_3_apply(col):\n",
345 | " x = col[0] \n",
346 | " y = col[1] \n",
347 | " z = col[2] \n",
348 | " return (x + y + z) / 3\n",
349 | "\n",
350 | "\n",
351 | "print(df.apply(avg_3_apply))"
352 | ]
353 | },
354 | {
355 | "cell_type": "code",
356 | "execution_count": 23,
357 | "metadata": {},
358 | "outputs": [],
359 | "source": [
360 | "def avg_3_apply(col):\n",
361 | " sum = 0\n",
362 | " for item in col:\n",
363 | " sum += item\n",
364 | " return sum / df.shape[0]"
365 | ]
366 | },
367 | {
368 | "cell_type": "code",
369 | "execution_count": 31,
370 | "metadata": {},
371 | "outputs": [],
372 | "source": [
373 | "def avg_2_apply(row):\n",
374 | " sum = 0\n",
375 | " for item in row:\n",
376 | " sum += item\n",
377 | " return sum / df.shape[1]"
378 | ]
379 | },
380 | {
381 | "cell_type": "code",
382 | "execution_count": 32,
383 | "metadata": {},
384 | "outputs": [
385 | {
386 | "name": "stdout",
387 | "output_type": "stream",
388 | "text": [
389 | "0 15.0\n",
390 | "1 25.0\n",
391 | "2 35.0\n",
392 | "dtype: float64\n"
393 | ]
394 | }
395 | ],
396 | "source": [
397 | "print(df.apply(avg_2_apply, axis = 1))"
398 | ]
399 | },
400 | {
401 | "cell_type": "markdown",
402 | "metadata": {},
403 | "source": [
404 | "# 직접 해보세요!\n",
405 | "## 데이터프레임의 누락값을 처리한 다음 apply 메서드 사용하기(208쪽)"
406 | ]
407 | },
408 | {
409 | "cell_type": "markdown",
410 | "metadata": {},
411 | "source": [
412 | "### 1. 데이터프레임의 누락값 처리하기 ― 열 방향"
413 | ]
414 | },
415 | {
416 | "cell_type": "code",
417 | "execution_count": 33,
418 | "metadata": {},
419 | "outputs": [],
420 | "source": [
421 | "import seaborn as sns\n",
422 | "\n",
423 | "titanic = sns.load_dataset(\"titanic\")"
424 | ]
425 | },
426 | {
427 | "cell_type": "code",
428 | "execution_count": 34,
429 | "metadata": {},
430 | "outputs": [
431 | {
432 | "name": "stdout",
433 | "output_type": "stream",
434 | "text": [
435 | "<class 'pandas.core.frame.DataFrame'>\n",
436 | "RangeIndex: 891 entries, 0 to 890\n",
437 | "Data columns (total 15 columns):\n",
438 | "survived 891 non-null int64\n",
439 | "pclass 891 non-null int64\n",
440 | "sex 891 non-null object\n",
441 | "age 714 non-null float64\n",
442 | "sibsp 891 non-null int64\n",
443 | "parch 891 non-null int64\n",
444 | "fare 891 non-null float64\n",
445 | "embarked 889 non-null object\n",
446 | "class 891 non-null category\n",
447 | "who 891 non-null object\n",
448 | "adult_male 891 non-null bool\n",
449 | "deck 203 non-null category\n",
450 | "embark_town 889 non-null object\n",
451 | "alive 891 non-null object\n",
452 | "alone 891 non-null bool\n",
453 | "dtypes: bool(2), category(2), float64(2), int64(4), object(5)\n",
454 | "memory usage: 80.6+ KB\n",
455 | "None\n"
456 | ]
457 | }
458 | ],
459 | "source": [
460 | "print(titanic.info())"
461 | ]
462 | },
463 | {
464 | "cell_type": "code",
465 | "execution_count": 10,
466 | "metadata": {},
467 | "outputs": [],
468 | "source": [
469 | "import numpy as np\n",
470 | "\n",
471 | "def count_missing(vec):\n",
472 | " null_vec = pd.isnull(vec)\n",
473 | " null_count = np.sum(null_vec)\n",
474 | " return null_count"
475 | ]
476 | },
477 | {
478 | "cell_type": "code",
479 | "execution_count": 41,
480 | "metadata": {},
481 | "outputs": [
482 | {
483 | "name": "stdout",
484 | "output_type": "stream",
485 | "text": [
486 | "survived 0\n",
487 | "pclass 0\n",
488 | "sex 0\n",
489 | "age 177\n",
490 | "sibsp 0\n",
491 | "parch 0\n",
492 | "fare 0\n",
493 | "embarked 2\n",
494 | "class 0\n",
495 | "who 0\n",
496 | "adult_male 0\n",
497 | "deck 688\n",
498 | "embark_town 2\n",
499 | "alive 0\n",
500 | "alone 0\n",
501 | "dtype: int64\n"
502 | ]
503 | }
504 | ],
505 | "source": [
506 | "cmis_col = titanic.apply(count_missing)\n",
507 | "print(cmis_col)"
508 | ]
509 | },
510 | {
511 | "cell_type": "code",
512 | "execution_count": 43,
513 | "metadata": {},
514 | "outputs": [],
515 | "source": [
516 | "def prop_missing(vec):\n",
517 | " num = count_missing(vec)\n",
518 | " dem = vec.size\n",
519 | " return num / dem"
520 | ]
521 | },
522 | {
523 | "cell_type": "code",
524 | "execution_count": 45,
525 | "metadata": {},
526 | "outputs": [
527 | {
528 | "name": "stdout",
529 | "output_type": "stream",
530 | "text": [
531 | "survived 0.000000\n",
532 | "pclass 0.000000\n",
533 | "sex 0.000000\n",
534 | "age 0.198653\n",
535 | "sibsp 0.000000\n",
536 | "parch 0.000000\n",
537 | "fare 0.000000\n",
538 | "embarked 0.002245\n",
539 | "class 0.000000\n",
540 | "who 0.000000\n",
541 | "adult_male 0.000000\n",
542 | "deck 0.772166\n",
543 | "embark_town 0.002245\n",
544 | "alive 0.000000\n",
545 | "alone 0.000000\n",
546 | "dtype: float64\n"
547 | ]
548 | }
549 | ],
550 | "source": [
551 | "pmis_col = titanic.apply(prop_missing)\n",
552 | "print(pmis_col)"
553 | ]
554 | },
555 | {
556 | "cell_type": "code",
557 | "execution_count": 46,
558 | "metadata": {},
559 | "outputs": [],
560 | "source": [
561 | "def prop_complete(vec):\n",
562 | " return 1 - prop_missing(vec)"
563 | ]
564 | },
565 | {
566 | "cell_type": "markdown",
567 | "metadata": {},
568 | "source": [
569 | "### 8. 데이터프레임의 누락값을 처리하기 ― 행 방향"
570 | ]
571 | },
572 | {
573 | "cell_type": "code",
574 | "execution_count": 48,
575 | "metadata": {},
576 | "outputs": [
577 | {
578 | "name": "stdout",
579 | "output_type": "stream",
580 | "text": [
581 | "0 1\n",
582 | "1 0\n",
583 | "2 1\n",
584 | "3 0\n",
585 | "4 1\n",
586 | "dtype: int64\n"
587 | ]
588 | }
589 | ],
590 | "source": [
591 | "cmis_row = titanic.apply(count_missing, axis=1)\n",
592 | "pmis_row = titanic.apply(prop_missing, axis=1)\n",
593 | "pcom_row = titanic.apply(prop_complete, axis=1)\n",
594 | "\n",
595 | "print(cmis_row.head())"
596 | ]
597 | },
598 | {
599 | "cell_type": "code",
600 | "execution_count": 49,
601 | "metadata": {},
602 | "outputs": [
603 | {
604 | "name": "stdout",
605 | "output_type": "stream",
606 | "text": [
607 | "0 0.066667\n",
608 | "1 0.000000\n",
609 | "2 0.066667\n",
610 | "3 0.000000\n",
611 | "4 0.066667\n",
612 | "dtype: float64\n"
613 | ]
614 | }
615 | ],
616 | "source": [
617 | "print(pmis_row.head())"
618 | ]
619 | },
620 | {
621 | "cell_type": "code",
622 | "execution_count": 50,
623 | "metadata": {},
624 | "outputs": [
625 | {
626 | "name": "stdout",
627 | "output_type": "stream",
628 | "text": [
629 | "0 0.933333\n",
630 | "1 1.000000\n",
631 | "2 0.933333\n",
632 | "3 1.000000\n",
633 | "4 0.933333\n",
634 | "dtype: float64\n"
635 | ]
636 | }
637 | ],
638 | "source": [
639 | "print(pcom_row.head())"
640 | ]
641 | },
642 | {
643 | "cell_type": "code",
644 | "execution_count": 51,
645 | "metadata": {},
646 | "outputs": [
647 | {
648 | "name": "stdout",
649 | "output_type": "stream",
650 | "text": [
651 | " survived pclass sex age sibsp parch fare embarked class \\\n",
652 | "0 0 3 male 22.0 1 0 7.2500 S Third \n",
653 | "1 1 1 female 38.0 1 0 71.2833 C First \n",
654 | "2 1 3 female 26.0 0 0 7.9250 S Third \n",
655 | "3 1 1 female 35.0 1 0 53.1000 S First \n",
656 | "4 0 3 male 35.0 0 0 8.0500 S Third \n",
657 | "\n",
658 | " who adult_male deck embark_town alive alone num_missing \n",
659 | "0 man True NaN Southampton no False 1 \n",
660 | "1 woman False C Cherbourg yes False 0 \n",
661 | "2 woman False NaN Southampton yes True 1 \n",
662 | "3 woman False C Southampton yes False 0 \n",
663 | "4 man True NaN Southampton no True 1 \n"
664 | ]
665 | }
666 | ],
667 | "source": [
668 | "titanic['num_missing'] = titanic.apply(count_missing, axis=1)\n",
669 | "\n",
670 | "print(titanic.head())"
671 | ]
672 | },
673 | {
674 | "cell_type": "code",
675 | "execution_count": 53,
676 | "metadata": {},
677 | "outputs": [
678 | {
679 | "name": "stdout",
680 | "output_type": "stream",
681 | "text": [
682 | " survived pclass sex age sibsp parch fare embarked class \\\n",
683 | "186 1 3 female NaN 1 0 15.5000 Q Third \n",
684 | "274 1 3 female NaN 0 0 7.7500 Q Third \n",
685 | "410 0 3 male NaN 0 0 7.8958 S Third \n",
686 | "547 1 2 male NaN 0 0 13.8625 C Second \n",
687 | "601 0 3 male NaN 0 0 7.8958 S Third \n",
688 | "578 0 3 female NaN 1 0 14.4583 C Third \n",
689 | "76 0 3 male NaN 0 0 7.8958 S Third \n",
690 | "560 0 3 male NaN 0 0 7.7500 Q Third \n",
691 | "511 0 3 male NaN 0 0 8.0500 S Third \n",
692 | "495 0 3 male NaN 0 0 14.4583 C Third \n",
693 | "\n",
694 | " who adult_male deck embark_town alive alone num_missing \n",
695 | "186 woman False NaN Queenstown yes False 2 \n",
696 | "274 woman False NaN Queenstown yes True 2 \n",
697 | "410 man True NaN Southampton no True 2 \n",
698 | "547 man True NaN Cherbourg yes True 2 \n",
699 | "601 man True NaN Southampton no True 2 \n",
700 | "578 woman False NaN Cherbourg no False 2 \n",
701 | "76 man True NaN Southampton no True 2 \n",
702 | "560 man True NaN Queenstown no True 2 \n",
703 | "511 man True NaN Southampton no True 2 \n",
704 | "495 man True NaN Cherbourg no True 2 \n"
705 | ]
706 | }
707 | ],
708 | "source": [
709 | "print(titanic.loc[titanic.num_missing > 1, :].sample(10))"
710 | ]
711 | }
712 | ],
713 | "metadata": {
714 | "kernelspec": {
715 | "display_name": "Python 3",
716 | "language": "python",
717 | "name": "python3"
718 | },
719 | "language_info": {
720 | "codemirror_mode": {
721 | "name": "ipython",
722 | "version": 3
723 | },
724 | "file_extension": ".py",
725 | "mimetype": "text/x-python",
726 | "name": "python",
727 | "nbconvert_exporter": "python",
728 | "pygments_lexer": "ipython3",
729 | "version": "3.6.5"
730 | }
731 | },
732 | "nbformat": 4,
733 | "nbformat_minor": 1
734 | }
735 |
--------------------------------------------------------------------------------
/notebook/.ipynb_checkpoints/Special-checkpoint.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "metadata": {},
6 | "source": [
7 | "# 직접 해보세요!\n",
8 | "## 코드의 성능을 향상시켜 실행 시간 측정하기 ― timeit"
9 | ]
10 | },
11 | {
12 | "cell_type": "code",
13 | "execution_count": 1,
14 | "metadata": {},
15 | "outputs": [],
16 | "source": [
17 | "import pandas as pd\n",
18 | "import numpy as np"
19 | ]
20 | },
21 | {
22 | "cell_type": "code",
23 | "execution_count": 2,
24 | "metadata": {},
25 | "outputs": [],
26 | "source": [
27 | "df = pd.DataFrame({'a': [10, 20, 30], 'b': [20, 30, 40]})"
28 | ]
29 | },
30 | {
31 | "cell_type": "code",
32 | "execution_count": 3,
33 | "metadata": {},
34 | "outputs": [],
35 | "source": [
36 | "def avg_2_apply(row):\n",
37 | " x = row[0]\n",
38 | " y = row[1]\n",
39 | " if(x == 20):\n",
40 | " return np.nan\n",
41 | " else:\n",
42 | " return (x + y)/2"
43 | ]
44 | },
45 | {
46 | "cell_type": "markdown",
47 | "metadata": {},
48 | "source": [
49 | "### 2. 판다스 데이터프레임 ― 실행 시간 측정"
50 | ]
51 | },
52 | {
53 | "cell_type": "code",
54 | "execution_count": 4,
55 | "metadata": {},
56 | "outputs": [
57 | {
58 | "name": "stdout",
59 | "output_type": "stream",
60 | "text": [
61 | "511 µs ± 5.98 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)\n"
62 | ]
63 | }
64 | ],
65 | "source": [
66 | "%%timeit\n",
67 | "df.apply(avg_2_apply, axis = 1)"
68 | ]
69 | },
70 | {
71 | "cell_type": "markdown",
72 | "metadata": {},
73 | "source": [
74 | "### 3. 넘파이로 벡터화한 함수 사용하기 ― 실행 시간 측정"
75 | ]
76 | },
77 | {
78 | "cell_type": "code",
79 | "execution_count": 5,
80 | "metadata": {},
81 | "outputs": [],
82 | "source": [
83 | "@np.vectorize\n",
84 | "def v_avg_2mod(x, y):\n",
85 | " if(x == 20):\n",
86 | " return (np.NaN)\n",
87 | " else:\n",
88 | " return (x + y) / 2"
89 | ]
90 | },
91 | {
92 | "cell_type": "code",
93 | "execution_count": 6,
94 | "metadata": {},
95 | "outputs": [
96 | {
97 | "name": "stdout",
98 | "output_type": "stream",
99 | "text": [
100 | "36 µs ± 1.1 µs per loop (mean ± std. dev. of 7 runs, 10000 loops each)\n"
101 | ]
102 | }
103 | ],
104 | "source": [
105 | "%%timeit\n",
106 | "v_avg_2mod(df['a'], df['b'])"
107 | ]
108 | },
109 | {
110 | "cell_type": "markdown",
111 | "metadata": {},
112 | "source": [
113 | "### 5. numba 라이브러리로 벡터화한 함수 사용하기 ― 실행 시간 측정"
114 | ]
115 | },
116 | {
117 | "cell_type": "code",
118 | "execution_count": 7,
119 | "metadata": {},
120 | "outputs": [],
121 | "source": [
122 | "import numba\n",
123 | "\n",
124 | "@numba.vectorize\n",
125 | "def v_avg_2_numba(x, y):\n",
126 | " if(x == 20):\n",
127 | " return (np.NaN)\n",
128 | " else:\n",
129 | " return (x + y) / 2"
130 | ]
131 | },
132 | {
133 | "cell_type": "code",
134 | "execution_count": 8,
135 | "metadata": {},
136 | "outputs": [
137 | {
138 | "name": "stdout",
139 | "output_type": "stream",
140 | "text": [
141 | "4.46 µs ± 47.9 ns per loop (mean ± std. dev. of 7 runs, 100000 loops each)\n"
142 | ]
143 | }
144 | ],
145 | "source": [
146 | "%%timeit\n",
147 | "v_avg_2_numba(df['a'].values, df['b'].values)"
148 | ]
149 | }
150 | ],
151 | "metadata": {
152 | "kernelspec": {
153 | "display_name": "Python 3",
154 | "language": "python",
155 | "name": "python3"
156 | },
157 | "language_info": {
158 | "codemirror_mode": {
159 | "name": "ipython",
160 | "version": 3
161 | },
162 | "file_extension": ".py",
163 | "mimetype": "text/x-python",
164 | "name": "python",
165 | "nbconvert_exporter": "python",
166 | "pygments_lexer": "ipython3",
167 | "version": "3.6.5"
168 | }
169 | },
170 | "nbformat": 4,
171 | "nbformat_minor": 2
172 | }
173 |
--------------------------------------------------------------------------------
/notebook/.ipynb_checkpoints/hello_jupyter_notebook-checkpoint.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "code",
5 | "execution_count": null,
6 | "metadata": {},
7 | "outputs": [],
8 | "source": []
9 | }
10 | ],
11 | "metadata": {
12 | "kernelspec": {
13 | "display_name": "Python 3",
14 | "language": "python",
15 | "name": "python3"
16 | },
17 | "language_info": {
18 | "codemirror_mode": {
19 | "name": "ipython",
20 | "version": 3
21 | },
22 | "file_extension": ".py",
23 | "mimetype": "text/x-python",
24 | "name": "python",
25 | "nbconvert_exporter": "python",
26 | "pygments_lexer": "ipython3",
27 | "version": "3.6.5"
28 | }
29 | },
30 | "nbformat": 4,
31 | "nbformat_minor": 2
32 | }
33 |
--------------------------------------------------------------------------------
/notebook/02_practice.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "metadata": {},
6 | "source": [
7 | "# 갭마인더 데이터 집합 불러오기"
8 | ]
9 | },
10 | {
11 | "cell_type": "code",
12 | "execution_count": null,
13 | "metadata": {},
14 | "outputs": [],
15 | "source": []
16 | },
17 | {
18 | "cell_type": "markdown",
19 | "metadata": {},
20 | "source": [
21 | "# 불러온 데이터 집합 살펴보기"
22 | ]
23 | },
24 | {
25 | "cell_type": "code",
26 | "execution_count": null,
27 | "metadata": {},
28 | "outputs": [],
29 | "source": []
30 | },
31 | {
32 | "cell_type": "markdown",
33 | "metadata": {},
34 | "source": [
35 | "# 열 단위로 데이터 추출하기"
36 | ]
37 | },
38 | {
39 | "cell_type": "code",
40 | "execution_count": null,
41 | "metadata": {},
42 | "outputs": [],
43 | "source": []
44 | },
45 | {
46 | "cell_type": "markdown",
47 | "metadata": {},
48 | "source": [
49 | "# loc 속성으로 행 단위 데이터 추출하기"
50 | ]
51 | },
52 | {
53 | "cell_type": "code",
54 | "execution_count": null,
55 | "metadata": {},
56 | "outputs": [],
57 | "source": []
58 | },
59 | {
60 | "cell_type": "markdown",
61 | "metadata": {},
62 | "source": [
63 | "# tail과 loc는 조금 달라요!"
64 | ]
65 | },
66 | {
67 | "cell_type": "code",
68 | "execution_count": null,
69 | "metadata": {},
70 | "outputs": [],
71 | "source": []
72 | },
73 | {
74 | "cell_type": "markdown",
75 | "metadata": {},
76 | "source": [
77 | "# iloc 속성으로 행 단위 데이터 추출하기"
78 | ]
79 | },
80 | {
81 | "cell_type": "code",
82 | "execution_count": null,
83 | "metadata": {},
84 | "outputs": [],
85 | "source": []
86 | },
87 | {
88 | "cell_type": "markdown",
89 | "metadata": {},
90 | "source": [
91 | "# 파이썬 슬라이싱 구문을 조합하여 원하는 데이터 추출하기"
92 | ]
93 | },
94 | {
95 | "cell_type": "code",
96 | "execution_count": null,
97 | "metadata": {},
98 | "outputs": [],
99 | "source": []
100 | },
101 | {
102 | "cell_type": "markdown",
103 | "metadata": {},
104 | "source": [
105 | "# iloc 속성과 range 메서드로 원하는 데이터 추출하기"
106 | ]
107 | },
108 | {
109 | "cell_type": "code",
110 | "execution_count": null,
111 | "metadata": {},
112 | "outputs": [],
113 | "source": []
114 | },
115 | {
116 | "cell_type": "markdown",
117 | "metadata": {},
118 | "source": [
119 | "# 열 지정값에 파이썬 슬라이싱을 사용하여 원하는 데이터 추출하기"
120 | ]
121 | },
122 | {
123 | "cell_type": "code",
124 | "execution_count": null,
125 | "metadata": {},
126 | "outputs": [],
127 | "source": []
128 | },
129 | {
130 | "cell_type": "markdown",
131 | "metadata": {},
132 | "source": [
133 | "# loc, iloc 자유자재로 사용하기"
134 | ]
135 | },
136 | {
137 | "cell_type": "code",
138 | "execution_count": null,
139 | "metadata": {},
140 | "outputs": [],
141 | "source": []
142 | },
143 | {
144 | "cell_type": "markdown",
145 | "metadata": {},
146 | "source": [
147 | "# 그룹화한 데이터의 평균 구하기"
148 | ]
149 | },
150 | {
151 | "cell_type": "code",
152 | "execution_count": null,
153 | "metadata": {},
154 | "outputs": [],
155 | "source": []
156 | },
157 | {
158 | "cell_type": "markdown",
159 | "metadata": {},
160 | "source": [
161 | "# 그룹화한 데이터의 개수 세어보기"
162 | ]
163 | },
164 | {
165 | "cell_type": "code",
166 | "execution_count": null,
167 | "metadata": {},
168 | "outputs": [],
169 | "source": []
170 | },
171 | {
172 | "cell_type": "markdown",
173 | "metadata": {},
174 | "source": [
175 | "# 그래프 만들기"
176 | ]
177 | },
178 | {
179 | "cell_type": "code",
180 | "execution_count": null,
181 | "metadata": {},
182 | "outputs": [],
183 | "source": []
184 | }
185 | ],
186 | "metadata": {
187 | "kernelspec": {
188 | "display_name": "Python 3",
189 | "language": "python",
190 | "name": "python3"
191 | },
192 | "language_info": {
193 | "codemirror_mode": {
194 | "name": "ipython",
195 | "version": 3
196 | },
197 | "file_extension": ".py",
198 | "mimetype": "text/x-python",
199 | "name": "python",
200 | "nbconvert_exporter": "python",
201 | "pygments_lexer": "ipython3",
202 | "version": "3.6.5"
203 | }
204 | },
205 | "nbformat": 4,
206 | "nbformat_minor": 2
207 | }
208 |
--------------------------------------------------------------------------------
/notebook/03_practice.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "metadata": {},
6 | "source": [
7 | "# 시리즈 만들기"
8 | ]
9 | },
10 | {
11 | "cell_type": "code",
12 | "execution_count": null,
13 | "metadata": {},
14 | "outputs": [],
15 | "source": []
16 | },
17 | {
18 | "cell_type": "markdown",
19 | "metadata": {},
20 | "source": [
21 | "# 데이터프레임 만들기"
22 | ]
23 | },
24 | {
25 | "cell_type": "code",
26 | "execution_count": null,
27 | "metadata": {},
28 | "outputs": [],
29 | "source": []
30 | },
31 | {
32 | "cell_type": "markdown",
33 | "metadata": {},
34 | "source": [
35 | "# 데이터프레임에서 시리즈 선택하기"
36 | ]
37 | },
38 | {
39 | "cell_type": "code",
40 | "execution_count": null,
41 | "metadata": {},
42 | "outputs": [],
43 | "source": []
44 | },
45 | {
46 | "cell_type": "markdown",
47 | "metadata": {},
48 | "source": [
49 | "# index, values, keys 사용하기"
50 | ]
51 | },
52 | {
53 | "cell_type": "code",
54 | "execution_count": null,
55 | "metadata": {},
56 | "outputs": [],
57 | "source": []
58 | },
59 | {
60 | "cell_type": "markdown",
61 | "metadata": {},
62 | "source": [
63 | "# 시리즈의 mean, min, max, std 메서드 사용하기"
64 | ]
65 | },
66 | {
67 | "cell_type": "code",
68 | "execution_count": null,
69 | "metadata": {},
70 | "outputs": [],
71 | "source": []
72 | },
73 | {
74 | "cell_type": "markdown",
75 | "metadata": {},
76 | "source": [
77 | "# 시리즈와 불린 추출 사용하기"
78 | ]
79 | },
80 | {
81 | "cell_type": "code",
82 | "execution_count": null,
83 | "metadata": {},
84 | "outputs": [],
85 | "source": []
86 | },
87 | {
88 | "cell_type": "markdown",
89 | "metadata": {},
90 | "source": [
91 | "# 시리즈와 브로드캐스팅"
92 | ]
93 | },
94 | {
95 | "cell_type": "code",
96 | "execution_count": null,
97 | "metadata": {},
98 | "outputs": [],
99 | "source": []
100 | },
101 | {
102 | "cell_type": "markdown",
103 | "metadata": {},
104 | "source": [
105 | "# 데이터프레임과 불린 추출"
106 | ]
107 | },
108 | {
109 | "cell_type": "code",
110 | "execution_count": null,
111 | "metadata": {},
112 | "outputs": [],
113 | "source": []
114 | },
115 | {
116 | "cell_type": "markdown",
117 | "metadata": {},
118 | "source": [
119 | "# 데이터프레임과 브로드캐스팅"
120 | ]
121 | },
122 | {
123 | "cell_type": "code",
124 | "execution_count": null,
125 | "metadata": {},
126 | "outputs": [],
127 | "source": []
128 | },
129 | {
130 | "cell_type": "markdown",
131 | "metadata": {},
132 | "source": [
133 | "# 열의 자료형 바꾸기와 새로운 열 추가하기"
134 | ]
135 | },
136 | {
137 | "cell_type": "code",
138 | "execution_count": null,
139 | "metadata": {},
140 | "outputs": [],
141 | "source": []
142 | },
143 | {
144 | "cell_type": "markdown",
145 | "metadata": {},
146 | "source": [
147 | "# 시리즈, 데이터프레임의 데이터 섞어보기"
148 | ]
149 | },
150 | {
151 | "cell_type": "code",
152 | "execution_count": null,
153 | "metadata": {},
154 | "outputs": [],
155 | "source": []
156 | },
157 | {
158 | "cell_type": "markdown",
159 | "metadata": {},
160 | "source": [
161 | "# 데이터프레임의 열 삭제하기"
162 | ]
163 | },
164 | {
165 | "cell_type": "code",
166 | "execution_count": null,
167 | "metadata": {},
168 | "outputs": [],
169 | "source": []
170 | },
171 | {
172 | "cell_type": "markdown",
173 | "metadata": {},
174 | "source": [
175 | "# 피클 형식으로 저장하기"
176 | ]
177 | },
178 | {
179 | "cell_type": "code",
180 | "execution_count": null,
181 | "metadata": {},
182 | "outputs": [],
183 | "source": []
184 | },
185 | {
186 | "cell_type": "markdown",
187 | "metadata": {},
188 | "source": [
189 | "# CSV 불러오기"
190 | ]
191 | },
192 | {
193 | "cell_type": "code",
194 | "execution_count": null,
195 | "metadata": {},
196 | "outputs": [],
197 | "source": []
198 | }
199 | ],
200 | "metadata": {
201 | "kernelspec": {
202 | "display_name": "Python 3",
203 | "language": "python",
204 | "name": "python3"
205 | },
206 | "language_info": {
207 | "codemirror_mode": {
208 | "name": "ipython",
209 | "version": 3
210 | },
211 | "file_extension": ".py",
212 | "mimetype": "text/x-python",
213 | "name": "python",
214 | "nbconvert_exporter": "python",
215 | "pygments_lexer": "ipython3",
216 | "version": "3.6.5"
217 | }
218 | },
219 | "nbformat": 4,
220 | "nbformat_minor": 2
221 | }
222 |
--------------------------------------------------------------------------------
/notebook/04_practice.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "metadata": {},
6 | "source": [
7 | "# 앤스콤 데이터 집합 불러오기"
8 | ]
9 | },
10 | {
11 | "cell_type": "code",
12 | "execution_count": null,
13 | "metadata": {},
14 | "outputs": [],
15 | "source": []
16 | },
17 | {
18 | "cell_type": "markdown",
19 | "metadata": {},
20 | "source": [
21 | "# matplotlib 라이브러리로 간단한 그래프 그리기"
22 | ]
23 | },
24 | {
25 | "cell_type": "code",
26 | "execution_count": null,
27 | "metadata": {},
28 | "outputs": [],
29 | "source": []
30 | },
31 | {
32 | "cell_type": "markdown",
33 | "metadata": {},
34 | "source": [
35 | "# 한 번에 4개의 그래프 그리기"
36 | ]
37 | },
38 | {
39 | "cell_type": "code",
40 | "execution_count": null,
41 | "metadata": {},
42 | "outputs": [],
43 | "source": []
44 | },
45 | {
46 | "cell_type": "markdown",
47 | "metadata": {},
48 | "source": [
49 | "# 기초 그래프 그리기 - 히스토그램, 산점도, 박스 그래프"
50 | ]
51 | },
52 | {
53 | "cell_type": "code",
54 | "execution_count": null,
55 | "metadata": {},
56 | "outputs": [],
57 | "source": []
58 | },
59 | {
60 | "cell_type": "markdown",
61 | "metadata": {},
62 | "source": [
63 | "# 단변량 그래프 그리기 - 히스토그램"
64 | ]
65 | },
66 | {
67 | "cell_type": "code",
68 | "execution_count": null,
69 | "metadata": {},
70 | "outputs": [],
71 | "source": []
72 | },
73 | {
74 | "cell_type": "markdown",
75 | "metadata": {},
76 | "source": [
77 | "# 다변량 그래프 그리기"
78 | ]
79 | },
80 | {
81 | "cell_type": "code",
82 | "execution_count": null,
83 | "metadata": {},
84 | "outputs": [],
85 | "source": []
86 | },
87 | {
88 | "cell_type": "markdown",
89 | "metadata": {},
90 | "source": [
91 | "# 데이터프레임과 시리즈로 그래프 그리기"
92 | ]
93 | },
94 | {
95 | "cell_type": "code",
96 | "execution_count": null,
97 | "metadata": {},
98 | "outputs": [],
99 | "source": []
100 | },
101 | {
102 | "cell_type": "markdown",
103 | "metadata": {},
104 | "source": [
105 | "# 알아두면 좋아요"
106 | ]
107 | },
108 | {
109 | "cell_type": "code",
110 | "execution_count": null,
111 | "metadata": {},
112 | "outputs": [],
113 | "source": []
114 | }
115 | ],
116 | "metadata": {
117 | "kernelspec": {
118 | "display_name": "Python 3",
119 | "language": "python",
120 | "name": "python3"
121 | },
122 | "language_info": {
123 | "codemirror_mode": {
124 | "name": "ipython",
125 | "version": 3
126 | },
127 | "file_extension": ".py",
128 | "mimetype": "text/x-python",
129 | "name": "python",
130 | "nbconvert_exporter": "python",
131 | "pygments_lexer": "ipython3",
132 | "version": "3.6.5"
133 | }
134 | },
135 | "nbformat": 4,
136 | "nbformat_minor": 2
137 | }
138 |
--------------------------------------------------------------------------------
/notebook/05_practice.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "metadata": {},
6 | "source": [
7 | "# concat 메서드 사용하기"
8 | ]
9 | },
10 | {
11 | "cell_type": "code",
12 | "execution_count": null,
13 | "metadata": {},
14 | "outputs": [],
15 | "source": []
16 | },
17 | {
18 | "cell_type": "markdown",
19 | "metadata": {},
20 | "source": [
21 | "# 데이터프레임에 시리즈 연결하기"
22 | ]
23 | },
24 | {
25 | "cell_type": "code",
26 | "execution_count": null,
27 | "metadata": {},
28 | "outputs": [],
29 | "source": []
30 | },
31 | {
32 | "cell_type": "markdown",
33 | "metadata": {},
34 | "source": [
35 | "# 행 1개로 구성된 데이터프레임 생성하여 연결하기"
36 | ]
37 | },
38 | {
39 | "cell_type": "code",
40 | "execution_count": null,
41 | "metadata": {},
42 | "outputs": [],
43 | "source": []
44 | },
45 | {
46 | "cell_type": "markdown",
47 | "metadata": {},
48 | "source": [
49 | "# 다양한 방법으로 데이터 연결하기"
50 | ]
51 | },
52 | {
53 | "cell_type": "code",
54 | "execution_count": null,
55 | "metadata": {},
56 | "outputs": [],
57 | "source": []
58 | },
59 | {
60 | "cell_type": "markdown",
61 | "metadata": {},
62 | "source": [
63 | "# 공통 열과 공통 인덱스만 연결하기"
64 | ]
65 | },
66 | {
67 | "cell_type": "code",
68 | "execution_count": null,
69 | "metadata": {},
70 | "outputs": [],
71 | "source": []
72 | },
73 | {
74 | "cell_type": "markdown",
75 | "metadata": {},
76 | "source": [
77 | "# merge 메서드 사용하기"
78 | ]
79 | },
80 | {
81 | "cell_type": "code",
82 | "execution_count": null,
83 | "metadata": {},
84 | "outputs": [],
85 | "source": []
86 | }
87 | ],
88 | "metadata": {
89 | "kernelspec": {
90 | "display_name": "Python 3",
91 | "language": "python",
92 | "name": "python3"
93 | },
94 | "language_info": {
95 | "codemirror_mode": {
96 | "name": "ipython",
97 | "version": 3
98 | },
99 | "file_extension": ".py",
100 | "mimetype": "text/x-python",
101 | "name": "python",
102 | "nbconvert_exporter": "python",
103 | "pygments_lexer": "ipython3",
104 | "version": "3.6.5"
105 | }
106 | },
107 | "nbformat": 4,
108 | "nbformat_minor": 2
109 | }
110 |
--------------------------------------------------------------------------------
/notebook/06_practice.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "metadata": {},
6 | "source": [
7 | "# 누락값 확인하기"
8 | ]
9 | },
10 | {
11 | "cell_type": "code",
12 | "execution_count": null,
13 | "metadata": {},
14 | "outputs": [],
15 | "source": []
16 | },
17 | {
18 | "cell_type": "markdown",
19 | "metadata": {},
20 | "source": [
21 | "# 누락값을 포함한 데이터를 불러올 때"
22 | ]
23 | },
24 | {
25 | "cell_type": "code",
26 | "execution_count": null,
27 | "metadata": {},
28 | "outputs": [],
29 | "source": []
30 | },
31 | {
32 | "cell_type": "markdown",
33 | "metadata": {},
34 | "source": [
35 | "# 데이터 집합을 연결할 때 누락값이 발생하는 경우"
36 | ]
37 | },
38 | {
39 | "cell_type": "code",
40 | "execution_count": null,
41 | "metadata": {},
42 | "outputs": [],
43 | "source": []
44 | },
45 | {
46 | "cell_type": "markdown",
47 | "metadata": {},
48 | "source": [
49 | "# 데이터를 입력할 때 누락값이 발생하는 경우"
50 | ]
51 | },
52 | {
53 | "cell_type": "code",
54 | "execution_count": null,
55 | "metadata": {},
56 | "outputs": [],
57 | "source": []
58 | },
59 | {
60 | "cell_type": "markdown",
61 | "metadata": {},
62 | "source": [
63 | "# 인덱스를 다시 만들 때 누락값이 발생하는 경우"
64 | ]
65 | },
66 | {
67 | "cell_type": "code",
68 | "execution_count": null,
69 | "metadata": {},
70 | "outputs": [],
71 | "source": []
72 | },
73 | {
74 | "cell_type": "markdown",
75 | "metadata": {},
76 | "source": [
77 | "# 누락값의 개수 구하기"
78 | ]
79 | },
80 | {
81 | "cell_type": "code",
82 | "execution_count": null,
83 | "metadata": {},
84 | "outputs": [],
85 | "source": []
86 | },
87 | {
88 | "cell_type": "markdown",
89 | "metadata": {},
90 | "source": [
91 | "# 누락값을 다른 값으로 변경하기"
92 | ]
93 | },
94 | {
95 | "cell_type": "code",
96 | "execution_count": null,
97 | "metadata": {},
98 | "outputs": [],
99 | "source": []
100 | },
101 | {
102 | "cell_type": "markdown",
103 | "metadata": {},
104 | "source": [
105 | "# 누락값 삭제하기"
106 | ]
107 | },
108 | {
109 | "cell_type": "code",
110 | "execution_count": null,
111 | "metadata": {},
112 | "outputs": [],
113 | "source": []
114 | },
115 | {
116 | "cell_type": "markdown",
117 | "metadata": {},
118 | "source": [
119 | "# 누락값이 포함된 데이터 계산하기"
120 | ]
121 | },
122 | {
123 | "cell_type": "code",
124 | "execution_count": null,
125 | "metadata": {},
126 | "outputs": [],
127 | "source": []
128 | }
129 | ],
130 | "metadata": {
131 | "kernelspec": {
132 | "display_name": "Python 3",
133 | "language": "python",
134 | "name": "python3"
135 | },
136 | "language_info": {
137 | "codemirror_mode": {
138 | "name": "ipython",
139 | "version": 3
140 | },
141 | "file_extension": ".py",
142 | "mimetype": "text/x-python",
143 | "name": "python",
144 | "nbconvert_exporter": "python",
145 | "pygments_lexer": "ipython3",
146 | "version": "3.6.5"
147 | }
148 | },
149 | "nbformat": 4,
150 | "nbformat_minor": 2
151 | }
152 |
--------------------------------------------------------------------------------
/notebook/07_practice.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "metadata": {},
6 | "source": [
7 | "# 1개의 열만 고정하고 나머지 열을 행으로 바꾸기"
8 | ]
9 | },
10 | {
11 | "cell_type": "code",
12 | "execution_count": null,
13 | "metadata": {},
14 | "outputs": [],
15 | "source": []
16 | },
17 | {
18 | "cell_type": "markdown",
19 | "metadata": {},
20 | "source": [
21 | "# 2개 이상의 열을 고정하고 나머지 열을 행으로 바꾸기"
22 | ]
23 | },
24 | {
25 | "cell_type": "code",
26 | "execution_count": null,
27 | "metadata": {},
28 | "outputs": [],
29 | "source": []
30 | },
31 | {
32 | "cell_type": "markdown",
33 | "metadata": {
34 | "scrolled": true
35 | },
36 | "source": [
37 | "# ebola 데이터 집합 살펴보기"
38 | ]
39 | },
40 | {
41 | "cell_type": "code",
42 | "execution_count": null,
43 | "metadata": {
44 | "scrolled": true
45 | },
46 | "outputs": [],
47 | "source": []
48 | },
49 | {
50 | "cell_type": "markdown",
51 | "metadata": {},
52 | "source": [
53 | "# 열 이름 나누고 데이터 프레임에 추가하기"
54 | ]
55 | },
56 | {
57 | "cell_type": "code",
58 | "execution_count": null,
59 | "metadata": {},
60 | "outputs": [],
61 | "source": []
62 | },
63 | {
64 | "cell_type": "markdown",
65 | "metadata": {},
66 | "source": [
67 | "# concat 메서드를 응용하여 데이터프레임에 열 추가하기"
68 | ]
69 | },
70 | {
71 | "cell_type": "code",
72 | "execution_count": null,
73 | "metadata": {},
74 | "outputs": [],
75 | "source": []
76 | },
77 | {
78 | "cell_type": "markdown",
79 | "metadata": {},
80 | "source": [
81 | "# 기상 데이터의 여러 열을 하나로 정리하기"
82 | ]
83 | },
84 | {
85 | "cell_type": "code",
86 | "execution_count": null,
87 | "metadata": {},
88 | "outputs": [],
89 | "source": []
90 | },
91 | {
92 | "cell_type": "markdown",
93 | "metadata": {},
94 | "source": [
95 | "# 빌보드 차트의 중복 데이터 처리하기"
96 | ]
97 | },
98 | {
99 | "cell_type": "code",
100 | "execution_count": null,
101 | "metadata": {},
102 | "outputs": [],
103 | "source": []
104 | },
105 | {
106 | "cell_type": "markdown",
107 | "metadata": {},
108 | "source": [
109 | "# 뉴욕 택시 데이터 준비"
110 | ]
111 | },
112 | {
113 | "cell_type": "code",
114 | "execution_count": null,
115 | "metadata": {},
116 | "outputs": [],
117 | "source": []
118 | },
119 | {
120 | "cell_type": "markdown",
121 | "metadata": {},
122 | "source": [
123 | "# 반복문으로 데이터 준비하기"
124 | ]
125 | },
126 | {
127 | "cell_type": "code",
128 | "execution_count": null,
129 | "metadata": {},
130 | "outputs": [],
131 | "source": []
132 | }
133 | ],
134 | "metadata": {
135 | "kernelspec": {
136 | "display_name": "Python 3",
137 | "language": "python",
138 | "name": "python3"
139 | },
140 | "language_info": {
141 | "codemirror_mode": {
142 | "name": "ipython",
143 | "version": 3
144 | },
145 | "file_extension": ".py",
146 | "mimetype": "text/x-python",
147 | "name": "python",
148 | "nbconvert_exporter": "python",
149 | "pygments_lexer": "ipython3",
150 | "version": "3.6.5"
151 | }
152 | },
153 | "nbformat": 4,
154 | "nbformat_minor": 2
155 | }
156 |
--------------------------------------------------------------------------------
/notebook/08_done.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "metadata": {},
6 | "source": [
7 | "# 직접 해보세요!\n",
8 | "## 자료형을 자유자재로 변환하기 ─ astype 메서드(172쪽)"
9 | ]
10 | },
11 | {
12 | "cell_type": "code",
13 | "execution_count": 1,
14 | "metadata": {},
15 | "outputs": [],
16 | "source": [
17 | "import pandas as pd\n",
18 | "import seaborn as sns\n",
19 | "\n",
20 | "tips = sns.load_dataset(\"tips\")"
21 | ]
22 | },
23 | {
24 | "cell_type": "markdown",
25 | "metadata": {},
26 | "source": [
27 | "### 2. 여러 가지 자료형을 문자열로 변환하기"
28 | ]
29 | },
30 | {
31 | "cell_type": "code",
32 | "execution_count": 3,
33 | "metadata": {},
34 | "outputs": [
35 | {
36 | "name": "stdout",
37 | "output_type": "stream",
38 | "text": [
39 | "total_bill float64\n",
40 | "tip float64\n",
41 | "sex category\n",
42 | "smoker category\n",
43 | "day category\n",
44 | "time category\n",
45 | "size int64\n",
46 | "sex_str object\n",
47 | "dtype: object\n"
48 | ]
49 | }
50 | ],
51 | "source": [
52 | "tips['sex_str'] = tips['sex'].astype(str)\n",
53 | "print(tips.dtypes)"
54 | ]
55 | },
56 | {
57 | "cell_type": "markdown",
58 | "metadata": {},
59 | "source": [
60 | "### 4. 자료형을 변환한 데이터 다시 원래대로 만들기"
61 | ]
62 | },
63 | {
64 | "cell_type": "code",
65 | "execution_count": 4,
66 | "metadata": {},
67 | "outputs": [
68 | {
69 | "name": "stdout",
70 | "output_type": "stream",
71 | "text": [
72 | "total_bill object\n",
73 | "tip float64\n",
74 | "sex category\n",
75 | "smoker category\n",
76 | "day category\n",
77 | "time category\n",
78 | "size int64\n",
79 | "sex_str object\n",
80 | "dtype: object\n"
81 | ]
82 | }
83 | ],
84 | "source": [
85 | "tips['total_bill'] = tips['total_bill'].astype(str) \n",
86 | "print(tips.dtypes)"
87 | ]
88 | },
89 | {
90 | "cell_type": "code",
91 | "execution_count": 5,
92 | "metadata": {},
93 | "outputs": [
94 | {
95 | "name": "stdout",
96 | "output_type": "stream",
97 | "text": [
98 | "total_bill float64\n",
99 | "tip float64\n",
100 | "sex category\n",
101 | "smoker category\n",
102 | "day category\n",
103 | "time category\n",
104 | "size int64\n",
105 | "sex_str object\n",
106 | "dtype: object\n"
107 | ]
108 | }
109 | ],
110 | "source": [
111 | "tips['total_bill'] = tips['total_bill'].astype(float) \n",
112 | "print(tips.dtypes)"
113 | ]
114 | },
115 | {
116 | "cell_type": "markdown",
117 | "metadata": {},
118 | "source": [
119 | "# 직접 해보세요!\n",
120 | "## 잘못 입력한 문자열 처리하기 ─ to_numeric 메서드(174쪽)"
121 | ]
122 | },
123 | {
124 | "cell_type": "code",
125 | "execution_count": 6,
126 | "metadata": {},
127 | "outputs": [
128 | {
129 | "name": "stdout",
130 | "output_type": "stream",
131 | "text": [
132 | " total_bill tip sex smoker day time size sex_str\n",
133 | "0 16.99 1.01 Female No Sun Dinner 2 Female\n",
134 | "1 missing 1.66 Male No Sun Dinner 3 Male\n",
135 | "2 21.01 3.50 Male No Sun Dinner 3 Male\n",
136 | "3 missing 3.31 Male No Sun Dinner 2 Male\n",
137 | "4 24.59 3.61 Female No Sun Dinner 4 Female\n",
138 | "5 missing 4.71 Male No Sun Dinner 4 Male\n",
139 | "6 8.77 2.00 Male No Sun Dinner 2 Male\n",
140 | "7 missing 3.12 Male No Sun Dinner 4 Male\n",
141 | "8 15.04 1.96 Male No Sun Dinner 2 Male\n",
142 | "9 14.78 3.23 Male No Sun Dinner 2 Male\n"
143 | ]
144 | },
145 | {
146 | "name": "stderr",
147 | "output_type": "stream",
148 | "text": [
149 | "C:\\Users\\phk70\\Anaconda3\\lib\\site-packages\\pandas\\core\\indexing.py:543: SettingWithCopyWarning: \n",
150 | "A value is trying to be set on a copy of a slice from a DataFrame.\n",
151 | "Try using .loc[row_indexer,col_indexer] = value instead\n",
152 | "\n",
153 | "See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy\n",
154 | " self.obj[item] = s\n"
155 | ]
156 | }
157 | ],
158 | "source": [
159 | "tips_sub_miss = tips.head(10).copy()  # independent copy, so the .loc assignment below does not raise SettingWithCopyWarning\n",
160 | "tips_sub_miss.loc[[1, 3, 5, 7], 'total_bill'] = 'missing'  # put a non-numeric placeholder in rows 1, 3, 5 and 7\n",
161 | "\n",
162 | "print(tips_sub_miss)"
163 | ]
164 | },
165 | {
166 | "cell_type": "code",
167 | "execution_count": 8,
168 | "metadata": {},
169 | "outputs": [
170 | {
171 | "name": "stdout",
172 | "output_type": "stream",
173 | "text": [
174 | "total_bill object\n",
175 | "tip float64\n",
176 | "sex category\n",
177 | "smoker category\n",
178 | "day category\n",
179 | "time category\n",
180 | "size int64\n",
181 | "sex_str object\n",
182 | "dtype: object\n"
183 | ]
184 | }
185 | ],
186 | "source": [
187 | "print(tips_sub_miss.dtypes)"
188 | ]
189 | },
190 | {
191 | "cell_type": "code",
192 | "execution_count": 9,
193 | "metadata": {},
194 | "outputs": [
195 | {
196 | "ename": "ValueError",
197 | "evalue": "could not convert string to float: 'missing'",
198 | "output_type": "error",
199 | "traceback": [
200 | "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m",
201 | "\u001b[1;31mValueError\u001b[0m Traceback (most recent call last)",
202 | "\u001b[1;32m\u001b[0m in \u001b[0;36m\u001b[1;34m()\u001b[0m\n\u001b[1;32m----> 1\u001b[1;33m \u001b[0mtips_sub_miss\u001b[0m\u001b[1;33m[\u001b[0m\u001b[1;34m'total_bill'\u001b[0m\u001b[1;33m]\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mastype\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mfloat\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m",
203 | "\u001b[1;32m~\\Anaconda3\\lib\\site-packages\\pandas\\util\\_decorators.py\u001b[0m in \u001b[0;36mwrapper\u001b[1;34m(*args, **kwargs)\u001b[0m\n\u001b[0;32m 175\u001b[0m \u001b[1;32melse\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 176\u001b[0m \u001b[0mkwargs\u001b[0m\u001b[1;33m[\u001b[0m\u001b[0mnew_arg_name\u001b[0m\u001b[1;33m]\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mnew_arg_value\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 177\u001b[1;33m \u001b[1;32mreturn\u001b[0m \u001b[0mfunc\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m*\u001b[0m\u001b[0margs\u001b[0m\u001b[1;33m,\u001b[0m \u001b[1;33m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 178\u001b[0m \u001b[1;32mreturn\u001b[0m \u001b[0mwrapper\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 179\u001b[0m \u001b[1;32mreturn\u001b[0m \u001b[0m_deprecate_kwarg\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
204 | "\u001b[1;32m~\\Anaconda3\\lib\\site-packages\\pandas\\core\\generic.py\u001b[0m in \u001b[0;36mastype\u001b[1;34m(self, dtype, copy, errors, **kwargs)\u001b[0m\n\u001b[0;32m 4995\u001b[0m \u001b[1;31m# else, only a single dtype is given\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 4996\u001b[0m new_data = self._data.astype(dtype=dtype, copy=copy, errors=errors,\n\u001b[1;32m-> 4997\u001b[1;33m **kwargs)\n\u001b[0m\u001b[0;32m 4998\u001b[0m \u001b[1;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0m_constructor\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mnew_data\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0m__finalize__\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mself\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 4999\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n",
205 | "\u001b[1;32m~\\Anaconda3\\lib\\site-packages\\pandas\\core\\internals.py\u001b[0m in \u001b[0;36mastype\u001b[1;34m(self, dtype, **kwargs)\u001b[0m\n\u001b[0;32m 3712\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 3713\u001b[0m \u001b[1;32mdef\u001b[0m \u001b[0mastype\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mself\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mdtype\u001b[0m\u001b[1;33m,\u001b[0m \u001b[1;33m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m-> 3714\u001b[1;33m \u001b[1;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mapply\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;34m'astype'\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mdtype\u001b[0m\u001b[1;33m=\u001b[0m\u001b[0mdtype\u001b[0m\u001b[1;33m,\u001b[0m \u001b[1;33m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 3715\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 3716\u001b[0m \u001b[1;32mdef\u001b[0m \u001b[0mconvert\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mself\u001b[0m\u001b[1;33m,\u001b[0m \u001b[1;33m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
206 | "\u001b[1;32m~\\Anaconda3\\lib\\site-packages\\pandas\\core\\internals.py\u001b[0m in \u001b[0;36mapply\u001b[1;34m(self, f, axes, filter, do_integrity_check, consolidate, **kwargs)\u001b[0m\n\u001b[0;32m 3579\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 3580\u001b[0m \u001b[0mkwargs\u001b[0m\u001b[1;33m[\u001b[0m\u001b[1;34m'mgr'\u001b[0m\u001b[1;33m]\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m-> 3581\u001b[1;33m \u001b[0mapplied\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mgetattr\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mb\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mf\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 3582\u001b[0m \u001b[0mresult_blocks\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0m_extend_blocks\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mapplied\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mresult_blocks\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 3583\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n",
207 | "\u001b[1;32m~\\Anaconda3\\lib\\site-packages\\pandas\\core\\internals.py\u001b[0m in \u001b[0;36mastype\u001b[1;34m(self, dtype, copy, errors, values, **kwargs)\u001b[0m\n\u001b[0;32m 573\u001b[0m \u001b[1;32mdef\u001b[0m \u001b[0mastype\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mself\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mdtype\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mcopy\u001b[0m\u001b[1;33m=\u001b[0m\u001b[1;32mFalse\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0merrors\u001b[0m\u001b[1;33m=\u001b[0m\u001b[1;34m'raise'\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mvalues\u001b[0m\u001b[1;33m=\u001b[0m\u001b[1;32mNone\u001b[0m\u001b[1;33m,\u001b[0m \u001b[1;33m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 574\u001b[0m return self._astype(dtype, copy=copy, errors=errors, values=values,\n\u001b[1;32m--> 575\u001b[1;33m **kwargs)\n\u001b[0m\u001b[0;32m 576\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 577\u001b[0m def _astype(self, dtype, copy=False, errors='raise', values=None,\n",
208 | "\u001b[1;32m~\\Anaconda3\\lib\\site-packages\\pandas\\core\\internals.py\u001b[0m in \u001b[0;36m_astype\u001b[1;34m(self, dtype, copy, errors, values, klass, mgr, **kwargs)\u001b[0m\n\u001b[0;32m 662\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 663\u001b[0m \u001b[1;31m# _astype_nansafe works fine with 1-d only\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 664\u001b[1;33m \u001b[0mvalues\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mastype_nansafe\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mvalues\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mravel\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mdtype\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mcopy\u001b[0m\u001b[1;33m=\u001b[0m\u001b[1;32mTrue\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 665\u001b[0m \u001b[0mvalues\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mvalues\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mreshape\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mshape\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 666\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n",
209 | "\u001b[1;32m~\\Anaconda3\\lib\\site-packages\\pandas\\core\\dtypes\\cast.py\u001b[0m in \u001b[0;36mastype_nansafe\u001b[1;34m(arr, dtype, copy)\u001b[0m\n\u001b[0;32m 728\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 729\u001b[0m \u001b[1;32mif\u001b[0m \u001b[0mcopy\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 730\u001b[1;33m \u001b[1;32mreturn\u001b[0m \u001b[0marr\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mastype\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mdtype\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mcopy\u001b[0m\u001b[1;33m=\u001b[0m\u001b[1;32mTrue\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 731\u001b[0m \u001b[1;32mreturn\u001b[0m \u001b[0marr\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mview\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mdtype\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 732\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n",
210 | "\u001b[1;31mValueError\u001b[0m: could not convert string to float: 'missing'"
211 | ]
212 | }
213 | ],
214 | "source": [
215 | "tips_sub_miss['total_bill'].astype(float)"
216 | ]
217 | },
218 | {
219 | "cell_type": "code",
220 | "execution_count": 10,
221 | "metadata": {},
222 | "outputs": [
223 | {
224 | "ename": "ValueError",
225 | "evalue": "Unable to parse string \"missing\" at position 1",
226 | "output_type": "error",
227 | "traceback": [
228 | "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m",
229 | "\u001b[1;31mValueError\u001b[0m Traceback (most recent call last)",
230 | "\u001b[1;32mpandas/_libs/src\\inference.pyx\u001b[0m in \u001b[0;36mpandas._libs.lib.maybe_convert_numeric\u001b[1;34m()\u001b[0m\n",
231 | "\u001b[1;31mValueError\u001b[0m: Unable to parse string \"missing\"",
232 | "\nDuring handling of the above exception, another exception occurred:\n",
233 | "\u001b[1;31mValueError\u001b[0m Traceback (most recent call last)",
234 | "\u001b[1;32m\u001b[0m in \u001b[0;36m\u001b[1;34m()\u001b[0m\n\u001b[1;32m----> 1\u001b[1;33m \u001b[0mpd\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mto_numeric\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mtips_sub_miss\u001b[0m\u001b[1;33m[\u001b[0m\u001b[1;34m'total_bill'\u001b[0m\u001b[1;33m]\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m",
235 | "\u001b[1;32m~\\Anaconda3\\lib\\site-packages\\pandas\\core\\tools\\numeric.py\u001b[0m in \u001b[0;36mto_numeric\u001b[1;34m(arg, errors, downcast)\u001b[0m\n\u001b[0;32m 131\u001b[0m \u001b[0mcoerce_numeric\u001b[0m \u001b[1;33m=\u001b[0m \u001b[1;32mFalse\u001b[0m \u001b[1;32mif\u001b[0m \u001b[0merrors\u001b[0m \u001b[1;32min\u001b[0m \u001b[1;33m(\u001b[0m\u001b[1;34m'ignore'\u001b[0m\u001b[1;33m,\u001b[0m \u001b[1;34m'raise'\u001b[0m\u001b[1;33m)\u001b[0m \u001b[1;32melse\u001b[0m \u001b[1;32mTrue\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 132\u001b[0m values = lib.maybe_convert_numeric(values, set(),\n\u001b[1;32m--> 133\u001b[1;33m coerce_numeric=coerce_numeric)\n\u001b[0m\u001b[0;32m 134\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 135\u001b[0m \u001b[1;32mexcept\u001b[0m \u001b[0mException\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
236 | "\u001b[1;32mpandas/_libs/src\\inference.pyx\u001b[0m in \u001b[0;36mpandas._libs.lib.maybe_convert_numeric\u001b[1;34m()\u001b[0m\n",
237 | "\u001b[1;31mValueError\u001b[0m: Unable to parse string \"missing\" at position 1"
238 | ]
239 | }
240 | ],
241 | "source": [
242 | "pd.to_numeric(tips_sub_miss['total_bill'])"
243 | ]
244 | },
245 | {
246 | "cell_type": "code",
247 | "execution_count": 11,
248 | "metadata": {},
249 | "outputs": [
250 | {
251 | "name": "stdout",
252 | "output_type": "stream",
253 | "text": [
254 | "total_bill object\n",
255 | "tip float64\n",
256 | "sex category\n",
257 | "smoker category\n",
258 | "day category\n",
259 | "time category\n",
260 | "size int64\n",
261 | "sex_str object\n",
262 | "dtype: object\n"
263 | ]
264 | },
265 | {
266 | "name": "stderr",
267 | "output_type": "stream",
268 | "text": [
269 | "C:\\Users\\fermat39\\Anaconda3\\lib\\site-packages\\ipykernel_launcher.py:1: SettingWithCopyWarning: \n",
270 | "A value is trying to be set on a copy of a slice from a DataFrame.\n",
271 | "Try using .loc[row_indexer,col_indexer] = value instead\n",
272 | "\n",
273 | "See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy\n",
274 | " \"\"\"Entry point for launching an IPython kernel.\n"
275 | ]
276 | }
277 | ],
278 | "source": [
279 | "tips_sub_miss['total_bill'] = pd.to_numeric( tips_sub_miss['total_bill'], errors='ignore')\n",
280 | "\n",
281 | "print(tips_sub_miss.dtypes)"
282 | ]
283 | },
284 | {
285 | "cell_type": "code",
286 | "execution_count": 12,
287 | "metadata": {},
288 | "outputs": [
289 | {
290 | "name": "stdout",
291 | "output_type": "stream",
292 | "text": [
293 | "total_bill float64\n",
294 | "tip float64\n",
295 | "sex category\n",
296 | "smoker category\n",
297 | "day category\n",
298 | "time category\n",
299 | "size int64\n",
300 | "sex_str object\n",
301 | "dtype: object\n"
302 | ]
303 | },
304 | {
305 | "name": "stderr",
306 | "output_type": "stream",
307 | "text": [
308 | "C:\\Users\\fermat39\\Anaconda3\\lib\\site-packages\\ipykernel_launcher.py:1: SettingWithCopyWarning: \n",
309 | "A value is trying to be set on a copy of a slice from a DataFrame.\n",
310 | "Try using .loc[row_indexer,col_indexer] = value instead\n",
311 | "\n",
312 | "See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy\n",
313 | " \"\"\"Entry point for launching an IPython kernel.\n"
314 | ]
315 | }
316 | ],
317 | "source": [
318 | "tips_sub_miss['total_bill'] = pd.to_numeric( tips_sub_miss['total_bill'], errors='coerce')\n",
319 | "\n",
320 | "print(tips_sub_miss.dtypes)"
321 | ]
322 | },
323 | {
324 | "cell_type": "code",
325 | "execution_count": 13,
326 | "metadata": {},
327 | "outputs": [
328 | {
329 | "name": "stdout",
330 | "output_type": "stream",
331 | "text": [
332 | "total_bill float32\n",
333 | "tip float64\n",
334 | "sex category\n",
335 | "smoker category\n",
336 | "day category\n",
337 | "time category\n",
338 | "size int64\n",
339 | "sex_str object\n",
340 | "dtype: object\n"
341 | ]
342 | },
343 | {
344 | "name": "stderr",
345 | "output_type": "stream",
346 | "text": [
347 | "C:\\Users\\fermat39\\Anaconda3\\lib\\site-packages\\ipykernel_launcher.py:1: SettingWithCopyWarning: \n",
348 | "A value is trying to be set on a copy of a slice from a DataFrame.\n",
349 | "Try using .loc[row_indexer,col_indexer] = value instead\n",
350 | "\n",
351 | "See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy\n",
352 | " \"\"\"Entry point for launching an IPython kernel.\n"
353 | ]
354 | }
355 | ],
356 | "source": [
357 | "tips_sub_miss['total_bill'] = pd.to_numeric( tips_sub_miss['total_bill'], errors='coerce', downcast='float')\n",
358 | "\n",
359 | "print(tips_sub_miss.dtypes)"
360 | ]
361 | },
362 | {
363 | "cell_type": "markdown",
364 | "metadata": {},
365 | "source": [
366 | "# 직접 해보세요!\n",
367 | "## 문자열을 카테고리로 변환하기(179쪽)"
368 | ]
369 | },
370 | {
371 | "cell_type": "code",
372 | "execution_count": 14,
373 | "metadata": {},
374 | "outputs": [
375 | {
376 | "name": "stdout",
377 | "output_type": "stream",
378 | "text": [
379 | "<class 'pandas.core.frame.DataFrame'>\n",
380 | "RangeIndex: 244 entries, 0 to 243\n",
381 | "Data columns (total 8 columns):\n",
382 | "total_bill 244 non-null float64\n",
383 | "tip 244 non-null float64\n",
384 | "sex 244 non-null object\n",
385 | "smoker 244 non-null category\n",
386 | "day 244 non-null category\n",
387 | "time 244 non-null category\n",
388 | "size 244 non-null int64\n",
389 | "sex_str 244 non-null object\n",
390 | "dtypes: category(3), float64(2), int64(1), object(2)\n",
391 | "memory usage: 10.7+ KB\n",
392 | "None\n"
393 | ]
394 | }
395 | ],
396 | "source": [
397 | "tips['sex'] = tips['sex'].astype('str') \n",
398 | "print(tips.info())"
399 | ]
400 | },
401 | {
402 | "cell_type": "code",
403 | "execution_count": 15,
404 | "metadata": {},
405 | "outputs": [
406 | {
407 | "name": "stdout",
408 | "output_type": "stream",
409 | "text": [
410 | "<class 'pandas.core.frame.DataFrame'>\n",
411 | "RangeIndex: 244 entries, 0 to 243\n",
412 | "Data columns (total 8 columns):\n",
413 | "total_bill 244 non-null float64\n",
414 | "tip 244 non-null float64\n",
415 | "sex 244 non-null category\n",
416 | "smoker 244 non-null category\n",
417 | "day 244 non-null category\n",
418 | "time 244 non-null category\n",
419 | "size 244 non-null int64\n",
420 | "sex_str 244 non-null object\n",
421 | "dtypes: category(4), float64(2), int64(1), object(1)\n",
422 | "memory usage: 9.1+ KB\n",
423 | "None\n"
424 | ]
425 | }
426 | ],
427 | "source": [
428 | "tips['sex'] = tips['sex'].astype('category') \n",
429 | "print(tips.info())"
430 | ]
431 | }
432 | ],
433 | "metadata": {
434 | "kernelspec": {
435 | "display_name": "Python 3",
436 | "language": "python",
437 | "name": "python3"
438 | },
439 | "language_info": {
440 | "codemirror_mode": {
441 | "name": "ipython",
442 | "version": 3
443 | },
444 | "file_extension": ".py",
445 | "mimetype": "text/x-python",
446 | "name": "python",
447 | "nbconvert_exporter": "python",
448 | "pygments_lexer": "ipython3",
449 | "version": "3.6.5"
450 | }
451 | },
452 | "nbformat": 4,
453 | "nbformat_minor": 1
454 | }
455 |
--------------------------------------------------------------------------------
/notebook/08_practice.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "metadata": {},
6 | "source": [
7 | "# dtypes 속성으로 데이터프레임의 자료형 살펴보기"
8 | ]
9 | },
10 | {
11 | "cell_type": "code",
12 | "execution_count": null,
13 | "metadata": {},
14 | "outputs": [],
15 | "source": []
16 | },
17 | {
18 | "cell_type": "markdown",
19 | "metadata": {},
20 | "source": [
21 | "# 여러 가지 자료형을 문자열로 변환하기"
22 | ]
23 | },
24 | {
25 | "cell_type": "code",
26 | "execution_count": null,
27 | "metadata": {},
28 | "outputs": [],
29 | "source": []
30 | },
31 | {
32 | "cell_type": "markdown",
33 | "metadata": {},
34 | "source": [
35 | "# 자료형을 변환한 데이터 다시 원래대로 만들기"
36 | ]
37 | },
38 | {
39 | "cell_type": "code",
40 | "execution_count": null,
41 | "metadata": {},
42 | "outputs": [],
43 | "source": []
44 | },
45 | {
46 | "cell_type": "markdown",
47 | "metadata": {},
48 | "source": [
49 | "# 잘못 입력한 문자열 처리하기"
50 | ]
51 | },
52 | {
53 | "cell_type": "code",
54 | "execution_count": null,
55 | "metadata": {},
56 | "outputs": [],
57 | "source": []
58 | },
59 | {
60 | "cell_type": "markdown",
61 | "metadata": {},
62 | "source": [
63 | "# 문자열을 카테고리로 변환하기"
64 | ]
65 | },
66 | {
67 | "cell_type": "code",
68 | "execution_count": null,
69 | "metadata": {},
70 | "outputs": [],
71 | "source": []
72 | }
73 | ],
74 | "metadata": {
75 | "kernelspec": {
76 | "display_name": "Python 3",
77 | "language": "python",
78 | "name": "python3"
79 | },
80 | "language_info": {
81 | "codemirror_mode": {
82 | "name": "ipython",
83 | "version": 3
84 | },
85 | "file_extension": ".py",
86 | "mimetype": "text/x-python",
87 | "name": "python",
88 | "nbconvert_exporter": "python",
89 | "pygments_lexer": "ipython3",
90 | "version": "3.6.5"
91 | }
92 | },
93 | "nbformat": 4,
94 | "nbformat_minor": 1
95 | }
96 |
--------------------------------------------------------------------------------
/notebook/09_done.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "metadata": {},
6 | "source": [
7 | "# 직접 해보세요!\n",
8 | "## 문자열 추출하기(183쪽)"
9 | ]
10 | },
11 | {
12 | "cell_type": "code",
13 | "execution_count": 1,
14 | "metadata": {},
15 | "outputs": [],
16 | "source": [
17 | "word = 'grail'\n",
18 | "sent = 'a scratch'"
19 | ]
20 | },
21 | {
22 | "cell_type": "code",
23 | "execution_count": 2,
24 | "metadata": {},
25 | "outputs": [
26 | {
27 | "name": "stdout",
28 | "output_type": "stream",
29 | "text": [
30 | "g\n"
31 | ]
32 | }
33 | ],
34 | "source": [
35 | "print(word[0])"
36 | ]
37 | },
38 | {
39 | "cell_type": "code",
40 | "execution_count": 3,
41 | "metadata": {},
42 | "outputs": [
43 | {
44 | "name": "stdout",
45 | "output_type": "stream",
46 | "text": [
47 | "a\n"
48 | ]
49 | }
50 | ],
51 | "source": [
52 | "print(sent[0])"
53 | ]
54 | },
55 | {
56 | "cell_type": "code",
57 | "execution_count": 4,
58 | "metadata": {},
59 | "outputs": [
60 | {
61 | "name": "stdout",
62 | "output_type": "stream",
63 | "text": [
64 | "gra\n"
65 | ]
66 | }
67 | ],
68 | "source": [
69 | "print(word[0:3])"
70 | ]
71 | },
72 | {
73 | "cell_type": "markdown",
74 | "metadata": {},
75 | "source": [
76 | "---"
77 | ]
78 | },
79 | {
80 | "cell_type": "code",
81 | "execution_count": 5,
82 | "metadata": {},
83 | "outputs": [
84 | {
85 | "name": "stdout",
86 | "output_type": "stream",
87 | "text": [
88 | "h\n"
89 | ]
90 | }
91 | ],
92 | "source": [
93 | "print(sent[-1])"
94 | ]
95 | },
96 | {
97 | "cell_type": "code",
98 | "execution_count": 6,
99 | "metadata": {},
100 | "outputs": [
101 | {
102 | "name": "stdout",
103 | "output_type": "stream",
104 | "text": [
105 | "a\n"
106 | ]
107 | }
108 | ],
109 | "source": [
110 | "print(sent[-9:-8])"
111 | ]
112 | },
113 | {
114 | "cell_type": "code",
115 | "execution_count": 7,
116 | "metadata": {},
117 | "outputs": [
118 | {
119 | "name": "stdout",
120 | "output_type": "stream",
121 | "text": [
122 | "a\n"
123 | ]
124 | }
125 | ],
126 | "source": [
127 | "print(sent[0:-8])"
128 | ]
129 | },
130 | {
131 | "cell_type": "markdown",
132 | "metadata": {},
133 | "source": [
134 | "# 알아두면 좋아요!\n",
135 | "## 전체 문자열을 추출할 때 음수를 사용하면 안 됩니다(184쪽)"
136 | ]
137 | },
138 | {
139 | "cell_type": "code",
140 | "execution_count": 8,
141 | "metadata": {},
142 | "outputs": [
143 | {
144 | "name": "stdout",
145 | "output_type": "stream",
146 | "text": [
147 | "scratc\n"
148 | ]
149 | }
150 | ],
151 | "source": [
152 | "print(sent[2:-1])"
153 | ]
154 | },
155 | {
156 | "cell_type": "code",
157 | "execution_count": 11,
158 | "metadata": {},
159 | "outputs": [
160 | {
161 | "name": "stdout",
162 | "output_type": "stream",
163 | "text": [
164 | "scratc\n"
165 | ]
166 | }
167 | ],
168 | "source": [
169 | "print(sent[-7:-1])"
170 | ]
171 | },
172 | {
173 | "cell_type": "code",
174 | "execution_count": 12,
175 | "metadata": {},
176 | "outputs": [
177 | {
178 | "name": "stdout",
179 | "output_type": "stream",
180 | "text": [
181 | "9\n"
182 | ]
183 | }
184 | ],
185 | "source": [
186 | "s_len = len(sent)\n",
187 | "print(s_len)"
188 | ]
189 | },
190 | {
191 | "cell_type": "code",
192 | "execution_count": 13,
193 | "metadata": {},
194 | "outputs": [
195 | {
196 | "name": "stdout",
197 | "output_type": "stream",
198 | "text": [
199 | "scratch\n"
200 | ]
201 | }
202 | ],
203 | "source": [
204 | "print(sent[2:s_len])"
205 | ]
206 | },
207 | {
208 | "cell_type": "markdown",
209 | "metadata": {},
210 | "source": [
211 | "# 직접 해보세요!\n",
212 | "## 왼쪽이나 오른쪽 범위를 지정하지 않고 문자열 추출하기(185쪽)"
213 | ]
214 | },
215 | {
216 | "cell_type": "code",
217 | "execution_count": 14,
218 | "metadata": {},
219 | "outputs": [
220 | {
221 | "name": "stdout",
222 | "output_type": "stream",
223 | "text": [
224 | "gra\n"
225 | ]
226 | }
227 | ],
228 | "source": [
229 | "print(word[0:3])"
230 | ]
231 | },
232 | {
233 | "cell_type": "code",
234 | "execution_count": 15,
235 | "metadata": {},
236 | "outputs": [
237 | {
238 | "name": "stdout",
239 | "output_type": "stream",
240 | "text": [
241 | "gra\n"
242 | ]
243 | }
244 | ],
245 | "source": [
246 | "print(word[ :3])"
247 | ]
248 | },
249 | {
250 | "cell_type": "code",
251 | "execution_count": 16,
252 | "metadata": {},
253 | "outputs": [
254 | {
255 | "name": "stdout",
256 | "output_type": "stream",
257 | "text": [
258 | "scratch\n"
259 | ]
260 | }
261 | ],
262 | "source": [
263 | "print(sent[2:len(sent)])"
264 | ]
265 | },
266 | {
267 | "cell_type": "code",
268 | "execution_count": 17,
269 | "metadata": {},
270 | "outputs": [
271 | {
272 | "name": "stdout",
273 | "output_type": "stream",
274 | "text": [
275 | "scratch\n"
276 | ]
277 | }
278 | ],
279 | "source": [
280 | "print(sent[2: ])"
281 | ]
282 | },
283 | {
284 | "cell_type": "code",
285 | "execution_count": 18,
286 | "metadata": {},
287 | "outputs": [
288 | {
289 | "name": "stdout",
290 | "output_type": "stream",
291 | "text": [
292 | "a scratch\n"
293 | ]
294 | }
295 | ],
296 | "source": [
297 | "print(sent[ : ])"
298 | ]
299 | },
300 | {
301 | "cell_type": "code",
302 | "execution_count": 19,
303 | "metadata": {},
304 | "outputs": [
305 | {
306 | "name": "stdout",
307 | "output_type": "stream",
308 | "text": [
309 | "asrth\n"
310 | ]
311 | }
312 | ],
313 | "source": [
314 | "print(sent[::2])"
315 | ]
316 | },
317 | {
318 | "cell_type": "markdown",
319 | "metadata": {},
320 | "source": [
321 | "# 직접 해보세요!\n",
322 | "## join, splitlines, replace 메서드 실습하기(188쪽)"
323 | ]
324 | },
325 | {
326 | "cell_type": "markdown",
327 | "metadata": {},
328 | "source": [
329 | "### 1. join 메서드"
330 | ]
331 | },
332 | {
333 | "cell_type": "code",
334 | "execution_count": 20,
335 | "metadata": {},
336 | "outputs": [],
337 | "source": [
338 | "d1 = '40°' \n",
339 | "m1 = \"46'\" \n",
340 | "s1 = '52.837\"' \n",
341 | "u1 = 'N'\n",
342 | "\n",
343 | "d2 = '73°' \n",
344 | "m2 = \"58'\" \n",
345 | "s2 = '26.302\"' \n",
346 | "u2 = 'W'"
347 | ]
348 | },
349 | {
350 | "cell_type": "code",
351 | "execution_count": 21,
352 | "metadata": {},
353 | "outputs": [
354 | {
355 | "name": "stdout",
356 | "output_type": "stream",
357 | "text": [
358 | "40° 46' 52.837\" N 73° 58' 26.302\" W\n"
359 | ]
360 | }
361 | ],
362 | "source": [
363 | "coords = ' '.join([d1, m1, s1, u1, d2, m2, s2, u2])\n",
364 | "print(coords)"
365 | ]
366 | },
367 | {
368 | "cell_type": "markdown",
369 | "metadata": {},
370 | "source": [
371 | "### 2. splitlines 메서드"
372 | ]
373 | },
374 | {
375 | "cell_type": "code",
376 | "execution_count": 22,
377 | "metadata": {},
378 | "outputs": [
379 | {
380 | "name": "stdout",
381 | "output_type": "stream",
382 | "text": [
383 | "Guard: What? Ridden on a horse?\n",
384 | "King Arthur: Yes!\n",
385 | "Guard: You're using coconuts!\n",
386 | "King Arthur: What?\n",
387 | "Guard: You've got ... coconut[s] and you're bangin' 'em together. \n",
388 | "\n"
389 | ]
390 | }
391 | ],
392 | "source": [
393 | "multi_str = \"\"\"Guard: What? Ridden on a horse?\n",
394 | "King Arthur: Yes!\n",
395 | "Guard: You're using coconuts!\n",
396 | "King Arthur: What?\n",
397 | "Guard: You've got ... coconut[s] and you're bangin' 'em together. \n",
398 | "\"\"\" \n",
399 | "print(multi_str)"
400 | ]
401 | },
402 | {
403 | "cell_type": "code",
404 | "execution_count": 23,
405 | "metadata": {},
406 | "outputs": [
407 | {
408 | "name": "stdout",
409 | "output_type": "stream",
410 | "text": [
411 | "['Guard: What? Ridden on a horse?', 'King Arthur: Yes!', \"Guard: You're using coconuts!\", 'King Arthur: What?', \"Guard: You've got ... coconut[s] and you're bangin' 'em together. \"]\n"
412 | ]
413 | }
414 | ],
415 | "source": [
416 | "multi_str_split = multi_str.splitlines() \n",
417 | "print(multi_str_split)"
418 | ]
419 | },
420 | {
421 | "cell_type": "code",
422 | "execution_count": 24,
423 | "metadata": {},
424 | "outputs": [
425 | {
426 | "name": "stdout",
427 | "output_type": "stream",
428 | "text": [
429 | "['Guard: What? Ridden on a horse?', \"Guard: You're using coconuts!\", \"Guard: You've got ... coconut[s] and you're bangin' 'em together. \"]\n"
430 | ]
431 | }
432 | ],
433 | "source": [
434 | "guard = multi_str_split[::2] \n",
435 | "print(guard)"
436 | ]
437 | },
438 | {
439 | "cell_type": "markdown",
440 | "metadata": {},
441 | "source": [
442 | "### 3. replace 메서드"
443 | ]
444 | },
445 | {
446 | "cell_type": "code",
447 | "execution_count": 25,
448 | "metadata": {},
449 | "outputs": [
450 | {
451 | "name": "stdout",
452 | "output_type": "stream",
453 | "text": [
454 | "['What? Ridden on a horse?', \"You're using coconuts!\", \"You've got ... coconut[s] and you're bangin' 'em together. \"]\n"
455 | ]
456 | }
457 | ],
458 | "source": [
459 | "guard = multi_str.replace(\"Guard: \", \"\").splitlines()[::2] \n",
460 | "print(guard)"
461 | ]
462 | },
463 | {
464 | "cell_type": "markdown",
465 | "metadata": {},
466 | "source": [
467 | "# 직접 해보세요!\n",
468 | "## 문자열 포매팅하기(190쪽)"
469 | ]
470 | },
471 | {
472 | "cell_type": "code",
473 | "execution_count": 26,
474 | "metadata": {},
475 | "outputs": [
476 | {
477 | "name": "stdout",
478 | "output_type": "stream",
479 | "text": [
480 | "It's just a flesh wound!\n"
481 | ]
482 | }
483 | ],
484 | "source": [
485 | "var = 'flesh wound' \n",
486 | "s = \"It's just a {}!\"\n",
487 | "\n",
488 | "print(s.format(var))"
489 | ]
490 | },
491 | {
492 | "cell_type": "code",
493 | "execution_count": 27,
494 | "metadata": {},
495 | "outputs": [
496 | {
497 | "name": "stdout",
498 | "output_type": "stream",
499 | "text": [
500 | "It's just a scratch!\n"
501 | ]
502 | }
503 | ],
504 | "source": [
505 | "print(s.format('scratch'))"
506 | ]
507 | },
508 | {
509 | "cell_type": "code",
510 | "execution_count": 28,
511 | "metadata": {},
512 | "outputs": [
513 | {
514 | "name": "stdout",
515 | "output_type": "stream",
516 | "text": [
517 | "Black Knight: 'Tis but a scratch.\n",
518 | "King Arthur: A scratch? Your arm's off!\n",
519 | "\n"
520 | ]
521 | }
522 | ],
523 | "source": [
524 | "s = \"\"\"Black Knight: 'Tis but a {0}.\n",
525 | "King Arthur: A {0}? Your arm's off!\n",
526 | "\"\"\" \n",
527 | "print(s.format('scratch'))"
528 | ]
529 | },
530 | {
531 | "cell_type": "code",
532 | "execution_count": 29,
533 | "metadata": {},
534 | "outputs": [
535 | {
536 | "name": "stdout",
537 | "output_type": "stream",
538 | "text": [
539 | "Hayden Planetarium Coordinates: 40.7815° N, 73.9733° W\n"
540 | ]
541 | }
542 | ],
543 | "source": [
544 | "s = 'Hayden Planetarium Coordinates: {lat}, {lon}' \n",
545 | "print(s.format(lat='40.7815° N', lon='73.9733° W'))"
546 | ]
547 | },
548 | {
549 | "cell_type": "markdown",
550 | "metadata": {},
551 | "source": [
552 | "# 직접 해보세요!\n",
553 | "## 숫자 데이터 포매팅하기(191쪽)"
554 | ]
555 | },
556 | {
557 | "cell_type": "code",
558 | "execution_count": 30,
559 | "metadata": {},
560 | "outputs": [
561 | {
562 | "name": "stdout",
563 | "output_type": "stream",
564 | "text": [
565 | "Some digits of pi: 3.14159265359\n"
566 | ]
567 | }
568 | ],
569 | "source": [
570 | "print('Some digits of pi: {}'.format(3.14159265359))"
571 | ]
572 | },
573 | {
574 | "cell_type": "code",
575 | "execution_count": 31,
576 | "metadata": {},
577 | "outputs": [
578 | {
579 | "name": "stdout",
580 | "output_type": "stream",
581 | "text": [
582 | "In 2005, Lu Chao of China recited 67,890 digits of pi\n"
583 | ]
584 | }
585 | ],
586 | "source": [
587 | "print(\"In 2005, Lu Chao of China recited {:,} digits of pi\".format(67890))"
588 | ]
589 | },
590 | {
591 | "cell_type": "code",
592 | "execution_count": 32,
593 | "metadata": {},
594 | "outputs": [
595 | {
596 | "name": "stdout",
597 | "output_type": "stream",
598 | "text": [
599 | "I remember 0.0001031 or 0.0103% of what Lu Chao recited\n"
600 | ]
601 | }
602 | ],
603 | "source": [
604 | "print(\"I remember {0:.4} or {0:.4%} of what Lu Chao recited\".format(7/67890))"
605 | ]
606 | },
607 | {
608 | "cell_type": "code",
609 | "execution_count": 33,
610 | "metadata": {},
611 | "outputs": [
612 | {
613 | "name": "stdout",
614 | "output_type": "stream",
615 | "text": [
616 | "My ID number is 00042\n"
617 | ]
618 | }
619 | ],
620 | "source": [
621 | "print(\"My ID number is {0:05d}\".format(42))"
622 | ]
623 | },
624 | {
625 | "cell_type": "markdown",
626 | "metadata": {},
627 | "source": [
628 | "# 직접 해보세요!\n",
629 | "## % 연산자로 포매팅하기(192쪽)"
630 | ]
631 | },
632 | {
633 | "cell_type": "code",
634 | "execution_count": 34,
635 | "metadata": {},
636 | "outputs": [
637 | {
638 | "name": "stdout",
639 | "output_type": "stream",
640 | "text": [
641 | "I only know 7 digits of pi\n"
642 | ]
643 | }
644 | ],
645 | "source": [
646 | "s = 'I only know %d digits of pi' % 7 \n",
647 | "print(s)"
648 | ]
649 | },
650 | {
651 | "cell_type": "code",
652 | "execution_count": 35,
653 | "metadata": {},
654 | "outputs": [
655 | {
656 | "name": "stdout",
657 | "output_type": "stream",
658 | "text": [
659 | "Some digits of e: 2.72\n"
660 | ]
661 | }
662 | ],
663 | "source": [
664 | "print('Some digits of %(cont)s: %(value).2f' % {'cont': 'e', 'value': 2.718})"
665 | ]
666 | },
667 | {
668 | "cell_type": "markdown",
669 | "metadata": {},
670 | "source": [
671 | "# 알아두면 좋아요!\n",
672 | "## f-strings로 포매팅 사용하기(193쪽)"
673 | ]
674 | },
675 | {
676 | "cell_type": "code",
677 | "execution_count": 36,
678 | "metadata": {},
679 | "outputs": [
680 | {
681 | "name": "stdout",
682 | "output_type": "stream",
683 | "text": [
684 | "It's just a flesh wound!\n"
685 | ]
686 | }
687 | ],
688 | "source": [
689 | "var = 'flesh wound' \n",
690 | "s = f\"It's just a {var}!\" \n",
691 | "print(s)"
692 | ]
693 | },
694 | {
695 | "cell_type": "code",
696 | "execution_count": 37,
697 | "metadata": {},
698 | "outputs": [
699 | {
700 | "name": "stdout",
701 | "output_type": "stream",
702 | "text": [
703 | "Hayden Planetarium Coordinates: 40.7815°N, 73.9733°W\n"
704 | ]
705 | }
706 | ],
707 | "source": [
708 | "lat='40.7815°N' \n",
709 | "lon='73.9733°W' \n",
710 | "s = f'Hayden Planetarium Coordinates: {lat}, {lon}' \n",
711 | "print(s)"
712 | ]
713 | },
714 | {
715 | "cell_type": "markdown",
716 | "metadata": {},
717 | "source": [
718 | "# 직접 해보세요!\n",
719 | "## 정규식으로 전화번호 패턴 찾기(196쪽)"
720 | ]
721 | },
722 | {
723 | "cell_type": "code",
724 | "execution_count": 38,
725 | "metadata": {},
726 | "outputs": [],
727 | "source": [
728 | "import re\n",
729 | "\n",
730 | "tele_num = '1234567890'"
731 | ]
732 | },
733 | {
734 | "cell_type": "code",
735 | "execution_count": 39,
736 | "metadata": {},
737 | "outputs": [
738 | {
739 | "name": "stdout",
740 | "output_type": "stream",
741 | "text": [
742 | "<class '_sre.SRE_Match'>\n"
743 | ]
744 | }
745 | ],
746 | "source": [
747 | "m = re.match(pattern='\\d\\d\\d\\d\\d\\d\\d\\d\\d\\d', string=tele_num) \n",
748 | "print(type(m))"
749 | ]
750 | },
751 | {
752 | "cell_type": "code",
753 | "execution_count": 40,
754 | "metadata": {},
755 | "outputs": [
756 | {
757 | "name": "stdout",
758 | "output_type": "stream",
759 | "text": [
760 | "<_sre.SRE_Match object; span=(0, 10), match='1234567890'>\n"
761 | ]
762 | }
763 | ],
764 | "source": [
765 | "print(m)"
766 | ]
767 | },
768 | {
769 | "cell_type": "code",
770 | "execution_count": 41,
771 | "metadata": {},
772 | "outputs": [
773 | {
774 | "name": "stdout",
775 | "output_type": "stream",
776 | "text": [
777 | "True\n"
778 | ]
779 | }
780 | ],
781 | "source": [
782 | "print(bool(m))"
783 | ]
784 | },
785 | {
786 | "cell_type": "code",
787 | "execution_count": 42,
788 | "metadata": {},
789 | "outputs": [
790 | {
791 | "name": "stdout",
792 | "output_type": "stream",
793 | "text": [
794 | "match\n"
795 | ]
796 | }
797 | ],
798 | "source": [
799 | "if m:\n",
800 | " print('match') \n",
801 | "else:\n",
802 | " print('no match')"
803 | ]
804 | },
805 | {
806 | "cell_type": "code",
807 | "execution_count": 43,
808 | "metadata": {},
809 | "outputs": [
810 | {
811 | "name": "stdout",
812 | "output_type": "stream",
813 | "text": [
814 | "0\n"
815 | ]
816 | }
817 | ],
818 | "source": [
819 | "print(m.start())"
820 | ]
821 | },
822 | {
823 | "cell_type": "code",
824 | "execution_count": 44,
825 | "metadata": {},
826 | "outputs": [
827 | {
828 | "name": "stdout",
829 | "output_type": "stream",
830 | "text": [
831 | "10\n"
832 | ]
833 | }
834 | ],
835 | "source": [
836 | "print(m.end())"
837 | ]
838 | },
839 | {
840 | "cell_type": "code",
841 | "execution_count": 45,
842 | "metadata": {},
843 | "outputs": [
844 | {
845 | "name": "stdout",
846 | "output_type": "stream",
847 | "text": [
848 | "(0, 10)\n"
849 | ]
850 | }
851 | ],
852 | "source": [
853 | "print(m.span())"
854 | ]
855 | },
856 | {
857 | "cell_type": "code",
858 | "execution_count": 46,
859 | "metadata": {},
860 | "outputs": [
861 | {
862 | "name": "stdout",
863 | "output_type": "stream",
864 | "text": [
865 | "1234567890\n"
866 | ]
867 | }
868 | ],
869 | "source": [
870 | "print(m.group())"
871 | ]
872 | },
873 | {
874 | "cell_type": "code",
875 | "execution_count": 47,
876 | "metadata": {},
877 | "outputs": [],
878 | "source": [
879 | "tele_num_spaces = '123 456 7890'"
880 | ]
881 | },
882 | {
883 | "cell_type": "code",
884 | "execution_count": 48,
885 | "metadata": {},
886 | "outputs": [
887 | {
888 | "name": "stdout",
889 | "output_type": "stream",
890 | "text": [
891 | "None\n"
892 | ]
893 | }
894 | ],
895 | "source": [
896 | "m = re.match(pattern='\\d{10}', string=tele_num_spaces) \n",
897 | "print(m)"
898 | ]
899 | },
900 | {
901 | "cell_type": "code",
902 | "execution_count": 49,
903 | "metadata": {},
904 | "outputs": [
905 | {
906 | "name": "stdout",
907 | "output_type": "stream",
908 | "text": [
909 | "no match\n"
910 | ]
911 | }
912 | ],
913 | "source": [
914 | "if m:\n",
915 | " print('match') \n",
916 | "else:\n",
917 | " print('no match')"
918 | ]
919 | },
920 | {
921 | "cell_type": "code",
922 | "execution_count": 50,
923 | "metadata": {},
924 | "outputs": [
925 | {
926 | "name": "stdout",
927 | "output_type": "stream",
928 | "text": [
929 | "<_sre.SRE_Match object; span=(0, 12), match='123 456 7890'>\n"
930 | ]
931 | }
932 | ],
933 | "source": [
934 | "p = '\\d{3}\\s?\\d{3}\\s?\\d{4}' \n",
935 | "m = re.match(pattern=p, string=tele_num_spaces) \n",
936 | "print(m)"
937 | ]
938 | },
939 | {
940 | "cell_type": "code",
941 | "execution_count": 51,
942 | "metadata": {},
943 | "outputs": [
944 | {
945 | "name": "stdout",
946 | "output_type": "stream",
947 | "text": [
948 | "<_sre.SRE_Match object; span=(0, 14), match='(123) 456-7890'>\n"
949 | ]
950 | }
951 | ],
952 | "source": [
953 | "tele_num_space_paren_dash = '(123) 456-7890' \n",
954 | "p = '\\(?\\d{3}\\)?\\s?\\d{3}\\s?-?\\d{4}' \n",
955 | "m = re.match(pattern=p, string=tele_num_space_paren_dash) \n",
956 | "print(m)"
957 | ]
958 | },
959 | {
960 | "cell_type": "code",
961 | "execution_count": 52,
962 | "metadata": {},
963 | "outputs": [
964 | {
965 | "name": "stdout",
966 | "output_type": "stream",
967 | "text": [
968 | "<_sre.SRE_Match object; span=(0, 17), match='+1 (123) 456-7890'>\n"
969 | ]
970 | }
971 | ],
972 | "source": [
973 | "cnty_tele_num_space_paren_dash = '+1 (123) 456-7890' \n",
974 | "p = '\\+?1\\s?\\(?\\d{3}\\)?\\s?\\d{3}\\s?-?\\d{4}' \n",
975 | "m = re.match(pattern=p, string=cnty_tele_num_space_paren_dash) \n",
976 | "print(m)"
977 | ]
978 | },
979 | {
980 | "cell_type": "markdown",
981 | "metadata": {},
982 | "source": [
983 | "# 알아두면 좋아요!\n",
984 | "## compile 메서드로 정규식 메서드 사용하기(200쪽)"
985 | ]
986 | },
987 | {
988 | "cell_type": "code",
989 | "execution_count": 54,
990 | "metadata": {},
991 | "outputs": [
992 | {
993 | "name": "stdout",
994 | "output_type": "stream",
995 | "text": [
996 | "<_sre.SRE_Match object; span=(0, 10), match='1234567890'>\n"
997 | ]
998 | }
999 | ],
1000 | "source": [
1001 | "p = re.compile('\\d{10}') \n",
1002 | "s = '1234567890' \n",
1003 | "m = p.match(s) \n",
1004 | "print(m)"
1005 | ]
1006 | }
1007 | ],
1008 | "metadata": {
1009 | "kernelspec": {
1010 | "display_name": "Python 3",
1011 | "language": "python",
1012 | "name": "python3"
1013 | },
1014 | "language_info": {
1015 | "codemirror_mode": {
1016 | "name": "ipython",
1017 | "version": 3
1018 | },
1019 | "file_extension": ".py",
1020 | "mimetype": "text/x-python",
1021 | "name": "python",
1022 | "nbconvert_exporter": "python",
1023 | "pygments_lexer": "ipython3",
1024 | "version": "3.6.5"
1025 | }
1026 | },
1027 | "nbformat": 4,
1028 | "nbformat_minor": 1
1029 | }
1030 |
--------------------------------------------------------------------------------
/notebook/09_practice.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "metadata": {},
6 | "source": [
7 | "# 문자열 추출하기"
8 | ]
9 | },
10 | {
11 | "cell_type": "code",
12 | "execution_count": null,
13 | "metadata": {},
14 | "outputs": [],
15 | "source": []
16 | },
17 | {
18 | "cell_type": "markdown",
19 | "metadata": {},
20 | "source": [
21 | "# 전체 문자열 추출하기"
22 | ]
23 | },
24 | {
25 | "cell_type": "code",
26 | "execution_count": null,
27 | "metadata": {},
28 | "outputs": [],
29 | "source": []
30 | },
31 | {
32 | "cell_type": "markdown",
33 | "metadata": {},
34 | "source": [
35 | "# 왼쪽이나 오른쪽 범위를 지정하지 않고 문자열 추출하기"
36 | ]
37 | },
38 | {
39 | "cell_type": "code",
40 | "execution_count": null,
41 | "metadata": {},
42 | "outputs": [],
43 | "source": []
44 | },
45 | {
46 | "cell_type": "markdown",
47 | "metadata": {},
48 | "source": [
49 | "# join, splitlines, replace 메서드 실습하기"
50 | ]
51 | },
52 | {
53 | "cell_type": "code",
54 | "execution_count": null,
55 | "metadata": {},
56 | "outputs": [],
57 | "source": []
58 | },
59 | {
60 | "cell_type": "markdown",
61 | "metadata": {},
62 | "source": [
63 | "# 문자열 포매팅 실습하기"
64 | ]
65 | },
66 | {
67 | "cell_type": "code",
68 | "execution_count": null,
69 | "metadata": {},
70 | "outputs": [],
71 | "source": []
72 | },
73 | {
74 | "cell_type": "markdown",
75 | "metadata": {},
76 | "source": [
77 | "# 수치값 포매팅 실습"
78 | ]
79 | },
80 | {
81 | "cell_type": "code",
82 | "execution_count": null,
83 | "metadata": {},
84 | "outputs": [],
85 | "source": []
86 | },
87 | {
88 | "cell_type": "markdown",
89 | "metadata": {},
90 | "source": [
91 | "# % 연산자로 포매팅하기"
92 | ]
93 | },
94 | {
95 | "cell_type": "code",
96 | "execution_count": null,
97 | "metadata": {},
98 | "outputs": [],
99 | "source": []
100 | },
101 | {
102 | "cell_type": "markdown",
103 | "metadata": {},
104 | "source": [
105 | "# f-strings로 포매팅 사용하기"
106 | ]
107 | },
108 | {
109 | "cell_type": "code",
110 | "execution_count": null,
111 | "metadata": {},
112 | "outputs": [],
113 | "source": []
114 | },
115 | {
116 | "cell_type": "markdown",
117 | "metadata": {},
118 | "source": [
119 | "# 정규식으로 전화번호 패턴 찾기"
120 | ]
121 | },
122 | {
123 | "cell_type": "code",
124 | "execution_count": null,
125 | "metadata": {},
126 | "outputs": [],
127 | "source": []
128 | },
129 | {
130 | "cell_type": "markdown",
131 | "metadata": {},
132 | "source": [
133 | "# compile 메서드로 정규식 메서드 사용하기"
134 | ]
135 | },
136 | {
137 | "cell_type": "code",
138 | "execution_count": null,
139 | "metadata": {},
140 | "outputs": [],
141 | "source": []
142 | }
143 | ],
144 | "metadata": {
145 | "kernelspec": {
146 | "display_name": "Python 3",
147 | "language": "python",
148 | "name": "python3"
149 | },
150 | "language_info": {
151 | "codemirror_mode": {
152 | "name": "ipython",
153 | "version": 3
154 | },
155 | "file_extension": ".py",
156 | "mimetype": "text/x-python",
157 | "name": "python",
158 | "nbconvert_exporter": "python",
159 | "pygments_lexer": "ipython3",
160 | "version": "3.6.5"
161 | }
162 | },
163 | "nbformat": 4,
164 | "nbformat_minor": 1
165 | }
166 |
--------------------------------------------------------------------------------
/notebook/10_done.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "metadata": {},
6 | "source": [
7 | "# 직접 해보세요!\n",
8 | "## 제곱 함수와 n 제곱 함수 만들기(202쪽)"
9 | ]
10 | },
11 | {
12 | "cell_type": "code",
13 | "execution_count": 1,
14 | "metadata": {},
15 | "outputs": [],
16 | "source": [
17 | "def my_sq(x):\n",
18 | " return x ** 2"
19 | ]
20 | },
21 | {
22 | "cell_type": "code",
23 | "execution_count": 2,
24 | "metadata": {},
25 | "outputs": [],
26 | "source": [
27 | "def my_exp(x, n):\n",
28 | " return x ** n"
29 | ]
30 | },
31 | {
32 | "cell_type": "code",
33 | "execution_count": 2,
34 | "metadata": {},
35 | "outputs": [
36 | {
37 | "name": "stdout",
38 | "output_type": "stream",
39 | "text": [
40 | "16\n"
41 | ]
42 | }
43 | ],
44 | "source": [
45 | "print(my_sq(4))"
46 | ]
47 | },
48 | {
49 | "cell_type": "code",
50 | "execution_count": 3,
51 | "metadata": {},
52 | "outputs": [
53 | {
54 | "name": "stdout",
55 | "output_type": "stream",
56 | "text": [
57 | "16\n"
58 | ]
59 | }
60 | ],
61 | "source": [
62 | "print(my_exp(2, 4))"
63 | ]
64 | },
65 | {
66 | "cell_type": "markdown",
67 | "metadata": {},
68 | "source": [
69 | "# 직접 해보세요!\n",
70 | "## 시리즈와 데이터프레임에 apply 메서드 사용하기(203쪽)"
71 | ]
72 | },
73 | {
74 | "cell_type": "markdown",
75 | "metadata": {},
76 | "source": [
77 | "### 1. 시리즈와 apply 메서드"
78 | ]
79 | },
80 | {
81 | "cell_type": "code",
82 | "execution_count": 5,
83 | "metadata": {},
84 | "outputs": [
85 | {
86 | "name": "stdout",
87 | "output_type": "stream",
88 | "text": [
89 | " a b\n",
90 | "0 10 20\n",
91 | "1 20 30\n",
92 | "2 30 40\n"
93 | ]
94 | }
95 | ],
96 | "source": [
97 | "import pandas as pd\n",
98 | "\n",
99 | "df = pd.DataFrame({'a': [10, 20, 30], 'b': [20, 30, 40]}) \n",
100 | "\n",
101 | "print(df)"
102 | ]
103 | },
104 | {
105 | "cell_type": "code",
106 | "execution_count": 6,
107 | "metadata": {},
108 | "outputs": [
109 | {
110 | "name": "stdout",
111 | "output_type": "stream",
112 | "text": [
113 | "0 100\n",
114 | "1 400\n",
115 | "2 900\n",
116 | "Name: a, dtype: int64\n"
117 | ]
118 | }
119 | ],
120 | "source": [
121 | "print(df['a'] ** 2)"
122 | ]
123 | },
124 | {
125 | "cell_type": "code",
126 | "execution_count": 7,
127 | "metadata": {},
128 | "outputs": [
129 | {
130 | "name": "stdout",
131 | "output_type": "stream",
132 | "text": [
133 | "0 100\n",
134 | "1 400\n",
135 | "2 900\n",
136 | "Name: a, dtype: int64\n"
137 | ]
138 | }
139 | ],
140 | "source": [
141 | "sq = df['a'].apply(my_sq) \n",
142 | "print(sq)"
143 | ]
144 | },
145 | {
146 | "cell_type": "code",
147 | "execution_count": 8,
148 | "metadata": {},
149 | "outputs": [
150 | {
151 | "name": "stdout",
152 | "output_type": "stream",
153 | "text": [
154 | "0 100\n",
155 | "1 400\n",
156 | "2 900\n",
157 | "Name: a, dtype: int64\n"
158 | ]
159 | }
160 | ],
161 | "source": [
162 | "ex = df['a'].apply(my_exp, n=2) \n",
163 | "print(ex)"
164 | ]
165 | },
166 | {
167 | "cell_type": "code",
168 | "execution_count": 9,
169 | "metadata": {},
170 | "outputs": [
171 | {
172 | "name": "stdout",
173 | "output_type": "stream",
174 | "text": [
175 | "0 1000\n",
176 | "1 8000\n",
177 | "2 27000\n",
178 | "Name: a, dtype: int64\n"
179 | ]
180 | }
181 | ],
182 | "source": [
183 | "ex = df['a'].apply(my_exp, n=3) \n",
184 | "print(ex)"
185 | ]
186 | },
187 | {
188 | "cell_type": "markdown",
189 | "metadata": {},
190 | "source": [
191 | "### 5. 데이터프레임과 apply 메서드"
192 | ]
193 | },
194 | {
195 | "cell_type": "code",
196 | "execution_count": 5,
197 | "metadata": {},
198 | "outputs": [
199 | {
200 | "name": "stdout",
201 | "output_type": "stream",
202 | "text": [
203 | " a b\n",
204 | "0 10 20\n",
205 | "1 20 30\n",
206 | "2 30 40\n"
207 | ]
208 | }
209 | ],
210 | "source": [
211 | "df = pd.DataFrame({'a': [10, 20, 30], 'b': [20, 30, 40]}) \n",
212 | "print(df)"
213 | ]
214 | },
215 | {
216 | "cell_type": "code",
217 | "execution_count": 6,
218 | "metadata": {},
219 | "outputs": [],
220 | "source": [
221 | "def print_me(x): \n",
222 | " print(x)"
223 | ]
224 | },
225 | {
226 | "cell_type": "code",
227 | "execution_count": 7,
228 | "metadata": {},
229 | "outputs": [
230 | {
231 | "name": "stdout",
232 | "output_type": "stream",
233 | "text": [
234 | "0 10\n",
235 | "1 20\n",
236 | "2 30\n",
237 | "Name: a, dtype: int64\n",
238 | "0 20\n",
239 | "1 30\n",
240 | "2 40\n",
241 | "Name: b, dtype: int64\n",
242 | "a None\n",
243 | "b None\n",
244 | "dtype: object\n"
245 | ]
246 | }
247 | ],
248 | "source": [
249 | "print(df.apply(print_me, axis=0))"
250 | ]
251 | },
252 | {
253 | "cell_type": "code",
254 | "execution_count": 8,
255 | "metadata": {},
256 | "outputs": [
257 | {
258 | "name": "stdout",
259 | "output_type": "stream",
260 | "text": [
261 | "0 10\n",
262 | "1 20\n",
263 | "2 30\n",
264 | "Name: a, dtype: int64\n"
265 | ]
266 | }
267 | ],
268 | "source": [
269 | "print(df['a'])"
270 | ]
271 | },
272 | {
273 | "cell_type": "code",
274 | "execution_count": 9,
275 | "metadata": {},
276 | "outputs": [
277 | {
278 | "name": "stdout",
279 | "output_type": "stream",
280 | "text": [
281 | "0 20\n",
282 | "1 30\n",
283 | "2 40\n",
284 | "Name: b, dtype: int64\n"
285 | ]
286 | }
287 | ],
288 | "source": [
289 | "print(df['b'])"
290 | ]
291 | },
292 | {
293 | "cell_type": "code",
294 | "execution_count": 10,
295 | "metadata": {},
296 | "outputs": [],
297 | "source": [
298 | "def avg_3(x, y, z):\n",
299 | " return (x + y + z) / 3"
300 | ]
301 | },
302 | {
303 | "cell_type": "code",
304 | "execution_count": 11,
305 | "metadata": {},
306 | "outputs": [
307 | {
308 | "ename": "TypeError",
309 | "evalue": "(\"avg_3() missing 2 required positional arguments: 'y' and 'z'\", 'occurred at index a')",
310 | "output_type": "error",
311 | "traceback": [
312 | "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m",
313 | "\u001b[1;31mTypeError\u001b[0m Traceback (most recent call last)",
314 | "\u001b[1;32m\u001b[0m in \u001b[0;36m\u001b[1;34m()\u001b[0m\n\u001b[1;32m----> 1\u001b[1;33m \u001b[0mprint\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mdf\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mapply\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mavg_3\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m",
315 | "\u001b[1;32m~\\Anaconda3\\lib\\site-packages\\pandas\\core\\frame.py\u001b[0m in \u001b[0;36mapply\u001b[1;34m(self, func, axis, broadcast, raw, reduce, result_type, args, **kwds)\u001b[0m\n\u001b[0;32m 6002\u001b[0m \u001b[0margs\u001b[0m\u001b[1;33m=\u001b[0m\u001b[0margs\u001b[0m\u001b[1;33m,\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 6003\u001b[0m kwds=kwds)\n\u001b[1;32m-> 6004\u001b[1;33m \u001b[1;32mreturn\u001b[0m \u001b[0mop\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mget_result\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 6005\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 6006\u001b[0m \u001b[1;32mdef\u001b[0m \u001b[0mapplymap\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mself\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mfunc\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
316 | "\u001b[1;32m~\\Anaconda3\\lib\\site-packages\\pandas\\core\\apply.py\u001b[0m in \u001b[0;36mget_result\u001b[1;34m(self)\u001b[0m\n\u001b[0;32m 316\u001b[0m *self.args, **self.kwds)\n\u001b[0;32m 317\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 318\u001b[1;33m \u001b[1;32mreturn\u001b[0m \u001b[0msuper\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mFrameRowApply\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mget_result\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 319\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 320\u001b[0m \u001b[1;32mdef\u001b[0m \u001b[0mapply_broadcast\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mself\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
317 | "\u001b[1;32m~\\Anaconda3\\lib\\site-packages\\pandas\\core\\apply.py\u001b[0m in \u001b[0;36mget_result\u001b[1;34m(self)\u001b[0m\n\u001b[0;32m 140\u001b[0m \u001b[1;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mapply_raw\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 141\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 142\u001b[1;33m \u001b[1;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mapply_standard\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 143\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 144\u001b[0m \u001b[1;32mdef\u001b[0m \u001b[0mapply_empty_result\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mself\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
318 | "\u001b[1;32m~\\Anaconda3\\lib\\site-packages\\pandas\\core\\apply.py\u001b[0m in \u001b[0;36mapply_standard\u001b[1;34m(self)\u001b[0m\n\u001b[0;32m 246\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 247\u001b[0m \u001b[1;31m# compute the result using the series generator\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 248\u001b[1;33m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mapply_series_generator\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 249\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 250\u001b[0m \u001b[1;31m# wrap results\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
319 | "\u001b[1;32m~\\Anaconda3\\lib\\site-packages\\pandas\\core\\apply.py\u001b[0m in \u001b[0;36mapply_series_generator\u001b[1;34m(self)\u001b[0m\n\u001b[0;32m 275\u001b[0m \u001b[1;32mtry\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 276\u001b[0m \u001b[1;32mfor\u001b[0m \u001b[0mi\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mv\u001b[0m \u001b[1;32min\u001b[0m \u001b[0menumerate\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mseries_gen\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 277\u001b[1;33m \u001b[0mresults\u001b[0m\u001b[1;33m[\u001b[0m\u001b[0mi\u001b[0m\u001b[1;33m]\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mf\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mv\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 278\u001b[0m \u001b[0mkeys\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mappend\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mv\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mname\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 279\u001b[0m \u001b[1;32mexcept\u001b[0m \u001b[0mException\u001b[0m \u001b[1;32mas\u001b[0m \u001b[0me\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
320 | "\u001b[1;31mTypeError\u001b[0m: (\"avg_3() missing 2 required positional arguments: 'y' and 'z'\", 'occurred at index a')"
321 | ]
322 | }
323 | ],
324 | "source": [
325 | "print(df.apply(avg_3))"
326 | ]
327 | },
328 | {
329 | "cell_type": "code",
330 | "execution_count": 12,
331 | "metadata": {},
332 | "outputs": [
333 | {
334 | "name": "stdout",
335 | "output_type": "stream",
336 | "text": [
337 | "a 20.0\n",
338 | "b 30.0\n",
339 | "dtype: float64\n"
340 | ]
341 | }
342 | ],
343 | "source": [
344 | "def avg_3_apply(col):\n",
345 | " x = col[0] \n",
346 | " y = col[1] \n",
347 | " z = col[2] \n",
348 | " return (x + y + z) / 3\n",
349 | "\n",
350 | "\n",
351 | "print(df.apply(avg_3_apply))"
352 | ]
353 | },
354 | {
355 | "cell_type": "code",
356 | "execution_count": 23,
357 | "metadata": {},
358 | "outputs": [],
359 | "source": [
360 | "def avg_3_apply(col):\n",
361 | " sum = 0\n",
362 | " for item in col:\n",
363 | " sum += item\n",
364 | " return sum / df.shape[0]"
365 | ]
366 | },
367 | {
368 | "cell_type": "code",
369 | "execution_count": 31,
370 | "metadata": {},
371 | "outputs": [],
372 | "source": [
373 | "def avg_2_apply(row):\n",
374 | " sum = 0\n",
375 | " for item in row:\n",
376 | " sum += item\n",
377 | " return sum / df.shape[1]"
378 | ]
379 | },
380 | {
381 | "cell_type": "code",
382 | "execution_count": 32,
383 | "metadata": {},
384 | "outputs": [
385 | {
386 | "name": "stdout",
387 | "output_type": "stream",
388 | "text": [
389 | "0 15.0\n",
390 | "1 25.0\n",
391 | "2 35.0\n",
392 | "dtype: float64\n"
393 | ]
394 | }
395 | ],
396 | "source": [
397 | "print(df.apply(avg_2_apply, axis = 1))"
398 | ]
399 | },
400 | {
401 | "cell_type": "markdown",
402 | "metadata": {},
403 | "source": [
404 | "# 직접 해보세요!\n",
405 | "## 데이터프레임의 누락값을 처리한 다음 apply 메서드 사용하기(208쪽)"
406 | ]
407 | },
408 | {
409 | "cell_type": "markdown",
410 | "metadata": {},
411 | "source": [
412 | "### 1. 데이터프레임의 누락값 처리하기 ― 열 방향"
413 | ]
414 | },
415 | {
416 | "cell_type": "code",
417 | "execution_count": 33,
418 | "metadata": {},
419 | "outputs": [],
420 | "source": [
421 | "import seaborn as sns\n",
422 | "\n",
423 | "titanic = sns.load_dataset(\"titanic\")"
424 | ]
425 | },
426 | {
427 | "cell_type": "code",
428 | "execution_count": 34,
429 | "metadata": {},
430 | "outputs": [
431 | {
432 | "name": "stdout",
433 | "output_type": "stream",
434 | "text": [
435 | "<class 'pandas.core.frame.DataFrame'>\n",
436 | "RangeIndex: 891 entries, 0 to 890\n",
437 | "Data columns (total 15 columns):\n",
438 | "survived 891 non-null int64\n",
439 | "pclass 891 non-null int64\n",
440 | "sex 891 non-null object\n",
441 | "age 714 non-null float64\n",
442 | "sibsp 891 non-null int64\n",
443 | "parch 891 non-null int64\n",
444 | "fare 891 non-null float64\n",
445 | "embarked 889 non-null object\n",
446 | "class 891 non-null category\n",
447 | "who 891 non-null object\n",
448 | "adult_male 891 non-null bool\n",
449 | "deck 203 non-null category\n",
450 | "embark_town 889 non-null object\n",
451 | "alive 891 non-null object\n",
452 | "alone 891 non-null bool\n",
453 | "dtypes: bool(2), category(2), float64(2), int64(4), object(5)\n",
454 | "memory usage: 80.6+ KB\n",
455 | "None\n"
456 | ]
457 | }
458 | ],
459 | "source": [
460 | "print(titanic.info())"
461 | ]
462 | },
463 | {
464 | "cell_type": "code",
465 | "execution_count": 10,
466 | "metadata": {},
467 | "outputs": [],
468 | "source": [
469 | "import numpy as np\n",
470 | "\n",
471 | "def count_missing(vec):\n",
472 | " null_vec = pd.isnull(vec)\n",
473 | " null_count = np.sum(null_vec)\n",
474 | " return null_count"
475 | ]
476 | },
477 | {
478 | "cell_type": "code",
479 | "execution_count": 41,
480 | "metadata": {},
481 | "outputs": [
482 | {
483 | "name": "stdout",
484 | "output_type": "stream",
485 | "text": [
486 | "survived 0\n",
487 | "pclass 0\n",
488 | "sex 0\n",
489 | "age 177\n",
490 | "sibsp 0\n",
491 | "parch 0\n",
492 | "fare 0\n",
493 | "embarked 2\n",
494 | "class 0\n",
495 | "who 0\n",
496 | "adult_male 0\n",
497 | "deck 688\n",
498 | "embark_town 2\n",
499 | "alive 0\n",
500 | "alone 0\n",
501 | "dtype: int64\n"
502 | ]
503 | }
504 | ],
505 | "source": [
506 | "cmis_col = titanic.apply(count_missing)\n",
507 | "print(cmis_col)"
508 | ]
509 | },
510 | {
511 | "cell_type": "code",
512 | "execution_count": 43,
513 | "metadata": {},
514 | "outputs": [],
515 | "source": [
516 | "def prop_missing(vec):\n",
517 | " num = count_missing(vec)\n",
518 | " dem = vec.size\n",
519 | " return num / dem"
520 | ]
521 | },
522 | {
523 | "cell_type": "code",
524 | "execution_count": 45,
525 | "metadata": {},
526 | "outputs": [
527 | {
528 | "name": "stdout",
529 | "output_type": "stream",
530 | "text": [
531 | "survived 0.000000\n",
532 | "pclass 0.000000\n",
533 | "sex 0.000000\n",
534 | "age 0.198653\n",
535 | "sibsp 0.000000\n",
536 | "parch 0.000000\n",
537 | "fare 0.000000\n",
538 | "embarked 0.002245\n",
539 | "class 0.000000\n",
540 | "who 0.000000\n",
541 | "adult_male 0.000000\n",
542 | "deck 0.772166\n",
543 | "embark_town 0.002245\n",
544 | "alive 0.000000\n",
545 | "alone 0.000000\n",
546 | "dtype: float64\n"
547 | ]
548 | }
549 | ],
550 | "source": [
551 | "pmis_col = titanic.apply(prop_missing)\n",
552 | "print(pmis_col)"
553 | ]
554 | },
555 | {
556 | "cell_type": "code",
557 | "execution_count": 46,
558 | "metadata": {},
559 | "outputs": [],
560 | "source": [
561 | "def prop_complete(vec):\n",
562 | " return 1 - prop_missing(vec)"
563 | ]
564 | },
565 | {
566 | "cell_type": "markdown",
567 | "metadata": {},
568 | "source": [
569 | "### 8. 데이터프레임의 누락값을 처리하기 ― 행 방향"
570 | ]
571 | },
572 | {
573 | "cell_type": "code",
574 | "execution_count": 48,
575 | "metadata": {},
576 | "outputs": [
577 | {
578 | "name": "stdout",
579 | "output_type": "stream",
580 | "text": [
581 | "0 1\n",
582 | "1 0\n",
583 | "2 1\n",
584 | "3 0\n",
585 | "4 1\n",
586 | "dtype: int64\n"
587 | ]
588 | }
589 | ],
590 | "source": [
591 | "cmis_row = titanic.apply(count_missing, axis=1)\n",
592 | "pmis_row = titanic.apply(prop_missing, axis=1)\n",
593 | "pcom_row = titanic.apply(prop_complete, axis=1)\n",
594 | "\n",
595 | "print(cmis_row.head())"
596 | ]
597 | },
598 | {
599 | "cell_type": "code",
600 | "execution_count": 49,
601 | "metadata": {},
602 | "outputs": [
603 | {
604 | "name": "stdout",
605 | "output_type": "stream",
606 | "text": [
607 | "0 0.066667\n",
608 | "1 0.000000\n",
609 | "2 0.066667\n",
610 | "3 0.000000\n",
611 | "4 0.066667\n",
612 | "dtype: float64\n"
613 | ]
614 | }
615 | ],
616 | "source": [
617 | "print(pmis_row.head())"
618 | ]
619 | },
620 | {
621 | "cell_type": "code",
622 | "execution_count": 50,
623 | "metadata": {},
624 | "outputs": [
625 | {
626 | "name": "stdout",
627 | "output_type": "stream",
628 | "text": [
629 | "0 0.933333\n",
630 | "1 1.000000\n",
631 | "2 0.933333\n",
632 | "3 1.000000\n",
633 | "4 0.933333\n",
634 | "dtype: float64\n"
635 | ]
636 | }
637 | ],
638 | "source": [
639 | "print(pcom_row.head())"
640 | ]
641 | },
642 | {
643 | "cell_type": "code",
644 | "execution_count": 51,
645 | "metadata": {},
646 | "outputs": [
647 | {
648 | "name": "stdout",
649 | "output_type": "stream",
650 | "text": [
651 | " survived pclass sex age sibsp parch fare embarked class \\\n",
652 | "0 0 3 male 22.0 1 0 7.2500 S Third \n",
653 | "1 1 1 female 38.0 1 0 71.2833 C First \n",
654 | "2 1 3 female 26.0 0 0 7.9250 S Third \n",
655 | "3 1 1 female 35.0 1 0 53.1000 S First \n",
656 | "4 0 3 male 35.0 0 0 8.0500 S Third \n",
657 | "\n",
658 | " who adult_male deck embark_town alive alone num_missing \n",
659 | "0 man True NaN Southampton no False 1 \n",
660 | "1 woman False C Cherbourg yes False 0 \n",
661 | "2 woman False NaN Southampton yes True 1 \n",
662 | "3 woman False C Southampton yes False 0 \n",
663 | "4 man True NaN Southampton no True 1 \n"
664 | ]
665 | }
666 | ],
667 | "source": [
668 | "titanic['num_missing'] = titanic.apply(count_missing, axis=1)\n",
669 | "\n",
670 | "print(titanic.head())"
671 | ]
672 | },
673 | {
674 | "cell_type": "code",
675 | "execution_count": 53,
676 | "metadata": {},
677 | "outputs": [
678 | {
679 | "name": "stdout",
680 | "output_type": "stream",
681 | "text": [
682 | " survived pclass sex age sibsp parch fare embarked class \\\n",
683 | "186 1 3 female NaN 1 0 15.5000 Q Third \n",
684 | "274 1 3 female NaN 0 0 7.7500 Q Third \n",
685 | "410 0 3 male NaN 0 0 7.8958 S Third \n",
686 | "547 1 2 male NaN 0 0 13.8625 C Second \n",
687 | "601 0 3 male NaN 0 0 7.8958 S Third \n",
688 | "578 0 3 female NaN 1 0 14.4583 C Third \n",
689 | "76 0 3 male NaN 0 0 7.8958 S Third \n",
690 | "560 0 3 male NaN 0 0 7.7500 Q Third \n",
691 | "511 0 3 male NaN 0 0 8.0500 S Third \n",
692 | "495 0 3 male NaN 0 0 14.4583 C Third \n",
693 | "\n",
694 | " who adult_male deck embark_town alive alone num_missing \n",
695 | "186 woman False NaN Queenstown yes False 2 \n",
696 | "274 woman False NaN Queenstown yes True 2 \n",
697 | "410 man True NaN Southampton no True 2 \n",
698 | "547 man True NaN Cherbourg yes True 2 \n",
699 | "601 man True NaN Southampton no True 2 \n",
700 | "578 woman False NaN Cherbourg no False 2 \n",
701 | "76 man True NaN Southampton no True 2 \n",
702 | "560 man True NaN Queenstown no True 2 \n",
703 | "511 man True NaN Southampton no True 2 \n",
704 | "495 man True NaN Cherbourg no True 2 \n"
705 | ]
706 | }
707 | ],
708 | "source": [
709 | "print(titanic.loc[titanic.num_missing > 1, :].sample(10))"
710 | ]
711 | }
712 | ],
713 | "metadata": {
714 | "kernelspec": {
715 | "display_name": "Python 3",
716 | "language": "python",
717 | "name": "python3"
718 | },
719 | "language_info": {
720 | "codemirror_mode": {
721 | "name": "ipython",
722 | "version": 3
723 | },
724 | "file_extension": ".py",
725 | "mimetype": "text/x-python",
726 | "name": "python",
727 | "nbconvert_exporter": "python",
728 | "pygments_lexer": "ipython3",
729 | "version": "3.6.5"
730 | }
731 | },
732 | "nbformat": 4,
733 | "nbformat_minor": 1
734 | }
735 |
--------------------------------------------------------------------------------
/notebook/10_practice.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "metadata": {},
6 | "source": [
7 | "# 제곱 함수와 n 제곱 함수 만들기"
8 | ]
9 | },
10 | {
11 | "cell_type": "code",
12 | "execution_count": null,
13 | "metadata": {},
14 | "outputs": [],
15 | "source": []
16 | },
17 | {
18 | "cell_type": "markdown",
19 | "metadata": {},
20 | "source": [
21 | "# 시리즈와 apply 메서드"
22 | ]
23 | },
24 | {
25 | "cell_type": "code",
26 | "execution_count": null,
27 | "metadata": {},
28 | "outputs": [],
29 | "source": []
30 | },
31 | {
32 | "cell_type": "markdown",
33 | "metadata": {},
34 | "source": [
35 | "# 데이터프레임과 apply 메서드"
36 | ]
37 | },
38 | {
39 | "cell_type": "code",
40 | "execution_count": null,
41 | "metadata": {},
42 | "outputs": [],
43 | "source": []
44 | },
45 | {
46 | "cell_type": "markdown",
47 | "metadata": {},
48 | "source": [
49 | "# 데이터프레임의 누락값을 처리한 다음 apply 메서드 사용하기 - 열 방향"
50 | ]
51 | },
52 | {
53 | "cell_type": "code",
54 | "execution_count": null,
55 | "metadata": {},
56 | "outputs": [],
57 | "source": []
58 | },
59 | {
60 | "cell_type": "markdown",
61 | "metadata": {},
62 | "source": [
63 | "# 데이터프레임의 누락값을 처리한 다음 apply 메서드 사용하기 - 행 방향"
64 | ]
65 | },
66 | {
67 | "cell_type": "code",
68 | "execution_count": null,
69 | "metadata": {},
70 | "outputs": [],
71 | "source": []
72 | }
73 | ],
74 | "metadata": {
75 | "kernelspec": {
76 | "display_name": "Python 3",
77 | "language": "python",
78 | "name": "python3"
79 | },
80 | "language_info": {
81 | "codemirror_mode": {
82 | "name": "ipython",
83 | "version": 3
84 | },
85 | "file_extension": ".py",
86 | "mimetype": "text/x-python",
87 | "name": "python",
88 | "nbconvert_exporter": "python",
89 | "pygments_lexer": "ipython3",
90 | "version": "3.6.5"
91 | }
92 | },
93 | "nbformat": 4,
94 | "nbformat_minor": 1
95 | }
96 |
--------------------------------------------------------------------------------
/notebook/11_practice.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "metadata": {},
6 | "source": [
7 | "# groupby 메서드로 평균값 구하기"
8 | ]
9 | },
10 | {
11 | "cell_type": "code",
12 | "execution_count": null,
13 | "metadata": {},
14 | "outputs": [],
15 | "source": []
16 | },
17 | {
18 | "cell_type": "markdown",
19 | "metadata": {},
20 | "source": [
21 | "# 분할-반영-결합 과정 살펴보기"
22 | ]
23 | },
24 | {
25 | "cell_type": "code",
26 | "execution_count": null,
27 | "metadata": {},
28 | "outputs": [],
29 | "source": []
30 | },
31 | {
32 | "cell_type": "markdown",
33 | "metadata": {},
34 | "source": [
35 | "# 평균값을 구하는 사용자 함수와 groupby 메서드"
36 | ]
37 | },
38 | {
39 | "cell_type": "code",
40 | "execution_count": null,
41 | "metadata": {},
42 | "outputs": [],
43 | "source": []
44 | },
45 | {
46 | "cell_type": "markdown",
47 | "metadata": {},
48 | "source": [
49 | "# 두 개의 인잣값을 받아 처리하는 사용자 함수와 groupby 메서드"
50 | ]
51 | },
52 | {
53 | "cell_type": "code",
54 | "execution_count": null,
55 | "metadata": {},
56 | "outputs": [],
57 | "source": []
58 | },
59 | {
60 | "cell_type": "markdown",
61 | "metadata": {},
62 | "source": [
63 | "# 집계 메서드를 리스트, 딕셔너리에 담아 전달하기"
64 | ]
65 | },
66 | {
67 | "cell_type": "code",
68 | "execution_count": null,
69 | "metadata": {},
70 | "outputs": [],
71 | "source": []
72 | },
73 | {
74 | "cell_type": "markdown",
75 | "metadata": {},
76 | "source": [
77 | "# 표준 점수 계산하기"
78 | ]
79 | },
80 | {
81 | "cell_type": "code",
82 | "execution_count": null,
83 | "metadata": {},
84 | "outputs": [],
85 | "source": []
86 | },
87 | {
88 | "cell_type": "markdown",
89 | "metadata": {},
90 | "source": [
91 | "# 누락값을 평균값으로 처리하기"
92 | ]
93 | },
94 | {
95 | "cell_type": "code",
96 | "execution_count": null,
97 | "metadata": {},
98 | "outputs": [],
99 | "source": []
100 | },
101 | {
102 | "cell_type": "markdown",
103 | "metadata": {},
104 | "source": [
105 | "# 데이터 필터링 사용하기 ─ filter 메서드"
106 | ]
107 | },
108 | {
109 | "cell_type": "code",
110 | "execution_count": null,
111 | "metadata": {},
112 | "outputs": [],
113 | "source": []
114 | },
115 | {
116 | "cell_type": "markdown",
117 | "metadata": {},
118 | "source": [
119 | "# 그룹 오브젝트 저장하여 살펴보기"
120 | ]
121 | },
122 | {
123 | "cell_type": "code",
124 | "execution_count": null,
125 | "metadata": {},
126 | "outputs": [],
127 | "source": []
128 | },
129 | {
130 | "cell_type": "markdown",
131 | "metadata": {},
132 | "source": [
133 | "# 그룹 오브젝트의 평균 구하기"
134 | ]
135 | },
136 | {
137 | "cell_type": "code",
138 | "execution_count": null,
139 | "metadata": {},
140 | "outputs": [],
141 | "source": []
142 | },
143 | {
144 | "cell_type": "markdown",
145 | "metadata": {},
146 | "source": [
147 | "# 그룹 오브젝트에서 데이터 추출하고 반복하기"
148 | ]
149 | },
150 | {
151 | "cell_type": "code",
152 | "execution_count": null,
153 | "metadata": {},
154 | "outputs": [],
155 | "source": []
156 | },
157 | {
158 | "cell_type": "markdown",
159 | "metadata": {},
160 | "source": [
161 | "# 그룹 오브젝트 계산하고 살펴보기"
162 | ]
163 | },
164 | {
165 | "cell_type": "code",
166 | "execution_count": null,
167 | "metadata": {},
168 | "outputs": [],
169 | "source": []
170 | }
171 | ],
172 | "metadata": {
173 | "kernelspec": {
174 | "display_name": "Python 3",
175 | "language": "python",
176 | "name": "python3"
177 | },
178 | "language_info": {
179 | "codemirror_mode": {
180 | "name": "ipython",
181 | "version": 3
182 | },
183 | "file_extension": ".py",
184 | "mimetype": "text/x-python",
185 | "name": "python",
186 | "nbconvert_exporter": "python",
187 | "pygments_lexer": "ipython3",
188 | "version": "3.6.5"
189 | }
190 | },
191 | "nbformat": 4,
192 | "nbformat_minor": 1
193 | }
194 |
--------------------------------------------------------------------------------
/notebook/12_practice.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "metadata": {},
6 | "source": [
7 | "# datetime 오브젝트 사용하기"
8 | ]
9 | },
10 | {
11 | "cell_type": "code",
12 | "execution_count": null,
13 | "metadata": {},
14 | "outputs": [],
15 | "source": []
16 | },
17 | {
18 | "cell_type": "markdown",
19 | "metadata": {},
20 | "source": [
21 | "# 문자열을 datetime 오브젝트로 변환하기"
22 | ]
23 | },
24 | {
25 | "cell_type": "code",
26 | "execution_count": null,
27 | "metadata": {},
28 | "outputs": [],
29 | "source": []
30 | },
31 | {
32 | "cell_type": "markdown",
33 | "metadata": {},
34 | "source": [
35 | "# 시간 데이터를 잘라내고 싶어요"
36 | ]
37 | },
38 | {
39 | "cell_type": "code",
40 | "execution_count": null,
41 | "metadata": {},
42 | "outputs": [],
43 | "source": []
44 | },
45 | {
46 | "cell_type": "markdown",
47 | "metadata": {},
48 | "source": [
49 | "# datetime 오브젝트로 변환하려는 열을 지정하여 데이터 집합 불러오기"
50 | ]
51 | },
52 | {
53 | "cell_type": "code",
54 | "execution_count": null,
55 | "metadata": {},
56 | "outputs": [],
57 | "source": []
58 | },
59 | {
60 | "cell_type": "markdown",
61 | "metadata": {},
62 | "source": [
63 | "# datetime 오브젝트에서 날짜 정보 추출하기"
64 | ]
65 | },
66 | {
67 | "cell_type": "code",
68 | "execution_count": null,
69 | "metadata": {},
70 | "outputs": [],
71 | "source": []
72 | },
73 | {
74 | "cell_type": "markdown",
75 | "metadata": {},
76 | "source": [
77 | "# dt 접근자로 시간 데이터 정리하기"
78 | ]
79 | },
80 | {
81 | "cell_type": "code",
82 | "execution_count": null,
83 | "metadata": {},
84 | "outputs": [],
85 | "source": []
86 | },
87 | {
88 | "cell_type": "markdown",
89 | "metadata": {},
90 | "source": [
91 | "# 에볼라 최초 발생일 계산해보기"
92 | ]
93 | },
94 | {
95 | "cell_type": "code",
96 | "execution_count": null,
97 | "metadata": {},
98 | "outputs": [],
99 | "source": []
100 | },
101 | {
102 | "cell_type": "markdown",
103 | "metadata": {},
104 | "source": [
105 | "# 파산한 은행의 개수 계산하기"
106 | ]
107 | },
108 | {
109 | "cell_type": "code",
110 | "execution_count": null,
111 | "metadata": {},
112 | "outputs": [],
113 | "source": []
114 | },
115 | {
116 | "cell_type": "markdown",
117 | "metadata": {},
118 | "source": [
119 | "# 테슬라 주식 데이터로 시간 계산하기"
120 | ]
121 | },
122 | {
123 | "cell_type": "code",
124 | "execution_count": null,
125 | "metadata": {},
126 | "outputs": [],
127 | "source": []
128 | },
129 | {
130 | "cell_type": "markdown",
131 | "metadata": {},
132 | "source": [
133 | "# datetime 오브젝트를 인덱스로 설정하여 데이터 추출하기"
134 | ]
135 | },
136 | {
137 | "cell_type": "code",
138 | "execution_count": null,
139 | "metadata": {},
140 | "outputs": [],
141 | "source": []
142 | },
143 | {
144 | "cell_type": "markdown",
145 | "metadata": {},
146 | "source": [
147 | "# 시간 간격을 인덱스로 설정하여 데이터 추출하기"
148 | ]
149 | },
150 | {
151 | "cell_type": "code",
152 | "execution_count": null,
153 | "metadata": {},
154 | "outputs": [],
155 | "source": []
156 | },
157 | {
158 | "cell_type": "markdown",
159 | "metadata": {},
160 | "source": [
161 | "# 시간 범위 생성하여 인덱스로 지정하기"
162 | ]
163 | },
164 | {
165 | "cell_type": "code",
166 | "execution_count": null,
167 | "metadata": {},
168 | "outputs": [],
169 | "source": []
170 | },
171 | {
172 | "cell_type": "markdown",
173 | "metadata": {},
174 | "source": [
175 | "# 시간 범위의 주기 설정하기"
176 | ]
177 | },
178 | {
179 | "cell_type": "code",
180 | "execution_count": null,
181 | "metadata": {},
182 | "outputs": [],
183 | "source": []
184 | },
185 | {
186 | "cell_type": "markdown",
187 | "metadata": {},
188 | "source": [
189 | "# 에볼라 발병 시간 비교하기"
190 | ]
191 | },
192 | {
193 | "cell_type": "code",
194 | "execution_count": null,
195 | "metadata": {},
196 | "outputs": [],
197 | "source": []
198 | }
199 | ],
200 | "metadata": {
201 | "kernelspec": {
202 | "display_name": "Python 3",
203 | "language": "python",
204 | "name": "python3"
205 | },
206 | "language_info": {
207 | "codemirror_mode": {
208 | "name": "ipython",
209 | "version": 3
210 | },
211 | "file_extension": ".py",
212 | "mimetype": "text/x-python",
213 | "name": "python",
214 | "nbconvert_exporter": "python",
215 | "pygments_lexer": "ipython3",
216 | "version": "3.6.5"
217 | }
218 | },
219 | "nbformat": 4,
220 | "nbformat_minor": 1
221 | }
222 |
--------------------------------------------------------------------------------
/notebook/Special.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "metadata": {},
6 | "source": [
7 | "# 직접 해보세요!\n",
8 | "## 코드의 성능을 향상시켜 실행 시간 측정하기 ― timeit"
9 | ]
10 | },
11 | {
12 | "cell_type": "code",
13 | "execution_count": 1,
14 | "metadata": {},
15 | "outputs": [],
16 | "source": [
17 | "import pandas as pd\n",
18 | "import numpy as np"
19 | ]
20 | },
21 | {
22 | "cell_type": "code",
23 | "execution_count": 2,
24 | "metadata": {},
25 | "outputs": [],
26 | "source": [
27 | "df = pd.DataFrame({'a': [10, 20, 30], 'b': [20, 30, 40]})"
28 | ]
29 | },
30 | {
31 | "cell_type": "code",
32 | "execution_count": 3,
33 | "metadata": {},
34 | "outputs": [],
35 | "source": [
36 | "def avg_2_apply(row):\n",
37 | " x = row.iloc[0]\n",
38 | " y = row.iloc[1]\n",
39 | " if(x == 20):\n",
40 | " return np.nan\n",
41 | " else:\n",
42 | " return (x + y)/2"
43 | ]
44 | },
45 | {
46 | "cell_type": "markdown",
47 | "metadata": {},
48 | "source": [
49 | "### 2. 판다스 데이터프레임 ― 실행 시간 측정"
50 | ]
51 | },
52 | {
53 | "cell_type": "code",
54 | "execution_count": 4,
55 | "metadata": {},
56 | "outputs": [
57 | {
58 | "name": "stdout",
59 | "output_type": "stream",
60 | "text": [
61 | "511 µs ± 5.98 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)\n"
62 | ]
63 | }
64 | ],
65 | "source": [
66 | "%%timeit\n",
67 | "df.apply(avg_2_apply, axis = 1)"
68 | ]
69 | },
70 | {
71 | "cell_type": "markdown",
72 | "metadata": {},
73 | "source": [
74 | "### 3. 넘파이로 벡터화한 함수 사용하기 ― 실행 시간 측정"
75 | ]
76 | },
77 | {
78 | "cell_type": "code",
79 | "execution_count": 5,
80 | "metadata": {},
81 | "outputs": [],
82 | "source": [
83 | "@np.vectorize\n",
84 | "def v_avg_2mod(x, y):\n",
85 | " if(x == 20):\n",
86 | " return (np.nan)\n",
87 | " else:\n",
88 | " return (x + y) / 2"
89 | ]
90 | },
91 | {
92 | "cell_type": "code",
93 | "execution_count": 6,
94 | "metadata": {},
95 | "outputs": [
96 | {
97 | "name": "stdout",
98 | "output_type": "stream",
99 | "text": [
100 | "36 µs ± 1.1 µs per loop (mean ± std. dev. of 7 runs, 10000 loops each)\n"
101 | ]
102 | }
103 | ],
104 | "source": [
105 | "%%timeit\n",
106 | "v_avg_2mod(df['a'], df['b'])"
107 | ]
108 | },
109 | {
110 | "cell_type": "markdown",
111 | "metadata": {},
112 | "source": [
113 | "### 5. numba 라이브러리로 벡터화한 함수 사용하기 ― 실행 시간 측정"
114 | ]
115 | },
116 | {
117 | "cell_type": "code",
118 | "execution_count": 7,
119 | "metadata": {},
120 | "outputs": [],
121 | "source": [
122 | "import numba\n",
123 | "\n",
124 | "@numba.vectorize\n",
125 | "def v_avg_2_numba(x, y):\n",
126 | " if(x == 20):\n",
127 | " return (np.nan)\n",
128 | " else:\n",
129 | " return (x + y) / 2"
130 | ]
131 | },
132 | {
133 | "cell_type": "code",
134 | "execution_count": 8,
135 | "metadata": {},
136 | "outputs": [
137 | {
138 | "name": "stdout",
139 | "output_type": "stream",
140 | "text": [
141 | "4.46 µs ± 47.9 ns per loop (mean ± std. dev. of 7 runs, 100000 loops each)\n"
142 | ]
143 | }
144 | ],
145 | "source": [
146 | "%%timeit\n",
147 | "v_avg_2_numba(df['a'].values, df['b'].values)"
148 | ]
149 | }
150 | ],
151 | "metadata": {
152 | "kernelspec": {
153 | "display_name": "Python 3",
154 | "language": "python",
155 | "name": "python3"
156 | },
157 | "language_info": {
158 | "codemirror_mode": {
159 | "name": "ipython",
160 | "version": 3
161 | },
162 | "file_extension": ".py",
163 | "mimetype": "text/x-python",
164 | "name": "python",
165 | "nbconvert_exporter": "python",
166 | "pygments_lexer": "ipython3",
167 | "version": "3.6.5"
168 | }
169 | },
170 | "nbformat": 4,
171 | "nbformat_minor": 2
172 | }
173 |
--------------------------------------------------------------------------------
/notebook/hello_jupyter_notebook.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "code",
5 | "execution_count": null,
6 | "metadata": {},
7 | "outputs": [],
8 | "source": []
9 | }
10 | ],
11 | "metadata": {
12 | "kernelspec": {
13 | "display_name": "Python 3",
14 | "language": "python",
15 | "name": "python3"
16 | },
17 | "language_info": {
18 | "codemirror_mode": {
19 | "name": "ipython",
20 | "version": 3
21 | },
22 | "file_extension": ".py",
23 | "mimetype": "text/x-python",
24 | "name": "python",
25 | "nbconvert_exporter": "python",
26 | "pygments_lexer": "ipython3",
27 | "version": "3.6.5"
28 | }
29 | },
30 | "nbformat": 4,
31 | "nbformat_minor": 2
32 | }
33 |
--------------------------------------------------------------------------------
/output/scientist_names_series.csv:
--------------------------------------------------------------------------------
1 | 0,Rosaline Franklin
2 | 1,William Gosset
3 | 2,Florence Nightingale
4 | 3,Marie Curie
5 | 4,Rachel Carson
6 | 5,John Snow
7 | 6,Alan Turing
8 | 7,Johann Gauss
9 |
--------------------------------------------------------------------------------
/output/scientists_df.pickle:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/easysIT/doit_pandas/4b9d91839f0cc0b4a79ae35d14f6387431fd3c00/output/scientists_df.pickle
--------------------------------------------------------------------------------
/output/scientists_df.tsv:
--------------------------------------------------------------------------------
1 | Name Born Died Age Occupation born_dt died_dt age_days_dt
2 | 0 Rosaline Franklin 1920-07-25 1958-04-16 66 Chemist 1920-07-25 1958-04-16 13779 days 00:00:00.000000000
3 | 1 William Gosset 1876-06-13 1937-10-16 56 Statistician 1876-06-13 1937-10-16 22404 days 00:00:00.000000000
4 | 2 Florence Nightingale 1820-05-12 1910-08-13 41 Nurse 1820-05-12 1910-08-13 32964 days 00:00:00.000000000
5 | 3 Marie Curie 1867-11-07 1934-07-04 77 Chemist 1867-11-07 1934-07-04 24345 days 00:00:00.000000000
6 | 4 Rachel Carson 1907-05-27 1964-04-14 90 Biologist 1907-05-27 1964-04-14 20777 days 00:00:00.000000000
7 | 5 John Snow 1813-03-15 1858-06-16 45 Physician 1813-03-15 1858-06-16 16529 days 00:00:00.000000000
8 | 6 Alan Turing 1912-06-23 1954-06-07 37 Computer Scientist 1912-06-23 1954-06-07 15324 days 00:00:00.000000000
9 | 7 Johann Gauss 1777-04-30 1855-02-23 61 Mathematician 1777-04-30 1855-02-23 28422 days 00:00:00.000000000
10 |
--------------------------------------------------------------------------------
/output/scientists_df.xlsx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/easysIT/doit_pandas/4b9d91839f0cc0b4a79ae35d14f6387431fd3c00/output/scientists_df.xlsx
--------------------------------------------------------------------------------
/output/scientists_df_no_index.csv:
--------------------------------------------------------------------------------
1 | Name,Born,Died,Age,Occupation,born_dt,died_dt,age_days_dt
2 | Rosaline Franklin,1920-07-25,1958-04-16,66,Chemist,1920-07-25,1958-04-16,13779 days 00:00:00.000000000
3 | William Gosset,1876-06-13,1937-10-16,56,Statistician,1876-06-13,1937-10-16,22404 days 00:00:00.000000000
4 | Florence Nightingale,1820-05-12,1910-08-13,41,Nurse,1820-05-12,1910-08-13,32964 days 00:00:00.000000000
5 | Marie Curie,1867-11-07,1934-07-04,77,Chemist,1867-11-07,1934-07-04,24345 days 00:00:00.000000000
6 | Rachel Carson,1907-05-27,1964-04-14,90,Biologist,1907-05-27,1964-04-14,20777 days 00:00:00.000000000
7 | John Snow,1813-03-15,1858-06-16,45,Physician,1813-03-15,1858-06-16,16529 days 00:00:00.000000000
8 | Alan Turing,1912-06-23,1954-06-07,37,Computer Scientist,1912-06-23,1954-06-07,15324 days 00:00:00.000000000
9 | Johann Gauss,1777-04-30,1855-02-23,61,Mathematician,1777-04-30,1855-02-23,28422 days 00:00:00.000000000
10 |
--------------------------------------------------------------------------------
/output/scientists_names_series.pickle:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/easysIT/doit_pandas/4b9d91839f0cc0b4a79ae35d14f6387431fd3c00/output/scientists_names_series.pickle
--------------------------------------------------------------------------------
/output/scientists_names_series_df.xls:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/easysIT/doit_pandas/4b9d91839f0cc0b4a79ae35d14f6387431fd3c00/output/scientists_names_series_df.xls
--------------------------------------------------------------------------------
/output/scientists_names_series_df.xlsx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/easysIT/doit_pandas/4b9d91839f0cc0b4a79ae35d14f6387431fd3c00/output/scientists_names_series_df.xlsx
--------------------------------------------------------------------------------